In [22]:
from bs4 import BeautifulSoup
import requests
import json

In [8]:
home_url = 'https://www.simplytarot.com'
base_urls = [
    'https://www.simplytarot.com/tarot-card-meanings/major-arcana-tarot-card-meanings/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/swords/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/cups/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/wands/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/pentacles/'
]
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}

In [3]:
bodies = {}
for url in base_urls:
    html_body = requests.get(
        url,
        headers=headers).content
    bodies[url] = BeautifulSoup(html_body).find_all('div', class_='cardContainer')

In [25]:
cards = {}
errors = []
for url, body in bodies.items():
    for card in body:
        card_name = card.find('img')['alt'].lower().replace(' ', '-').replace('care', 'card').replace('taort', 'tarot')
        card_name = card_name.replace('the-', '').replace('.', '-').replace('-tarot-card', '')
        response = requests.get(url + card_name, headers=headers)
        
        if response.status_code > 300:
            errors.append(response)
        else:
            cards[card_name] = response.content

In [38]:
cards_formatted = []
for card, body in cards.items():
    card_bs = BeautifulSoup(body)
    
    content = [c for c in card_bs.find_all('div', class_='card') if c.get_text() != ''][0]
    
    if content != None:
        image_url = 'https://www.simplytarot.com' + card_bs.find('', class_='cardImgLink').img['src']
        parragraphs = content.find_all('p')
    
        cards_formatted.append({
            'name': card,
            'summary': parragraphs[0].get_text(),
            'full_meaning': parragraphs[2].get_text(),
            'image': image_url
        })

In [116]:
def save_json(obj, file_name):
    with open(file_name, 'w') as file:
        json.dump(obj, file)
save_json(cards_formatted, 'cards.json')

In [41]:
# Load the cards
cards = json.loads(open('./lib/cards.json', 'r').read())

In [52]:
meanings_url = 'https://labyrinthos.co/blogs/tarot-card-meanings-list'

html_body = requests.get(
        meanings_url,
        headers=headers).content
meanings_bs = BeautifulSoup(html_body)

In [59]:
html_cards = meanings_bs.find_all('div', class_='grid__item large--one-quarter medium--one-third text-center card')

In [103]:
cards_as_map = { card['name'].replace('"', ''): card for card in cards }

In [104]:
cards_as_map.keys()

dict_keys(['the-fool', 'the-magician', 'the-empress', 'the-emperor', 'the-high-priestess', 'the-hierophant', 'the-lovers', 'the-chariot', 'justice', 'temperance', 'strength', 'the-hermit', 'the-wheel-of-fortune', 'the-hanged-man', 'death', 'the-devil', 'the-tower', 'the-star', 'the-moon', 'the-sun', 'judgement', 'the-world', 'ace-of-swords', 'two-of-swords', 'three-of-swords', 'four-of-swords', 'five-of-swords', 'six-of-swords', 'seven-of-swords', 'eight-of-swords', 'nine-of-swords', 'ten-of-swords', 'page-of-swords', 'knight-of-swords', 'queen-of-swords', 'king-of-swords', 'ace-of-cups', 'two-of-cups', 'three-of-cups', 'four-of-cups', 'five-of-cups', 'six-of-cups', 'seven-of-cups', 'eight-of-cups', 'nine-of-cups', 'ten-of-cups', 'page-of-cups', 'knight-of-cups', 'queen-of-cups', 'king-of-cups', 'ace-of-wands', 'two-of-wands', 'three-of-wands', 'four-of-wands', 'five-of-wands', 'six-of-wands', 'seven-of-wands', 'eight-of-wands', 'nine-of-wands', 'ten-of-wands', 'page-of-wands', 'knight

In [105]:
def get_card_name(card):
    return card.find('img')['alt'].split(' - ')[0].lower().replace(' ', '-').replace('-meaning', '')

In [106]:
def get_upright_reversed(card):
    clean_text = card.find('div', class_='rte rte--indented-images').text.strip()
    upright, reversed_ = clean_text.replace('Upright: ', '').split(', Reversed: ')
    
    return upright, reversed_

In [108]:
for card in html_cards:
    name = get_card_name(card)
    
    upright, reversed_ = get_upright_reversed(card)
    
    cards_as_map[name]['upright'] = upright
    cards_as_map[name]['reversed'] = reversed_

In [109]:
cards_as_map['seven-of-wands']

{'name': 'seven-of-wands"',
 'summary': 'Buy this deck',
 'full_meaning': 'The Subject really needs to think this through; a decision has to be taken regarding this issue – should they come away from this relationship/situation? \xa0Or should they make a real effort to change their attitude and inject some enthusiasm back into it?',
 'image': 'https://www.simplytarot.com/wp-content/uploads/2015/04/seven-of-wands-tarot-card.png',
 'upright': 'perseverance, defensive, maintaining control',
 'reversed': 'give up, destroyed confidence, overwhelmed'}

In [119]:
save_json(list(cards_as_map.values()), './lib/cards.json')

fix card that have errors

In [164]:
import pandas as pd
pd.set_option("display.max_colwidth", 10000)

df = pd.read_json('./lib/cards.json')

In [2]:
df.head()

Unnamed: 0,name,summary,full_meaning,image,upright,reversed
0,the-fool,Short Meaning: A new phase in life and an urg...,In-depth Meaning: Something new is starting f...,https://www.simplytarot.com/wp-content/uploads...,"innocence, new beginnings, free spirit","recklessness, taken advantage of, inconsideration"
1,the-magician,Short meaning: A crossroads. An opportunity f...,In-depth meaning: When The Magician appears i...,https://www.simplytarot.com/wp-content/uploads...,"willpower, desire, creation, manifestation","trickery, illusions, out of touch"
2,the-empress,"Short meaning: Can represent pregnancy, the e...",In-depth meaning: The Empress tends to appear...,https://www.simplytarot.com/wp-content/uploads...,"motherhood, fertility, nature","dependence, smothering, emptiness, nosiness"
3,the-emperor,"Short meaning: Can represent a father figure,...",In-depth meaning: This card shows the Subject...,https://www.simplytarot.com/wp-content/uploads...,"authority, structure, control, fatherhood","tyranny, rigidity, coldness"
4,the-high-priestess,"Short meaning: Growing potential, hidden tale...",In-depth meaning: The High Priestess represen...,https://www.simplytarot.com/wp-content/uploads...,"intuitive, unconscious, inner voice","lack of center, lost inner voice, repressed fe..."


In [4]:
df.describe()

Unnamed: 0,name,summary,full_meaning,image,upright,reversed
count,78,78,78,78,78,78
unique,78,69,78,78,78,78
top,queen-of-swords,Buy this deck,In-depth meaning: The Subject may have starte...,https://www.simplytarot.com/wp-content/uploads...,"anxiety, hopelessness, trauma","rebellion, subversiveness, new approaches"
freq,1,10,1,1,1,1


In [5]:
cards_with_wrong_summary = df[df.summary == 'Buy this deck']
cards_with_wrong_summary.head()

Unnamed: 0,name,summary,full_meaning,image,upright,reversed
15,the-devil,Buy this deck,It is time to deal with the situation rather t...,https://www.simplytarot.com/wp-content/uploads...,"addiction, materialism, playfulness","freedom, release, restoring control"
23,two-of-swords,Buy this deck,The Subject is not happy at all and this card ...,https://www.simplytarot.com/wp-content/uploads...,"difficult choices, indecision, stalemate","lesser of two evils, no right choice, confusion"
25,four-of-swords,Buy this deck,Taking time to consider the past and recognisi...,https://www.simplytarot.com/wp-content/uploads...,"rest, restoration, contemplation","restlessness, burnout, stress"
28,seven-of-swords,Buy this deck,"The Subject, or someone around them, may be re...",https://www.simplytarot.com/wp-content/uploads...,"deception, trickery, tactics and strategy","coming clean, rethinking approach, deception"
31,ten-of-swords,Buy this deck,"The message with this card is that, now the tr...",https://www.simplytarot.com/wp-content/uploads...,"failure, collapse, defeat","can't get worse, only upwards, inevitable end"


In [10]:
urls_with_suits = {
    'major': 'https://www.simplytarot.com/tarot-card-meanings/major-arcana-tarot-card-meanings/',
    'swords': 'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/swords/',
    'cups': 'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/cups/',
    'wands': 'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/wands/',
    'pentacles': 'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/pentacles/'
}

def url_for_card_name(card_name):
    for suit in urls_with_suits.keys():
        if suit in card_name:
            return urls_with_suits[suit] + card_name
        else:
            return urls_with_suits['major'] + card_name

suits = ['swords', 'wands', 'cus', 'pentacles', 'major']
cards_with_wrong_summary['url'] = cards_with_wrong_summary['name'].apply(lambda x: url_for_card_name(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [31]:
cards_htmls = {}

for i, card in cards_with_wrong_summary.iterrows():
    response = requests.get(card['url'], headers=headers)
    
    cards_htmls[card['name']] = BeautifulSoup(response.content)

In [143]:
cgi.escape("<div>lala</div>")

  """Entry point for launching an IPython kernel.


'&lt;div&gt;lala&lt;/div&gt;'

In [146]:
content = {}
for card, body in cards_htmls.items():
    card_content = [c for c in body.find_all('div', class_='card') if c.get_text() != ''][0]
    
    summary = [div for div in card_content.find_all('div') if div.get_text() != ''][0]
    full_meaning = card_content.find_all('p')[1]

    content[card] = {
        'summary': summary,
        'full_meaning': full_meaning
    }

In [147]:
content_df = pd.DataFrame(content.values(),index=content.keys())
content_df.head()

Unnamed: 0,summary,full_meaning
the-devil,"[[Short meaning: ], This card points to addic...","[[In-depth meaning: ], The Devil is an inward..."
two-of-swords,"[ , [Short meaning: ], The Two of Swords show...","[[In-depth meaning: ], When the Two of Swords..."
four-of-swords,"[ , [Short meaning: ], The Four of Swords enc...","[[In-depth meaning: ], The whole of the Sword..."
seven-of-swords,"[ , [Short meaning: ], The Seven of Swords wa...","[[In-depth meaning: ], The Subject should be ..."
ten-of-swords,"[ , [Short meaning: ], A difficult situation ...","[[In-depth meaning: ], The suffering of the S..."


In [148]:
cards_with_wrong_summary.loc[content.keys(), 'summary'] = content_df['summary']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [129]:
cards_with_wrong_summary

Unnamed: 0_level_0,name,summary,full_meaning,image,upright,reversed,url
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
the-devil,the-devil,"[[Short meaning: ], This card points to addic...",It is time to deal with the situation rather t...,https://www.simplytarot.com/wp-content/uploads...,"addiction, materialism, playfulness","freedom, release, restoring control",https://www.simplytarot.com/tarot-card-meaning...
two-of-swords,two-of-swords,"[ , [Short meaning: ], The Two of Swords show...",The Subject is not happy at all and this card ...,https://www.simplytarot.com/wp-content/uploads...,"difficult choices, indecision, stalemate","lesser of two evils, no right choice, confusion",https://www.simplytarot.com/tarot-card-meaning...
four-of-swords,four-of-swords,"[ , [Short meaning: ], The Four of Swords enc...",Taking time to consider the past and recognisi...,https://www.simplytarot.com/wp-content/uploads...,"rest, restoration, contemplation","restlessness, burnout, stress",https://www.simplytarot.com/tarot-card-meaning...
seven-of-swords,seven-of-swords,"[ , [Short meaning: ], The Seven of Swords wa...","The Subject, or someone around them, may be re...",https://www.simplytarot.com/wp-content/uploads...,"deception, trickery, tactics and strategy","coming clean, rethinking approach, deception",https://www.simplytarot.com/tarot-card-meaning...
ten-of-swords,ten-of-swords,"[ , [Short meaning: ], A difficult situation ...","The message with this card is that, now the tr...",https://www.simplytarot.com/wp-content/uploads...,"failure, collapse, defeat","can't get worse, only upwards, inevitable end",https://www.simplytarot.com/tarot-card-meaning...
ace-of-wands,ace-of-wands,"[ , [Short meaning: ], The Ace of Wands shows...","The Ace of Wands is the birth of a vision, a n...",https://www.simplytarot.com/wp-content/uploads...,"creation, willpower, inspiration, desire","lack of energy, lack of passion, boredom",https://www.simplytarot.com/tarot-card-meaning...
"seven-of-wands""","seven-of-wands""","[ , [Short meaning: ], The Seven of Wands sho...",The Subject really needs to think this through...,https://www.simplytarot.com/wp-content/uploads...,"perseverance, defensive, maintaining control","give up, destroyed confidence, overwhelmed",https://www.simplytarot.com/tarot-card-meaning...
eight-of-wands,eight-of-wands,"[ , [Short meaning: ], The Eight of Wands sho...","This situation hasn’t always been so positive,...",https://www.simplytarot.com/wp-content/uploads...,"rapid action, movement, quick decisions","panic, waiting, slowdown",https://www.simplytarot.com/tarot-card-meaning...
nine-of-wands,nine-of-wands,"[ , [Short meaning: ], The Nine of Wands is th...",The Subject would benefit from looking at this...,https://www.simplytarot.com/wp-content/uploads...,"resilience, grit, last stand","exhaustion, fatigue, questioning motivations",https://www.simplytarot.com/tarot-card-meaning...
ten-of-wands,ten-of-wands,"[ , [Short meaning: ], The Ten of Wands shows...",The Subject may also feel overwhelmed by someo...,https://www.simplytarot.com/wp-content/uploads...,"accomplishment, responsibility, burden","inability to delegate, overstressed, burnt out",https://www.simplytarot.com/tarot-card-meaning...


In [149]:
df.index = df['name']
df.head()

Unnamed: 0_level_0,name,summary,full_meaning,image,upright,reversed
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
the-fool,the-fool,Short Meaning: A new phase in life and an urg...,In-depth Meaning: Something new is starting f...,https://www.simplytarot.com/wp-content/uploads...,"innocence, new beginnings, free spirit","recklessness, taken advantage of, inconsideration"
the-magician,the-magician,Short meaning: A crossroads. An opportunity f...,In-depth meaning: When The Magician appears i...,https://www.simplytarot.com/wp-content/uploads...,"willpower, desire, creation, manifestation","trickery, illusions, out of touch"
the-empress,the-empress,"Short meaning: Can represent pregnancy, the e...",In-depth meaning: The Empress tends to appear...,https://www.simplytarot.com/wp-content/uploads...,"motherhood, fertility, nature","dependence, smothering, emptiness, nosiness"
the-emperor,the-emperor,"Short meaning: Can represent a father figure,...",In-depth meaning: This card shows the Subject...,https://www.simplytarot.com/wp-content/uploads...,"authority, structure, control, fatherhood","tyranny, rigidity, coldness"
the-high-priestess,the-high-priestess,"Short meaning: Growing potential, hidden tale...",In-depth meaning: The High Priestess represen...,https://www.simplytarot.com/wp-content/uploads...,"intuitive, unconscious, inner voice","lack of center, lost inner voice, repressed fe..."


In [152]:
df.loc[cards_with_wrong_summary.index, 'summary'] = cards_with_wrong_summary['summary']

In [153]:
df.describe()

Unnamed: 0,name,summary,full_meaning,image,upright,reversed
count,78,78,78,78,78,78
unique,78,78,78,78,78,78
top,queen-of-swords,Short meaning: The Ace of Swords is a sign of...,In-depth meaning: The Subject may have starte...,https://www.simplytarot.com/wp-content/uploads...,"anxiety, hopelessness, trauma","rebellion, subversiveness, new approaches"
freq,1,1,1,1,1,1


Fix the image link of the ten of swords

In [169]:
df.loc[df['name'] == 'ten-of-swords', 'image'] = 'https://www.simplytarot.com/wp-content/uploads/2015/04/ten-of-swords-tarot-card.png'

In [170]:
df[df['name'] == 'ten-of-swords']['image']

name
ten-of-swords    https://www.simplytarot.com/wp-content/uploads/2015/04/ten-of-swords-tarot-card.png
Name: image, dtype: object

In [174]:
df.to_json('./lib/cards.json', orient='records')

In [186]:
# clean strings
df['image'] = df['image'].apply(lambda x: x.replace("'", '').strip('b'))
df['image']

name
the-fool                          https://www.simplytarot.com/wp-content/uploads/2015/04/the-fool-tarot-card.png
the-magician                  https://www.simplytarot.com/wp-content/uploads/2015/04/the-magician-tarot-card.png
the-empress                    https://www.simplytarot.com/wp-content/uploads/2015/04/the-empress-tarot-card.png
the-emperor                    https://www.simplytarot.com/wp-content/uploads/2015/04/the-emperor-tarot-card.png
the-high-priestess      https://www.simplytarot.com/wp-content/uploads/2015/04/the-high-priestess-tarot-card.png
                                                                 ...                                            
ten-of-pentacles          https://www.simplytarot.com/wp-content/uploads/2015/04/ten-of-pentacles-tarot-card.png
page-of-pentacles        https://www.simplytarot.com/wp-content/uploads/2015/04/page-of-pentacles-tarot-card.png
knight-of-pentacles    https://www.simplytarot.com/wp-content/uploads/2015/04/knight-of-pen

In [187]:
df.to_json('./lib/cards.json', orient='records')