In [1]:
from bs4 import BeautifulSoup
import requests
import json

In [2]:
# Browser-like User-Agent sent with every request made by this notebook.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
}

# Root of the site that is scraped for card pages.
home_url = 'https://www.simplytarot.com'

# Listing pages to crawl: the major arcana first, then one page per minor-arcana suit.
# Each URL ends with '/', so a card slug can be appended to it directly.
base_urls = [
    'https://www.simplytarot.com/tarot-card-meanings/major-arcana-tarot-card-meanings/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/swords/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/cups/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/wands/',
    'https://www.simplytarot.com/tarot-card-meanings/minor-arcana/pentacles/',
]

In [3]:
# Download every listing page and keep, per listing URL, the list of
# `div.cardContainer` elements found on it (one per card tile).
bodies = {}
for url in base_urls:
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on an HTTP error instead of silently parsing an error page into an empty list.
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which parsers are installed.
    listing_soup = BeautifulSoup(response.content, 'html.parser')
    bodies[url] = listing_soup.find_all('div', class_='cardContainer')

In [25]:
# Fetch the detail page of every card found on the listing pages.
# `cards` maps the card slug to the raw HTML bytes of its page; responses with
# a non-success status are collected in `errors` for later inspection.
cards = {}
errors = []
for url, body in bodies.items():
    for card in body:
        # Build the URL slug from the image alt text: lower-case, dashes for spaces.
        # The 'care' -> 'card' and 'taort' -> 'tarot' replacements presumably patch
        # typos in some alt texts on the site -- TODO confirm.
        card_name = card.find('img')['alt'].lower().replace(' ', '-').replace('care', 'card').replace('taort', 'tarot')
        card_name = card_name.replace('the-', '').replace('.', '-').replace('-tarot-card', '')
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url + card_name, headers=headers, timeout=30)

        # `>= 300` so that a 300 response is also treated as a failure
        # (the previous `> 300` stored its body as card HTML).
        if response.status_code >= 300:
            errors.append(response)
        else:
            cards[card_name] = response.content

In [38]:
# Turn each downloaded card page into a flat record:
# {'name', 'summary', 'full_meaning', 'image'}.
cards_formatted = []
for card, body in cards.items():
    # Name the parser explicitly so the result does not depend on which parsers are installed.
    card_bs = BeautifulSoup(body, 'html.parser')

    # Use the first `div.card` that actually contains text. `next(..., None)` yields
    # None when there is no such div, so the guard below is reachable
    # (indexing `[0]` on an empty list raised IndexError before the check ran).
    content = next(
        (c for c in card_bs.find_all('div', class_='card') if c.get_text() != ''),
        None,
    )
    if content is None:
        # Nothing to extract from this page; leave the card out of the result.
        continue

    # Search by class only (any tag name); the image `src` is site-relative,
    # so it is prefixed with the site root.
    image_url = home_url + card_bs.find(class_='cardImgLink').img['src']
    paragraphs = content.find_all('p')

    cards_formatted.append({
        'name': card,
        # NOTE(review): the paragraph positions (0 and 2) are tied to the page
        # layout at scraping time -- verify against a sample page.
        'summary': paragraphs[0].get_text(),
        'full_meaning': paragraphs[2].get_text(),
        'image': image_url
    })

In [116]:
def save_json(obj, file_name):
    """Serialise ``obj`` to JSON and write it to ``file_name``.

    The file is opened in write mode, so existing content is replaced.

    Args:
        obj: Any value that ``json.dump`` can serialise.
        file_name: Path of the file to write.
    """
    with open(file_name, 'w') as out_file:
        json.dump(obj, fp=out_file)
# Write the scraped card records to disk.
# NOTE(review): this writes 'cards.json' in the working directory, while the
# load cell further down reads './lib/cards.json' -- confirm how the file gets there.
save_json(obj=cards_formatted, file_name='cards.json')

In [41]:
# Load the saved card records from disk.
# Note: this rebinds `cards`, which the scraping cell above used for a dict of
# raw page HTML; from here on each item is a record indexed by keys such as 'name'.
# The `with` block closes the file handle, which the bare `open(...).read()` never did.
with open('./lib/cards.json', 'r') as cards_file:
    cards = json.load(cards_file)

In [52]:
# Page that lists the cards together with their upright / reversed keywords.
meanings_url = 'https://labyrinthos.co/blogs/tarot-card-meanings-list'

# Without a timeout, requests waits indefinitely on an unresponsive server.
meanings_response = requests.get(meanings_url, headers=headers, timeout=30)
# Stop on an HTTP error instead of parsing an error page.
meanings_response.raise_for_status()

html_body = meanings_response.content
# Name the parser explicitly so the result does not depend on which parsers are installed.
meanings_bs = BeautifulSoup(html_body, 'html.parser')

In [59]:
# Collect the <div> tiles on the meanings page whose class matches this string.
html_cards = meanings_bs.find_all(
    'div',
    attrs={'class': 'grid__item large--one-quarter medium--one-third text-center card'},
)

In [103]:
# Index the card records by name for direct lookup.
# Some stored names carry a stray double quote. It is removed from the record's
# own 'name' as well as from the key, so the quote is not written back out when
# the records are saved. Records are updated in place, so the map values remain
# the same dict objects as the items of `cards`.
cards_as_map = {}
for card in cards:
    clean_name = card['name'].replace('"', '')
    card['name'] = clean_name
    cards_as_map[clean_name] = card

In [104]:
# Display the lookup keys to check which card names are available in the map.
cards_as_map.keys()

dict_keys(['the-fool', 'the-magician', 'the-empress', 'the-emperor', 'the-high-priestess', 'the-hierophant', 'the-lovers', 'the-chariot', 'justice', 'temperance', 'strength', 'the-hermit', 'the-wheel-of-fortune', 'the-hanged-man', 'death', 'the-devil', 'the-tower', 'the-star', 'the-moon', 'the-sun', 'judgement', 'the-world', 'ace-of-swords', 'two-of-swords', 'three-of-swords', 'four-of-swords', 'five-of-swords', 'six-of-swords', 'seven-of-swords', 'eight-of-swords', 'nine-of-swords', 'ten-of-swords', 'page-of-swords', 'knight-of-swords', 'queen-of-swords', 'king-of-swords', 'ace-of-cups', 'two-of-cups', 'three-of-cups', 'four-of-cups', 'five-of-cups', 'six-of-cups', 'seven-of-cups', 'eight-of-cups', 'nine-of-cups', 'ten-of-cups', 'page-of-cups', 'knight-of-cups', 'queen-of-cups', 'king-of-cups', 'ace-of-wands', 'two-of-wands', 'three-of-wands', 'four-of-wands', 'five-of-wands', 'six-of-wands', 'seven-of-wands', 'eight-of-wands', 'nine-of-wands', 'ten-of-wands', 'page-of-wands', 'knight

In [105]:
def get_card_name(card):
    """Derive a card slug from the alt text of the first ``<img>`` inside ``card``.

    Only the part of the alt text before the first ``' - '`` is used. It is
    lower-cased, spaces become dashes, and every ``'-meaning'`` is removed.

    Args:
        card: Element exposing ``find('img')`` whose result can be indexed with ``'alt'``.

    Returns:
        The slug as a string.
    """
    alt_text = card.find('img')['alt']
    title = alt_text.split(' - ')[0]
    slug = title.lower().replace(' ', '-')
    return slug.replace('-meaning', '')

In [106]:
def get_upright_reversed(card):
    """Split a card tile's keyword text into its upright and reversed parts.

    The text is read from the ``div`` with class ``'rte rte--indented-images'``,
    stripped, cleared of every ``'Upright: '`` and split on ``', Reversed: '``.

    Args:
        card: Element exposing ``find('div', class_=...)`` whose result has ``.text``.

    Returns:
        A ``(upright, reversed_)`` tuple of strings.

    Raises:
        ValueError: If the split does not produce exactly two parts.
    """
    keyword_div = card.find('div', class_='rte rte--indented-images')
    keywords = keyword_div.text.strip().replace('Upright: ', '')
    upright, reversed_ = keywords.split(', Reversed: ')
    return upright, reversed_

In [108]:
# Attach the upright / reversed keywords of every scraped tile to the card
# record stored under the same name (a missing name raises KeyError).
for card in html_cards:
    name = get_card_name(card)
    upright, reversed_ = get_upright_reversed(card)
    cards_as_map[name].update(upright=upright, reversed=reversed_)

In [109]:
# Spot-check one enriched record to confirm that 'upright' and 'reversed' were added.
# NOTE(review): the recorded output shows 'summary': 'Buy this deck', which looks
# like page boilerplate rather than a card summary -- the paragraph chosen for
# 'summary' when scraping may need checking.
cards_as_map['seven-of-wands']

{'name': 'seven-of-wands"',
 'summary': 'Buy this deck',
 'full_meaning': 'The Subject really needs to think this through; a decision has to be taken regarding this issue – should they come away from this relationship/situation? \xa0Or should they make a real effort to change their attitude and inject some enthusiasm back into it?',
 'image': 'https://www.simplytarot.com/wp-content/uploads/2015/04/seven-of-wands-tarot-card.png',
 'upright': 'perseverance, defensive, maintaining control',
 'reversed': 'give up, destroyed confidence, overwhelmed'}

In [117]:
# Persist the enriched records. This writes to the same path the load cell
# reads from, so the input file is replaced by the enriched version.
save_json(obj=[*cards_as_map.values()], file_name='./lib/cards.json')