In [81]:
from typing import Optional

import requests
from bs4 import BeautifulSoup
from fake_headers import Headers

In [105]:
# Sign names used as URL fragments; generate_urls iterates them in this order.
SIGNS = [
    'aries',
    'taurus',
    'gemini',
    'cancer',
    'leo',
    'virgo',
    'libra',
    'scorpio',
    'sagittarius',
    'capricorn',
    'aquarius',
    'pisces',
]

# URL building blocks keyed by source id.
# A plain string is a prefix that generate_urls appends a per-sign value to;
# a two-item list is [prefix, suffix] placed around the per-sign part.
SOURCES_URLS_MAP = {
    '1': 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=',
    '2': [
        'https://cafeastrology.com/',
        'dailyhoroscope.html',
    ],
    '3': [
        'https://www.dailyhoroscope.com/horoscopes/daily/',
        '?full=true',
    ],
    '4': 'https://www.washingtonpost.com/horoscopes/',
    '5': [
        'https://www.elle.com/horoscopes/daily/a',
        '-daily-horoscope/',
    ],
}

In [106]:
def generate_urls(source_id:str) -> list:
    '''Build the list of per-sign page URLs for one horoscope source.

    Args:
        source_id: Key of the source in SOURCES_URLS_MAP; one of the strings
            '1', '2', '3', '4' or '5'.

    Returns:
        A list of URL strings. Source '1' yields one URL per sign number
        1..12; the other sources yield one URL per entry of SIGNS, in order.

    Raises:
        ValueError: If source_id is not one of the supported string ids.
    '''
    if source_id == '1':
        # This source addresses signs by number (sign=1 .. sign=12).
        base_url = SOURCES_URLS_MAP[source_id]
        return [base_url + str(n) for n in range(1, 13)]

    if source_id in ('2', '3'):
        # The sign name sits between a fixed prefix and a fixed suffix.
        url_first_part = SOURCES_URLS_MAP[source_id][0]
        url_second_part = SOURCES_URLS_MAP[source_id][1]
        return [url_first_part + sign + url_second_part for sign in SIGNS]

    if source_id == '4':
        # The sign name is appended directly to the base URL.
        base_url = SOURCES_URLS_MAP[source_id]
        return [base_url + sign for sign in SIGNS]

    if source_id == '5':
        url_first_part = SOURCES_URLS_MAP[source_id][0]
        url_last_part = SOURCES_URLS_MAP[source_id][1]
        # Every sign has its own numeric path component: 60 for the first
        # sign, then 98..108 for the remaining eleven.
        url_middle_parts = ['60/'] + [f'{n}/' for n in range(98, 109)]
        return [
            url_first_part + middle_part + sign + url_last_part
            for sign, middle_part in zip(SIGNS, url_middle_parts)
        ]

    raise ValueError('Parameter "source_id" should be string in range 1-5.')

In [193]:
def get_horoscope_text(page_code:str, source_id:str) -> Optional[str]:
    """Extract the horoscope text from a page's HTML.

    Args:
        page_code: HTML source of the page.
        source_id: Id of the source the page came from ('1'..'5'); it picks
            the CSS selector and which matched paragraph is used.

    Returns:
        The extracted text, or None when anything goes wrong (unknown
        source_id, selector matching too few paragraphs, parsing errors).
        Failures are reported with print() instead of being raised.
    """

    # css selectors for each source
    SOURCES_SELECTORS_MAP = {
        '1': '.main-horoscope p',
        '2': '.entry-content p',
        '3': 'p.body',
        '4': 'article[data-qa="main"] p.mt-sm',
        # Adjacent literals keep source indentation out of the selector.
        '5': ('#main-content > div > .article-container '
              '> div[data-journey-body="standard-article"] > p'),
    }

    try:
        soup = BeautifulSoup(page_code, features="html.parser")
        # An unknown source_id raises KeyError here and is handled below.
        paragraphs = soup.select(SOURCES_SELECTORS_MAP[source_id])

        if source_id == '2':
            # For this source the fourth matched paragraph is the one used.
            return paragraphs[3].text.strip()
        if source_id in ('1', '3'):
            # NOTE(review): preprocess_soup is not defined in the visible
            # notebook cells; if it is missing at run time the NameError is
            # caught below and None is returned - confirm the helper's name.
            return preprocess_soup(paragraphs[0].text)
        # Remaining ids ('4' and '5'): first matched paragraph, trimmed.
        return paragraphs[0].text.strip()

    except Exception as err:
        # Best-effort extraction: report the problem and signal failure.
        print(f'Unexpected err={err!r}, type(err)={type(err)!r}')
        return None

In [225]:
def get_horoscope_text(source_id:str) -> None:
    """Print the CSS selector registered for the given source.

    NOTE(review): this definition shares its name with the two-argument
    get_horoscope_text(page_code, source_id) defined earlier in the notebook
    and replaces it in the kernel namespace once this cell runs.

    Args:
        source_id: Id of the source ('1'..'5').

    Raises:
        KeyError: If no selector is registered for source_id.
    """

    # css selectors for each source
    selectors = {
        '1': '.main-horoscope p',
        '2': '.entry-content p',
        '3': 'p.body',
        '4': 'article[data-qa="main"] p.mt-sm',
        # Adjacent literals keep source indentation out of the selector.
        '5': ('#main-content > div > .article-container '
              '> div[data-journey-body="standard-article"] > p'),
    }

    # The id is formatted into a string first, so values such as the
    # integer 1 resolve to the same entry as '1'.
    print(selectors[f'{source_id}'])

In [226]:
# Try the one-argument get_horoscope_text: prints the selector for source '1'.
get_horoscope_text('1')

.main-horoscope p


In [85]:
def get_html_page(url:str, timeout:float=10.0) -> Optional[str]:
    """Download a page and return its HTML source.

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to requests.get as its timeout, so a server
            that never answers cannot block the notebook indefinitely.

    Returns:
        The response body as text when the status code is 200,
        otherwise None.

    Raises:
        Exceptions raised by requests.get (for example on timeout or
        connection failure) are not caught here and propagate to the caller.
    """
    # Request headers come from fake_headers' Headers().generate().
    headers = Headers().generate()
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.text
    return None

In [227]:
# Build the URLs for source '1' (one per sign number 1-12) to experiment with.
test_urls = generate_urls('1')

In [228]:
# Inspect the generated URLs.
test_urls

['https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=1',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=2',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=3',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=4',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=5',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=6',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=7',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=8',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=9',
 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=10',
 'https://www.horoscope.com/us/horoscopes/general/horoscope

In [229]:
# Download the first URL; get_html_page gives None when the status is not 200.
test_page_code = get_html_page(test_urls[0])

In [230]:
# Show the raw HTML string that was downloaded.
test_page_code

'<!doctype html>\n<html class="no-js" lang="en">\n<head>\n<title>Aries Horoscopes: Daily & Today, Monday | Horoscope.com</title>\n<meta name="description" content="Read your free daily Aries horoscope on Horoscope.com. Find out what the planets have predicted for your day!">\n<meta property="og:url" content="https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=1&ladate=20231023">\n<meta property="og:title" content="Aries Horoscopes: Daily & Today, Monday: Oct 23, 2023">\n<meta property="og:description" content="Your well-considered actions will be effective today. There\'s strength in your mental power. You shouldn\'t hesitate to direct others toward avenues...">\n<meta property="og:image" content="http://www.horoscope.com/images-US/signs/profile-1.jpg">\n<meta property="caption" content="Oct 23, 2023">\n<meta property="article:modified_time" content="2023-10-23T00:00:00">\n<meta charset="utf-8">\n<meta http-equiv="X-UA-Compatible" content="IE=edge">\

In [238]:
# Run the preprocessing helper on the text of the first '.main-horoscope p' match.
# NOTE(review): preprocess_text_source_3 is not defined in the visible cells and
# `soup` is created in a different cell - both must already exist in the kernel.
preprocess_text_source_3(soup.select('.main-horoscope p')[0].text)

["Your well-considered actions will be effective today. There's strength in your mental power. You shouldn't hesitate to direct others toward avenues that would be good for them. You have the vision to see things that other people might not notice. People may not be able to see something in front of them even if they're about to crash into it!"]

In [202]:
# Start with an empty mapping; the scraping loop fills it with one text per key.
horoscopes_dict = {}

In [222]:
# Display the horoscope texts collected so far.
horoscopes_dict


{'1': 'If you’re craving one-on-one time with someone who’s important to you, find some whitespace in your calendar and make concrete plans. With the radiant Sun diving into Scorpio and your intimate, intense eighth house today, you’re due to pump up the passion in your life during the month ahead. Focus your undivided attention on your S.O. and open up so you can grow closer. If you’re unattached, make a promise to yourself that you won’t settle for a “meh” connection. You deserve better, Ram!',
 '2': 'Shift your emphasis from “me” to “we” starting today when the vibrant Sun sails into Scorpio and your relationship zone, setting up shop there until November 22. If you’re already coupled up, prioritize your connection in these next four weeks, especially if you haven’t been nurturing it enough lately. Don’t assume you know everything about each other! Treating your partner like an intriguing stranger can launch a whole new discovery process. Unattached? Be open to unexpected encounters

In [221]:
# Download the first twelve URLs in test_urls and store the text of the first
# paragraph matched by the article selector under the keys '1'..'12'.
# NOTE(review): this selector has the same structure as the source '5' entry in
# get_horoscope_text, while the visible assignment of test_urls uses source '1' -
# confirm test_urls holds the intended URLs before running this cell.
for n in range(0, 12):
    test_page_code = get_html_page(test_urls[n])
    if test_page_code is None:
        # get_html_page returns None for non-200 responses; nothing to parse.
        print(f'Skipping {test_urls[n]}: page could not be downloaded')
        continue

    soup = BeautifulSoup(test_page_code, features="html.parser")
    matched_paragraphs = soup.select('#main-content > div > .article-container > div[data-journey-body="standard-article"] > p')
    if not matched_paragraphs:
        # Page layout differs from what the selector expects.
        print(f'Skipping {test_urls[n]}: selector matched no paragraphs')
        continue

    horoscopes_dict[str(n + 1)] = matched_paragraphs[0].text

In [143]:
# Extract text using the source '3' selector. This call passes two arguments,
# so it needs the (page_code, source_id) definition of get_horoscope_text.
text = get_horoscope_text(test_page_code, '3')

In [240]:
# Quick check: an empty string is falsy.
bool('')

False

In [231]:
# Parse the downloaded HTML with the "html.parser" backend and display the
# parsed document as the cell output.
soup = BeautifulSoup(test_page_code, features="html.parser")
soup

<!DOCTYPE html>

<html class="no-js" lang="en">
<head>
<title>Aries Horoscopes: Daily &amp; Today, Monday | Horoscope.com</title>
<meta content="Read your free daily Aries horoscope on Horoscope.com. Find out what the planets have predicted for your day!" name="description"/>
<meta content="https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=1&amp;ladate=20231023" property="og:url"/>
<meta content="Aries Horoscopes: Daily &amp; Today, Monday: Oct 23, 2023" property="og:title"/>
<meta content="Your well-considered actions will be effective today. There's strength in your mental power. You shouldn't hesitate to direct others toward avenues..." property="og:description"/>
<meta content="http://www.horoscope.com/images-US/signs/profile-1.jpg" property="og:image"/>
<meta content="Oct 23, 2023" property="caption"/>
<meta content="2023-10-23T00:00:00" property="article:modified_time"/>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compati

In [220]:
# List the <p> elements that are direct children of the standard-article body
# container; requires `soup` to have been created in another cell.
soup.select('#main-content > div > .article-container > div[data-journey-body="standard-article"] > p')

[<p class="css-106f026 et3p2gv0" data-journey-content="true" data-node-id="3">If you’re craving one-on-one time with someone who’s important to you, find some whitespace in your calendar and make concrete plans. With the radiant Sun diving into Scorpio and your intimate, intense eighth house today, you’re due to pump up the passion in your life during the month ahead. Focus your undivided attention on your S.O. and open up so you can grow closer. If you’re unattached, make a promise to yourself that you won’t settle for a “meh” connection. You deserve better, Ram!</p>,
 <p class="css-106f026 et3p2gv0" data-journey-content="true" data-node-id="5"><a class="css-d2yypp et3p2gv0" href="/horoscopes/">See All Signs</a></p>]