In [81]:
import re

import requests
from bs4 import BeautifulSoup
from fake_headers import Headers

In [105]:
# Sign slugs that generate_urls() inserts into the per-sign URLs.
# Order matters: for source '5' the list is zipped with per-sign URL fragments.
SIGNS = [
    'aries',
    'taurus',
    'gemini',
    'cancer',
    'leo',
    'virgo',
    'libra',
    'scorpio',
    'sagittarius',
    'capricorn',
    'aquarius',
    'pisces',
]

# URL building blocks keyed by source id (a string).
# A plain string is a prefix that gets one value appended; a two-item list is
# [prefix, suffix] placed around a sign-specific middle part.
# generate_urls() is the only place that knows how each entry is assembled.
SOURCES_URLS_MAP = {
    '1': 'https://www.horoscope.com/us/horoscopes/general/horoscope-general-daily-today.aspx?sign=',
    '2': [
        'https://cafeastrology.com/',
        'dailyhoroscope.html',
    ],
    '3': [
        'https://www.dailyhoroscope.com/horoscopes/daily/',
        '?full=true',
    ],
    '4': 'https://www.washingtonpost.com/horoscopes/',
    '5': [
        'https://www.elle.com/horoscopes/daily/a',
        '-daily-horoscope/',
    ],
}

In [106]:
def generate_urls(source_id:str) -> list:
    '''Builds the twelve per-sign page URLs for one horoscope source.

    source_id: key of SOURCES_URLS_MAP, one of the strings '1'..'5'.

    Returns a list of URL strings: numbered 1-12 for source '1', otherwise
    one URL per entry of SIGNS in the same order.

    Raises ValueError for any other source_id.
    '''
    if source_id == '1':
        # This source identifies the sign by a number in the query string.
        base = SOURCES_URLS_MAP[source_id]
        return [base + str(number) for number in range(1, 13)]

    if source_id in ('2', '3'):
        # The sign slug sits between a fixed prefix and a fixed suffix.
        head = SOURCES_URLS_MAP[source_id][0]
        tail = SOURCES_URLS_MAP[source_id][1]
        return [head + sign + tail for sign in SIGNS]

    if source_id == '4':
        base = SOURCES_URLS_MAP[source_id]
        return [base + sign for sign in SIGNS]

    if source_id == '5':
        head = SOURCES_URLS_MAP[source_id][0]
        tail = SOURCES_URLS_MAP[source_id][1]
        # Each sign's URL carries its own number right after the prefix:
        # 60 for the first sign, then 98..108 for the remaining eleven.
        page_numbers = [60] + list(range(98, 109))
        return [
            head + f'{page_number}/' + sign + tail
            for sign, page_number in zip(SIGNS, page_numbers)
        ]

    raise ValueError('Parameter "source_id" should be string in range 1-5.')

In [134]:
def get_horoscope_text(page_code:str, source_id:str) -> str:
    """Returns horoscope text from html page code.

    page_code: HTML of a horoscope page, e.g. the value returned by
        get_html_page(). None is accepted and yields None.
    source_id: which site the page came from; selects the extraction rule.
        Only '1', '2' and '3' have a rule so far.

    Returns the extracted horoscope text, or None when the source has no
    extraction rule ('4', '5' or anything unknown), when page_code is None,
    or when the expected elements are not found in the page.
    """
    # Nothing to parse, or no extraction rule written for this source yet.
    if page_code is None or source_id not in ('1', '2', '3'):
        return None

    try:
        soup = BeautifulSoup(page_code, features="html.parser")

        if source_id == '1':
            paragraph = soup.select('.main-horoscope p')[0]
            # Presumably the paragraph starts with a "<date> - " prefix
            # (TODO confirm against the live page). Split on the first
            # hyphen only, so hyphens inside the horoscope are kept.
            return paragraph.text.split('-', 1)[1].strip()

        if source_id == '2':
            # The 4th paragraph of the entry is taken as the horoscope.
            return soup.select('.entry-content p')[3].text.strip()

        # source_id == '3': the helper strips the date prefix and the
        # trailing promo text, so no further splitting is needed here.
        paragraph = soup.select('p.body')[0]
        return preprocess_text_source_3(paragraph.text)
    except Exception:
        # Deliberate best-effort: a page whose layout does not match the
        # selectors above yields None instead of aborting the whole scrape.
        return None

In [85]:
def get_html_page(url:str, timeout:float=10.0) -> str:
    """Returns html code of a page for given URL.

    url: address of the page to download.
    timeout: seconds to wait for the server before giving up. Without a
        timeout requests.get() may block indefinitely on a stalled
        connection and hang the notebook.

    Returns the response body as text when the status code is 200,
    otherwise None.

    Network errors raised by requests (including timeouts) are not caught
    here and propagate to the caller.
    """
    # Request headers are generated by fake_headers for every call.
    headers = Headers().generate()
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.text
    return None

In [107]:
# URLs of source '3', used by the experiments in the cells that follow.
test_urls = generate_urls('3')

In [159]:
# Show the generated URL list.
test_urls

['https://www.dailyhoroscope.com/horoscopes/daily/aries?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/taurus?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/gemini?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/cancer?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/leo?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/virgo?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/libra?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/scorpio?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/sagittarius?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/capricorn?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/aquarius?full=true',
 'https://www.dailyhoroscope.com/horoscopes/daily/pisces?full=true']

In [160]:
# Spot-check a single entry (index 4 is the fifth URL).
test_urls[4]

'https://www.dailyhoroscope.com/horoscopes/daily/leo?full=true'

In [180]:
from datetime import date

In [182]:
# Current year as an int, taken from the system clock.
date.today().year

2023

In [150]:
# Collects the raw 'p.body' text of each downloaded page, keyed by '1'..'12'.
horoscopes_dict = dict()

In [167]:
# Download every page in test_urls and store the text of its first 'p.body'
# element under the 1-based position of the URL ('1'..'12').
# `test_page_code` and `soup` keep the values of the last iteration and are
# read again by other cells of this notebook.
# NOTE(review): get_html_page() returns None for a non-200 response and the
# next line does not guard against that — confirm before re-running.
for n in range(0, 12):
    test_page_code = get_html_page(test_urls[n])
    soup = BeautifulSoup(test_page_code, features="html.parser")
    horoscopes_dict[str(n + 1)] = soup.select('p.body')[0].text

In [191]:
# Leading "<Month> <day>, <year> - " stamp, e.g. "October 21, 2023 - ".
# Used with .match(), so it only applies at the very start of the text.
_SOURCE_3_DATE_PREFIX = re.compile(r'\s*[A-Za-z]+\.?\s+\d{1,2},\s+\d{4}\s+-\s+')


def preprocess_text_source_3(text:str) -> str:
    '''Preprocesses text from source 3 and returns only horoscope.

    text: raw text of the 'p.body' element, i.e. a date stamp, the
        horoscope, then a line break followed by promo link text.

    Returns the horoscope with the date stamp and the promo part removed
    and surrounding whitespace stripped. Text without a date stamp is
    returned unchanged apart from stripping.
    '''
    # get rid of the link part: it follows the horoscope after a line break
    first_line = text.lstrip().split('\n')[0]

    # Strip the date stamp by its shape rather than by today's year, so the
    # result does not depend on when the notebook is run, and a missing
    # stamp no longer chops characters off the start of the horoscope.
    date_stamp = _SOURCE_3_DATE_PREFIX.match(first_line)
    if date_stamp:
        first_line = first_line[date_stamp.end():]

    return first_line.strip()

In [190]:
# Print the raw scraped texts as stored (date stamp and promo text included).
for v in horoscopes_dict.values():
    print(v)

October 23, 2023 - Your friends may look to you for emotional guidance throughout the day. They're potentially going through a crisis in some part of their life or grappling with a problem that they've been unable to figure out, and you could have the insight that will allow them to work through their struggles. Working together throughout any issues can strengthen the bond between you, since they'll see that you are there for them. Just avoid appearing to be preachy -- they might tune that right out! 
                    
                        Get your Daily Horoscope delivered to your inbox for FREE. Sign up now!
                    

October 23, 2023 - You're able to be honest about the feelings that are in your heart. Even if you've recently stumbled when releasing your emotions, it's okay. Prepare yourself for someone to ask you a question, providing an opportunity to open up and free yourself from any silent weights that have been dragging you down. Don't forget to check and en

In [192]:
# Print each text after preprocess_text_source_3(), separated by a blank line.
for v in horoscopes_dict.values():
    print(preprocess_text_source_3(v), '\n')

Your friends may look to you for emotional guidance throughout the day. They're potentially going through a crisis in some part of their life or grappling with a problem that they've been unable to figure out, and you could have the insight that will allow them to work through their struggles. Working together throughout any issues can strengthen the bond between you, since they'll see that you are there for them. Just avoid appearing to be preachy -- they might tune that right out! 

You're able to be honest about the feelings that are in your heart. Even if you've recently stumbled when releasing your emotions, it's okay. Prepare yourself for someone to ask you a question, providing an opportunity to open up and free yourself from any silent weights that have been dragging you down. Don't forget to check and ensure they have space to hold your emotions with you for a little while! Once they give the green light, sharing is an amazing way to feel lighter. 

The wise words of a mentor 

In [143]:
# Run the source-3 extraction on whatever page is currently in test_page_code.
text = get_horoscope_text(test_page_code, '3')

In [144]:
# Inspect the first 'p.body' element of the parsed page, markup included.
paragraph = soup.select('p.body')[0]
paragraph

<p class="body">October 21, 2023 - How you relate to others could use some tempering. Someone might be rubbing you the wrong way, possibly by engaging you in a bad-faith argument or blocking whatever you're trying to accomplish. While this would be understandably frustrating, it is likely that this person will have some kind of authority over you as well, or you may want to impress them. Brace yourself to grin and bear it rather than engaging them in the way your fiery heart might want to. Stay cool! 
                    <a class="upsell" data-category="text promo message" data-ga-event="" data-info="Promo_Text:Ari:DH:LO:Signup" data-label="Ari:DH:LO:Signup" onclick="open_modal('signup_modal', 'page-signup-text');">
                        Get your Daily Horoscope delivered to your inbox for FREE. Sign up now!
                    </a>
</p>

In [145]:
# Parse the page held in test_page_code and display the whole document.
soup = BeautifulSoup(test_page_code, features="html.parser")
soup


<!DOCTYPE html>

<html class="no-js" lang="en">
<head>
<meta charset="utf-8"/>
<title>
            Aries Daily Horoscope | DailyHoroscope.com
        </title>
<meta content="Aries Daily Horoscope | DailyHoroscope.com" property="og:title"/>
<link href="https://gfx.tarot.com/images/dailyhoroscope.com/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
<link href="https://www.dailyhoroscope.com/horoscopes/daily/aries" rel="canonical">
<meta content="www.dailyhoroscope.com" property="og:site_name"/>
<meta content="https://www.dailyhoroscope.com/horoscopes/daily/aries" property="og:url"/>
<meta content="What’s happening in the stars today, Aries? Check out your daily general horoscope to tune in to the day’s astrology and know what you can expect." name="description"/>
<meta content="What’s happening in the stars today, Aries? Check out your daily general horoscope to tune in to the day’s astrology and know what you can expect." property="og:description"/>
<meta content="aries daily hor

In [148]:
# Text-only view of the same element: this is the input preprocess_text_source_3() receives.
soup.select('p.body')[0].text

"October 21, 2023 - How you relate to others could use some tempering. Someone might be rubbing you the wrong way, possibly by engaging you in a bad-faith argument or blocking whatever you're trying to accomplish. While this would be understandably frustrating, it is likely that this person will have some kind of authority over you as well, or you may want to impress them. Brace yourself to grin and bear it rather than engaging them in the way your fiery heart might want to. Stay cool! \n                    \n                        Get your Daily Horoscope delivered to your inbox for FREE. Sign up now!\n                    \n"