Notebook scrapes fixture data from: https://www.bbc.com/sport/football/scores-fixtures

In [1]:
import requests
from bs4 import BeautifulSoup
from datetime import date as mydate
from datetime import datetime as mydatetime
import os, pytz, datetime, re
import time as mytime

In [2]:
def swap_positions(list, pos1, pos2):

    """
    Function to swap item positions in a list.

    Called later
    """

    list[pos1], list[pos2] = list[pos2], list[pos1]

In [3]:
def clean_data(list):

    """
    Changing all instances of 'Premier League' to 'English Premier League' for better consistency.
    Also chops away all unnecessary string data.

    Called later
    """

    prem_header = ">Premier League</h3>"
    EPL_header = ">English Premier League</h3>"
    prem_span = "$0Premier League"
    EPL_span = "$0English Premier League"

    for indx, item in enumerate(list):
        if prem_header in item:
            list[indx] = list[indx].replace(prem_header, EPL_header)
        elif prem_span in item:
            list[indx] = list[indx].replace(prem_span, EPL_span)
        else:
            item

    leagues = (['English Premier League', 'Spanish La Liga',  'German Bundesliga',  'Italian Serie A',
            'French Ligue 1', 'Champions League'])

    list = [i[-145:] for i in list]
    left, right = '">', '</'
    list = [[l[l.index(left)+len(left):l.index(right)] for l in list if i in l] for i in leagues]

    return list

In [4]:
def home_and_away(list):

    """
    For games that haven't occured yet, our scraper will return Home Team, Away Team, and game time.
    There will be an empty spot '' where our scraper tried to scrape the minute the game is in, but since
    the game has yet to start it is empty.

    This function fills the blank space with an (H) to signify home team, then creates a new blank space
    and fills it with an (A) to signify away team, and re-orders the list so it reads:

    'Home Team, (H), Away Team, (A), Game time'

    Called later
    """

    for i in list:
        while '' in i:
            swap_positions(i, i.index(''), i.index('') - 2)
            blank = i.index('')
            blank_2 = i.index('') + 2
            i[blank] = '(H)'
            i.insert(blank_2, '(A)')


In [5]:
def choose_date():

    """
    User inputs the date they would like to check
    If input is in the wrong format, user is prompted to try it again
    """

    print_once = True
    while print_once:

        print(' ')
        date_to_look = input('Enter a date (YYYY-MM-DD) to view the matches in your selected leagues: ')

        match = re.match("[0-9]{4}-[0-9]{2}-[0-9]{2}", date_to_look)
        is_match = bool(match) # Check if date was entered wrong

        if is_match == False:
            os.system("clear")
            print("Invalid entry. Make sure your date is entered in ('YYYY-MM-DD') format: ")
            continue

        year, month, day = (int(x) for x in date_to_look.split('-'))
        ans = datetime.date(year, month, day)

        print(' ')
        print('-'*100)
        print('-'*100)
        print(' ')
        print('Matchups in the following leagues for {}, {} {}, {}:'.format(ans.strftime("%A"),
                                                                              ans.strftime("%B"),
                                                                              ans.strftime("%d"),
                                                                                  ans.strftime("%Y")))
        print(' ')

        print_once = False

    return str(date_to_look)

In [6]:
def scraping():

    """
    Web scraping code
    """

    url = "https://www.bbc.com/sport/football/scores-fixtures/" + date_to_choose

    html_content = requests.get(url).text

    soup = BeautifulSoup(html_content, "html.parser")

    tags = ["span", "h3"]
    classes = (["gs-u-display-none gs-u-display-block@m qa-full-team-name sp-c-fixture__team-name-trunc",
                  "sp-c-fixture__status-wrapper qa-sp-fixture-status",
                  'sp-c-fixture__number sp-c-fixture__number--time', "sp-c-fixture__number sp-c-fixture__number--home",
                  "sp-c-fixture__number sp-c-fixture__number--home sp-c-fixture__number--ft",
                 "sp-c-fixture__number sp-c-fixture__number--home sp-c-fixture__number--live-sport",
                  "sp-c-fixture__number sp-c-fixture__number--away sp-c-fixture__number--live-sport",
                 "sp-c-fixture__number sp-c-fixture__number--away sp-c-fixture__number--ft",
                  'gel-minion sp-c-match-list-heading'])

    scraper = soup.find_all(tags, attrs={'class': classes})
    data = [str(l) for l in scraper]

    data = clean_data(data) # Functiom call
    home_and_away(data)     # Function call

    data = [l for l in data if len(l) != 0]

    return data

In [7]:
def change_time():

    """
    Alters match-time from UK time (site gives games in UK time) to whatever the local time is
    by detecting users timezone automatically
    """

    data = scraping() # Function call

    curr_time = mytime.localtime()
    curr_clock = mytime.strftime("%Y:%m:%d %H:%M:%S %Z %z", curr_time)

    IST = pytz.timezone('Europe/London')
    datetime_ist = mydatetime.now(IST)
    london = datetime_ist.strftime("%Y:%m:%d %H:%M:%S %Z %z")

    curr_hour, curr_min = curr_clock[-5:-2], curr_clock[14:16]
    lndn_hour, lndn_min = london[-5:-2], london[14:16]

    # Comparing time difference between London and user's local time
    hour_diff = int(lndn_hour) - int(curr_hour)
    min_diff = int(lndn_min) - int(curr_min)

    if min_diff == 0:
        min_diff = str(min_diff) + '0'

    for k in data:
        for indx, item in enumerate(k):

            if ":" in item:

                if min_diff == '00': # If there is no minute difference, change hours and keep minutes the same
                    val = str(int(item[:item.index(":")]) - hour_diff) + item[item.index(":"):]

                if min_diff != '00': # If there is a minutes difference, change hours and minutes
                    val = str(int(item[:item.index(":")]) - hour_diff) + ":" + str(abs(min_diff) + int(item[item.index(":") + 1:]))

                if int(val[val.index(":") + 1:]) >= 60:
                    # If the new 'minutes' value is >= 60, add 1 to the hour value and subtract 60 from the minutes
                    val = str(int(val[:val.index(":")]) + 1) + ":" + str(int(val[val.index(":") + 1:]) - 60)

                if int(val[:val.index(":")]) >= 24:
                    # If the new hours value is >= 24, subtract 24 from the hours and add a '+1' to the end
                    # to signify game is taking place the following day
                    val = "0" + str(int(item[:item.index(":")]) -24) + ":" + str(int(item[item.index(":") + 1:])) + " +1"

                if val[val.index(":") + 1:] == '0':
                    val = i + '0' # Add a second '0' to minutes value is there is only one

                try:
                    # If minutes value is between 1-9, add a '0' so that it reads '11:07' rather than
                    # '11:7', for example
                    if int(val[val.index(":") + 1:]) < 10 and int(val[val.index(":") + 1:]) > 0:
                        colon = val.find(":")
                        val = val[:colon + 1] + '0' + val[colon + 1:]
                except ValueError:
                        k[indx] = val
                        continue
                k[indx] = val

    data = [[i.replace('&amp;', '&') for i in group] for group in data] # Brighton & Hove Albion problem

    return data

In [8]:
def final_print():

    """
    Final print function

    If user presses Enter while in terminal the scores will refresh without the user needing to enter
    the date to search again. This way it can be called once during matchdays and work throughout the day
    """

    refresh = ''

    while refresh == '':

        ct = 0
        league_in = 0
        h_team, h_score, a_team, a_score, time = 1, 2, 3, 4, 5

        data = change_time()

        no_games = all(len(l) == 0 for l in data)
        if (no_games): # If all the lists are empty
            print('NO GAMES ON THIS DATE')
            break

        for i in data:

            print(i[0])
            print('-'*25)

            while ct < len(data[league_in][1:]) // 5:
                print("{:<25} {:^5} {:<25} {:^3} | {:>7}".format(i[h_team], i[h_score], i[a_team], i[a_score], i[time]))
                ct += 1
                h_team += 5
                h_score += 5
                a_team += 5
                a_score += 5
                time += 5

            print(' ')
            league_in += 1
            ct, h_team, h_score, a_team, a_score, time = 0, 1, 2, 3, 4, 5

        refresh = input('Press "Enter" to refresh the page: ')
        os.system("clear")


In [10]:
if __name__ == "__main__":
    date_to_choose = choose_date()
    final_print()

 
Enter a date (YYYY-MM-DD) to view the matches in your selected leagues: 17-09-2023
Invalid entry. Make sure your date is entered in ('YYYY-MM-DD') format: 
 
Enter a date (YYYY-MM-DD) to view the matches in your selected leagues: 2023-09-17
 
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
 
Matchups in the following leagues for Sunday, September 17, 2023:
 
English Premier League
-------------------------
AFC Bournemouth            (H)  Chelsea                   (A) |   13:00
Everton                    (H)  Arsenal                   (A) |   15:30
 
Spanish La Liga
-------------------------
Getafe                     (H)  Osasuna                   (A) |   12:00
Villarreal                 (H)  Almería                   (A) |   14:15
Sevilla                    (H)  Las Palmas                (A) |   16:30
Real Madrid                (H)

KeyboardInterrupt: ignored

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta

# Function to scrape fixtures for a specific date
def scrape_fixtures(date):
    url = f"https://www.bbc.com/sport/football/teams/liverpool/fixtures?date={date.strftime('%Y-%m-%d')}"
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        fixtures_section = soup.find('div', class_='qa-fixtures')
        fixtures = fixtures_section.find_all('div', class_='qa-match-link')

        fixtures_list = []
        for fixture in fixtures:
            date = fixture.find('span', class_='sp-c-fixture__datetime').text.strip()
            teams = fixture.find('span', class_='sp-c-fixture__teams').text.strip()
            fixtures_list.append({'Date': date, 'Teams': teams})

        return fixtures_list
    else:
        return None

# Initialize an empty DataFrame to store fixtures
fixtures_df = pd.DataFrame(columns=['Date', 'Teams'])

# Define the start and end dates for scraping (2 months in the future)
start_date = datetime.now()
end_date = start_date + timedelta(days=60)

# Iterate through each date and scrape fixtures
current_date = start_date
while current_date <= end_date:
    fixtures = scrape_fixtures(current_date)
    if fixtures:
        fixtures_df = fixtures_df.append(fixtures, ignore_index=True)
    current_date += timedelta(days=1)

# Filter fixtures for Liverpool FC
liverpool_fixtures = fixtures_df[fixtures_df['Teams'].str.contains('Liverpool', case=False)]

# Print the Liverpool FC fixtures
print(liverpool_fixtures)


In [14]:
import requests
from datetime import datetime, timedelta

# Replace with your Football Data API key
api_key = "75ed82196f4f4224b11bf55cd8034ab0"

# Define the start and end dates for fetching fixtures (2 months in the future)
start_date = datetime.now()
end_date = start_date + timedelta(days=60)

# Initialize an empty list to store fixtures
fixtures = []

# Iterate through each day and fetch fixtures
current_date = start_date
while current_date <= end_date:
    date_str = current_date.strftime("%Y-%m-%d")
    url = f"https://api.football-data.org/v2/teams/64/matches?dateFrom={date_str}&dateTo={date_str}"

    headers = {
        "X-Auth-Token": api_key,
    }

    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        fixtures.extend(data.get("matches", []))

    current_date += timedelta(days=1)

# Filter fixtures for Liverpool FC
liverpool_fixtures = [fixture for fixture in fixtures if "Liverpool" in fixture.get("homeTeam", {}).get("name", "") or "Liverpool" in fixture.get("awayTeam", {}).get("name", "")]

# Print the Liverpool FC fixtures
for fixture in liverpool_fixtures:
    home_team = fixture.get("homeTeam", {}).get("name", "")
    away_team = fixture.get("awayTeam", {}).get("name", "")
    date = fixture.get("utcDate", "")
    print(f"{date}: {home_team} vs {away_team}")


In [16]:
import requests
from datetime import datetime, timedelta
import time

# Replace with your Football Data API key
api_key = "75ed82196f4f4224b11bf55cd8034ab0"

# Define the start and end dates for fetching fixtures (2 months in the future)
start_date = datetime.now()
end_date = start_date + timedelta(days=60)

# Initialize an empty list to store fixtures
fixtures = []

# Iterate through each day and fetch fixtures
current_date = start_date
while current_date <= end_date:
    date_str = current_date.strftime("%Y-%m-%d")
    url = f"https://api.football-data.org/v2/teams/64/matches?dateFrom={date_str}&dateTo={date_str}"

    headers = {
        "X-Auth-Token": api_key,
    }

    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        data = response.json()
        fixtures.extend(data.get("matches", []))
    else:
        print(f"Error: {response.status_code} - {response.text}")

    current_date += timedelta(days=1)
    time.sleep(5)  # Add a delay of 5 seconds between requests (adjust as needed)

# Filter fixtures for Liverpool FC
liverpool_fixtures = [fixture for fixture in fixtures if "Liverpool" in fixture.get("homeTeam", {}).get("name", "") or "Liverpool" in fixture.get("awayTeam", {}).get("name", "")]

# Print the Liverpool FC fixtures
for fixture in liverpool_fixtures:
    home_team = fixture.get("homeTeam", {}).get("name", "")
    away_team = fixture.get("awayTeam", {}).get("name", "")
    date = fixture.get("utcDate", "")
    print(f"{date}: {home_team} vs {away_team}")


2023-09-16T11:30:00Z: Wolverhampton Wanderers FC vs Liverpool FC
2023-09-24T13:00:00Z: Liverpool FC vs West Ham United FC
2023-09-30T16:30:00Z: Tottenham Hotspur FC vs Liverpool FC
2023-10-08T13:00:00Z: Brighton & Hove Albion FC vs Liverpool FC
2023-10-21T11:30:00Z: Liverpool FC vs Everton FC
2023-10-29T14:00:00Z: Liverpool FC vs Nottingham Forest FC
2023-11-04T00:00:00Z: Luton Town FC vs Liverpool FC


In [18]:
import requests

# Define the team ID for Liverpool FC on TheSportsDB
team_id = '133612'

# Define the API endpoint
url = f'https://www.thesportsdb.com/api/v1/json/1/eventsnext.php?id={team_id}'

# Send a GET request to fetch fixture data
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    data = response.json()
    events = data.get('events', [])

    # Print the upcoming fixtures
    for event in events:
        date = event.get('dateEvent', '')
        home_team = event.get('strHomeTeam', '')
        away_team = event.get('strAwayTeam', '')

        print(f'{date}: {home_team} vs {away_team}')
else:
    print('Failed to fetch data from TheSportsDB.')



Failed to fetch data from TheSportsDB.


In [19]:
pip install icalendar


Collecting icalendar
  Downloading icalendar-5.0.7-py3-none-any.whl (99 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/99.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m92.2/99.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.8/99.8 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: icalendar
Successfully installed icalendar-5.0.7


In [22]:
import requests
from icalendar import Calendar
import pandas as pd

# Define the iCalendar URL
ics_url = "https://pub.fotmob.com/prod/pub/api/v2/calendar/team/8650.ics"

# Fetch the iCalendar data
response = requests.get(ics_url)

if response.status_code == 200:
    # Parse the iCalendar data
    cal = Calendar.from_ical(response.text)

    # Initialize lists to store event data
    dates = []
    summaries = []

    # Extract fixture data
    for event in cal.walk('vevent'):
        dates.append(event['dtstart'].dt)
        summaries.append(event['summary'])

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({'Date': dates, 'Fixture': summaries})

    # Print the DataFrame
    print(df)
else:
    print('Failed to fetch data from the iCalendar URL.')


                        Date                                     Fixture
0  2023-07-19 16:35:00+00:00     â½ï¸ Karlsruher SC - Liverpool  (2-4)
1  2023-07-24 11:20:00+00:00   â½ï¸ Greuther FÃ¼rth - Liverpool  (4-4)
2  2023-07-30 09:00:00+00:00    â½ï¸ Leicester City - Liverpool  (0-4)
3  2023-08-02 11:30:00+00:00   â½ï¸ Bayern MÃ¼nchen - Liverpool  (4-3)
4  2023-08-07 18:15:00+00:00      â½ï¸ Liverpool - SV Darmstadt  (3-1)
5  2023-08-13 15:30:00+00:00           â½ï¸ Chelsea - Liverpool  (1-1)
6  2023-08-19 14:00:00+00:00   â½ï¸ Liverpool - AFC Bournemouth  (3-1)
7  2023-08-27 15:30:00+00:00  â½ï¸ Newcastle United - Liverpool  (1-2)
8  2023-09-03 13:00:00+00:00       â½ï¸ Liverpool - Aston Villa  (3-0)
9  2023-09-16 11:30:00+00:00  â½ï¸ Wolverhampton Wanderers - Liverpool
10 2023-09-21 16:45:00+00:00                     â½ï¸ LASK - Liverpool
11 2023-09-24 13:00:00+00:00          â½ï¸ Liverpool - West Ham United
12 2023-09-26 18:45:00+00:00           â½ï¸ Liver

In [23]:
import requests
from icalendar import Calendar
import pandas as pd
import re

# Define the iCalendar URL
ics_url = "https://pub.fotmob.com/prod/pub/api/v2/calendar/team/8650.ics"

# Fetch the iCalendar data
response = requests.get(ics_url)

if response.status_code == 200:
    # Parse the iCalendar data
    cal = Calendar.from_ical(response.text)

    # Initialize lists to store event data
    years = []
    months = []
    days = []
    full_dates = []
    kickoff_times = []
    home_teams = []
    away_teams = []
    scores = []

    # Extract fixture data
    for event in cal.walk('vevent'):
        dtstart = event['dtstart'].dt
        years.append(dtstart.year)
        months.append(dtstart.month)
        days.append(dtstart.day)
        full_dates.append(dtstart.strftime('%Y-%m-%d'))
        kickoff_times.append(dtstart.strftime('%H:%M'))

        # Extract and clean summary (fixture information)
        summary = event['summary']
        # Remove non-English characters
        summary = re.sub(r'[^\x00-\x7F]+', '', summary)
        # Split summary into home team, away team, and score (if available)
        match_info = summary.split('-')
        if len(match_info) == 2:
            home_team, away_team = match_info
            scores.append(None)  # No score available
        elif len(match_info) == 3:
            home_team, away_team, score = match_info
            scores.append(score.strip())
        else:
            home_team, away_team = match_info
            scores.append(None)  # No score available

        home_teams.append(home_team.strip())
        away_teams.append(away_team.strip())

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({
        'Year': years,
        'Month': months,
        'Day': days,
        'Full Date': full_dates,
        'Kickoff Time': kickoff_times,
        'Home Team': home_teams,
        'Away Team': away_teams,
        'Score': scores
    })

    # Print the DataFrame
    print(df)
else:
    print('Failed to fetch data from the iCalendar URL.')


    Year  Month  Day   Full Date Kickoff Time                Home Team  \
0   2023      7   19  2023-07-19        16:35            Karlsruher SC   
1   2023      7   24  2023-07-24        11:20            Greuther Frth   
2   2023      7   30  2023-07-30        09:00           Leicester City   
3   2023      8    2  2023-08-02        11:30            Bayern Mnchen   
4   2023      8    7  2023-08-07        18:15                Liverpool   
5   2023      8   13  2023-08-13        15:30                  Chelsea   
6   2023      8   19  2023-08-19        14:00                Liverpool   
7   2023      8   27  2023-08-27        15:30         Newcastle United   
8   2023      9    3  2023-09-03        13:00                Liverpool   
9   2023      9   16  2023-09-16        11:30  Wolverhampton Wanderers   
10  2023      9   21  2023-09-21        16:45                     LASK   
11  2023      9   24  2023-09-24        13:00                Liverpool   
12  2023      9   26  2023-09-26      

In [24]:
df

Unnamed: 0,Year,Month,Day,Full Date,Kickoff Time,Home Team,Away Team,Score
0,2023,7,19,2023-07-19,16:35,Karlsruher SC,Liverpool (2,4)
1,2023,7,24,2023-07-24,11:20,Greuther Frth,Liverpool (4,4)
2,2023,7,30,2023-07-30,09:00,Leicester City,Liverpool (0,4)
3,2023,8,2,2023-08-02,11:30,Bayern Mnchen,Liverpool (4,3)
4,2023,8,7,2023-08-07,18:15,Liverpool,SV Darmstadt (3,1)
5,2023,8,13,2023-08-13,15:30,Chelsea,Liverpool (1,1)
6,2023,8,19,2023-08-19,14:00,Liverpool,AFC Bournemouth (3,1)
7,2023,8,27,2023-08-27,15:30,Newcastle United,Liverpool (1,2)
8,2023,9,3,2023-09-03,13:00,Liverpool,Aston Villa (3,0)
9,2023,9,16,2023-09-16,11:30,Wolverhampton Wanderers,Liverpool,


In [25]:
import requests
from icalendar import Calendar
import pandas as pd

# Define the iCalendar URL
ics_url = "https://cdn.soticservers.net/tools/wordpress/ical/calendar.php?TeamId=126&CompId=4355,4407,4438&source=sfms&project=irfu"

# Fetch the iCalendar data
response = requests.get(ics_url)

if response.status_code == 200:
    # Parse the iCalendar data
    cal = Calendar.from_ical(response.text)

    # Initialize lists to store event data
    dates = []
    summaries = []

    # Extract fixture data
    for event in cal.walk('vevent'):
        dates.append(event['dtstart'].dt)
        summaries.append(event['summary'])

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({'Date': dates, 'Fixture': summaries})

    # Print the DataFrame
    print(df)
else:
    print('Failed to fetch data from the iCalendar URL.')


                        Date                    Fixture
0  2023-08-05 19:00:00+00:00    Ireland 33 - 17 Italy, 
1  2023-08-19 16:30:00+00:00  Ireland 29 - 10 England, 
2  2023-08-26 18:45:00+00:00    Ireland 17 - 13 Samoa, 
3  2023-09-09 13:30:00+00:00        Ireland v Romania, 
4  2023-09-16 19:00:00+00:00          Ireland v Tonga, 
5  2023-09-23 19:00:00+00:00   South Africa v Ireland, 
6  2023-10-07 19:00:00+00:00       Ireland v Scotland, 
7  2024-02-02 19:00:00+00:00         France v Ireland, 
8  2024-02-11 15:00:00+00:00          Ireland v Italy, 
9  2024-02-24 14:15:00+00:00          Ireland v Wales, 
10 2024-03-09 16:45:00+00:00        England v Ireland, 
11 2024-03-16 16:45:00+00:00       Ireland v Scotland, 


In [26]:
import requests
from icalendar import Calendar
import pandas as pd
import re

# Define the iCalendar URL
ics_url = "https://cdn.soticservers.net/tools/wordpress/ical/calendar.php?TeamId=126&CompId=4355,4407,4438&source=sfms&project=irfu"

# Fetch the iCalendar data
response = requests.get(ics_url)

if response.status_code == 200:
    # Parse the iCalendar data
    cal = Calendar.from_ical(response.text)

    # Initialize lists to store event data
    years = []
    months = []
    days = []
    full_dates = []
    kickoff_times = []
    home_teams = []
    away_teams = []
    scores = []

    # Extract fixture data
    for event in cal.walk('vevent'):
        dtstart = event['dtstart'].dt
        years.append(dtstart.year)
        months.append(dtstart.month)
        days.append(dtstart.day)
        full_dates.append(dtstart.strftime('%Y-%m-%d'))

        # Extract kickoff time (if available)
        description = event.get('description')
        kickoff_time = re.search(r'Kick Off: (\d{2}:\d{2})', description) if description else None
        kickoff_times.append(kickoff_time.group(1) if kickoff_time else None)

        summary = event['summary']
        home_team, away_team = re.split(r' vs | v | - | at ', summary, flags=re.IGNORECASE)
        home_teams.append(home_team.strip())
        away_teams.append(away_team.strip())

        # Extract score (if available)
        score = re.search(r'Score: (\d+-\d+)', description) if description else None
        scores.append(score.group(1) if score else None)

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({
        'Year': years,
        'Month': months,
        'Day': days,
        'Full Date': full_dates,
        'Kickoff Time': kickoff_times,
        'Home Team': home_teams,
        'Away Team': away_teams,
        'Score': scores
    })

    # Print the DataFrame
    print(df)
else:
    print('Failed to fetch data from the iCalendar URL.')


    Year  Month  Day   Full Date Kickoff Time     Home Team    Away Team Score
0   2023      8    5  2023-08-05         None    Ireland 33    17 Italy,  None
1   2023      8   19  2023-08-19         None    Ireland 29  10 England,  None
2   2023      8   26  2023-08-26         None    Ireland 17    13 Samoa,  None
3   2023      9    9  2023-09-09         None       Ireland     Romania,  None
4   2023      9   16  2023-09-16         None       Ireland       Tonga,  None
5   2023      9   23  2023-09-23         None  South Africa     Ireland,  None
6   2023     10    7  2023-10-07         None       Ireland    Scotland,  None
7   2024      2    2  2024-02-02         None        France     Ireland,  None
8   2024      2   11  2024-02-11         None       Ireland       Italy,  None
9   2024      2   24  2024-02-24         None       Ireland       Wales,  None
10  2024      3    9  2024-03-09         None       England     Ireland,  None
11  2024      3   16  2024-03-16         None       

In [27]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Define the URL of the website you want to scrape
url = "https://calendar.formula1.com/"

# Send an HTTP GET request to the URL
response = requests.get(url)

if response.status_code == 200:
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find and extract the relevant data from the HTML using BeautifulSoup
    # Example: You'll need to inspect the website's structure and identify the HTML elements that contain the data you need.

    # Initialize lists to store the extracted data
    years = []
    months = []
    days = []
    full_dates = []
    kickoff_times = []
    home_teams = []
    away_teams = []
    scores = []

    # Loop through the HTML elements and extract data
    # Example: Use soup.find_all() or other BeautifulSoup methods to locate data within HTML tags

    # Append the extracted data to the respective lists

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({
        'Year': years,
        'Month': months,
        'Day': days,
        'Full Date': full_dates,
        'Kickoff Time': kickoff_times,
        'Home Team': home_teams,
        'Away Team': away_teams,
        'Score': scores
    })

    # Print the DataFrame
    print(df)
else:
    print('Failed to fetch data from the website.')

# Note: You will need to inspect the website's structure and update the code to target the specific HTML elements
# containing the data you need.


Empty DataFrame
Columns: [Year, Month, Day, Full Date, Kickoff Time, Home Team, Away Team, Score]
Index: []


In [31]:
import requests
import pandas as pd
from ics import Calendar

# Define the iCalendar URL
ics_url = "http://www.formula1.com/calendar/Formula_1_Official_Calendar.ics"

# Fetch the iCalendar data
response = requests.get(ics_url)

if response.status_code == 200:
    # Parse the iCalendar data
    cal = Calendar(response.text)

    # Initialize lists to store event data
    years = []
    months = []
    days = []
    full_dates = []
    kickoff_times = []
    home_teams = []
    away_teams = []
    scores = []

    # Extract fixture data
    for event in cal.events:
        start_datetime = event.begin
        years.append(start_datetime.year)
        months.append(start_datetime.month)
        days.append(start_datetime.day)
        full_dates.append(start_datetime.strftime("%Y-%m-%d"))
        kickoff_times.append(start_datetime.strftime("%H:%M:%S"))

        # Assuming the event summary is in the format "Home Team vs. Away Team (Score)"
        summary_parts = event.name.split('(')
        home_away_score = summary_parts[-1].strip(')') if len(summary_parts) > 1 else ''
        home_away_teams = summary_parts[0].strip().split('vs.')

        home_team = home_away_teams[0].strip()
        away_team = home_away_teams[1].strip() if len(home_away_teams) > 1 else ''

        home_teams.append(home_team)
        away_teams.append(away_team)
        scores.append(home_away_score)

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({
        'Year': years,
        'Month': months,
        'Day': days,
        'Full Date': full_dates,
        'Kickoff Time': kickoff_times,
        'Home Team': home_teams,
        'Away Team': away_teams,
        'Score': scores
    })

    # Print the DataFrame


In [29]:
pip install ics


Collecting ics
  Downloading ics-0.7.2-py2.py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m806.1 kB/s[0m eta [36m0:00:00[0m
Collecting arrow>=0.11 (from ics)
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting tatsu>4.2 (from ics)
  Downloading TatSu-5.8.3-py2.py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.5/101.5 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tatsu, arrow, ics
Successfully installed arrow-1.2.3 ics-0.7.2 tatsu-5.8.3


In [32]:
df

Unnamed: 0,Year,Month,Day,Full Date,Kickoff Time,Home Team,Away Team,Score
0,2023,1,1,2023-01-01,00:01:00,Time for a new F1Â® Calendar!,,


In [35]:
pip install gcalcli


Collecting gcalcli
  Downloading gcalcli-4.3.0-py2.py3-none-any.whl (36 kB)
Collecting parsedatetime (from gcalcli)
  Downloading parsedatetime-2.6-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.5/42.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: parsedatetime, gcalcli
Successfully installed gcalcli-4.3.0 parsedatetime-2.6


In [37]:
import subprocess
import pandas as pd

# Define your Google Calendar's name (you can find it on the Google Calendar web interface)
calendar_name = "https://calendar.google.com/calendar/u/0/r?cid=ekqk1nbdusr1baon1ic42oeeik@group.calendar.google.com&pli=1"

# Run gcalcli to list events from your calendar
command = f"gcalcli agenda --calendar='{calendar_name}'"
output = subprocess.check_output(command, shell=True, text=True)

# Parse the output to extract event data
lines = output.strip().split("\n")
event_data = [line.split("\t") for line in lines]

# Create a DataFrame from the extracted data
df = pd.DataFrame(event_data, columns=["Start Time", "End Time", "Event Summary"])

# Print the DataFrame
print(df)


KeyboardInterrupt: ignored

In [40]:
import subprocess
import pandas as pd
from datetime import datetime, timedelta

# Define your Google Calendar name (replace 'Your Calendar Name' with your actual calendar name)
calendar_name = 'Stephen Ryan'

# Calculate the date range (next 30 days)
today = datetime.today()
end_of_month = today + timedelta(days=30)

today_str = today.strftime('%Y-%m-%d')
end_of_month_str = end_of_month.strftime('%Y-%m-%d')

# Run gcalcli to list events from your calendar within the date range
command = f"gcalcli agenda --calendar='{calendar_name}' --tsv --details 'start,end,title' --start='{today_str}' --end='{end_of_month_str}'"
output = subprocess.check_output(command, shell=True, text=True)

# Parse the output to extract event data
events_data = [line.split('\t') for line in output.strip().split('\n')]

# Create a DataFrame
df = pd.DataFrame(events_data, columns=['Start', 'End', 'Title'])

# Print the DataFrame
print(df)


CalledProcessError: ignored

In [44]:
!gcalcli agenda --calendar='Stephen Ryan' --tsv --details 'start,end,title' --start='2023-09-04' --end='2023-10-04'


usage: gcalcli agenda
       [-h]
       [--details {calendar,location,length,reminders,description,url,conference,attendees,email,attachments,end,all}]
       [--color-owner COLOR_OWNER]
       [--color-writer COLOR_WRITER]
       [--color-reader COLOR_READER]
       [--color-freebusy COLOR_FREEBUSY]
       [--color-date COLOR_DATE]
       [--color-now-marker COLOR_NOW_MARKER]
       [--color-border COLOR_BORDER]
       [--color-title COLOR_TITLE]
       [--tsv]
       [--nostarted]
       [--nodeclined]
       [--width CAL_WIDTH]
       [--military]
       [--no-military]
       [--override-color]
       [start]
       [end]
gcalcli agenda: error: argument start: invalid get_time_from_str value: '--calendar=Stephen Ryan'


In [45]:
import requests
from ics import Calendar
import pandas as pd

# Define the URL of the ICS file
ics_url = "https://your-calendar-url.ics"  # Replace with the actual URL

# Fetch the ICS data from the URL
response = requests.get(ics_url)

if response.status_code == 200:
    # Parse the ICS data
    ics_data = response.text
    calendar = Calendar(ics_data)

    # Initialize lists to store event data
    dates = []
    summaries = []

    # Extract event data
    for event in calendar.events:
        dates.append(event.begin)
        summaries.append(event.name)

    # Create a DataFrame from the extracted data
    df = pd.DataFrame({'Date': dates, 'Summary': summaries})

    # Display the DataFrame
    print(df)
else:
    print('Failed to fetch data from the ICS URL.')


CalledProcessError: ignored