In [12]:
import sqlite3

import pandas as pd

# Define the seasons I need data for
seasons = [2022, 2023]

In [2]:
from simulation_utils import build_data_by_year

with sqlite3.connect("data.db") as conn:
    cursor = conn.cursor()

    # Get the data for the database
    df = pd.concat([build_data_by_year(year, False) for year in seasons])

    # Save the data to the database
    df.to_sql("football_data_season_results", conn, if_exists="fail", index=False)

    conn.commit()

df

724650000.0


Unnamed: 0,utc_date,season,status,matchday,home,away,home_score,away_score,home_outcome,away_outcome
0,2022-08-05 19:00:00+00:00,2022,FINISHED,1,Crystal Palace FC,Arsenal FC,0,2,0,3
1,2022-08-06 11:30:00+00:00,2022,FINISHED,1,Fulham FC,Liverpool FC,2,2,1,1
2,2022-08-06 14:00:00+00:00,2022,FINISHED,1,Tottenham Hotspur FC,Southampton FC,4,1,3,0
3,2022-08-06 14:00:00+00:00,2022,FINISHED,1,Newcastle United FC,Nottingham Forest FC,2,0,3,0
4,2022-08-06 14:00:00+00:00,2022,FINISHED,1,Leeds United FC,Wolverhampton Wanderers FC,2,1,3,0
...,...,...,...,...,...,...,...,...,...,...
375,2024-05-19 15:00:00+00:00,2023,FINISHED,38,Crystal Palace FC,Aston Villa FC,5,0,3,0
376,2024-05-19 15:00:00+00:00,2023,FINISHED,38,Liverpool FC,Wolverhampton Wanderers FC,2,0,3,0
377,2024-05-19 15:00:00+00:00,2023,FINISHED,38,Luton Town FC,Fulham FC,2,4,0,3
378,2024-05-19 15:00:00+00:00,2023,FINISHED,38,Manchester City FC,West Ham United FC,3,1,3,0


In [5]:
# Get unique club names minus FC
from simulation_utils import get_club_value_at_season


clubs = df["home"].unique()

# Get club value for each season
club_values = {}

for club in clubs:
    club_values[club] = [
        {
            "season": season,
            "value": get_club_value_at_season(club, season)
        }
        for season in seasons
    ]

# Flatten the dictionary
flattened_data = []

for club, values in club_values.items():
    for value in values:
        flattened_data.append({
            "club": club,
            "season": value["season"],
            "value": value["value"]
        })

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(flattened_data)

with sqlite3.connect("data.db") as conn:
    cursor = conn.cursor()

    # Save the data to the database
    df.to_sql("transfermarkt_club_values", conn, if_exists="fail", index=False)

    conn.commit()

df

Unnamed: 0,club,season,value
0,Crystal Palace FC,2022,191950000.0
1,Crystal Palace FC,2023,441700000.0
2,Fulham FC,2022,225600000.0
3,Fulham FC,2023,324300000.0
4,Tottenham Hotspur FC,2022,454800000.0
5,Tottenham Hotspur FC,2023,793300000.0
6,Newcastle United FC,2022,541600000.0
7,Newcastle United FC,2023,648950000.0
8,Leeds United FC,2022,280150000.0
9,Leeds United FC,2023,214000000.0


In [17]:
import requests
import bs4

# Get the data from the website
url = "https://en.wikipedia.org/wiki/List_of_Premier_League_managers"
response = requests.get(url)
soup = bs4.BeautifulSoup(response.text, "html.parser")

# Get the table with classes wikitable sortable plainrowheaders jquery-tablesorter
table = soup.find("table", {"class": "wikitable sortable plainrowheaders"})

# Get the rows from the table
rows = table.find_all("tr")
rows = rows[1:]

# Get the data from the rows
data = []

for row in rows:
    cells = row.find_all(["th", "td"])
    cells = [cell.text.strip() for cell in cells if cell.text.strip()][:-1]
    data.append(cells)

# Convert the data to a DataFrame
df = pd.DataFrame(data, columns=["manager", "club", "start", "end", "duration_days", "years"])

# Add columns for keys
df["incumbent"] = df["manager"].apply(lambda x: True if str(x).endswith(" †") else False)
df["caretaker"] = df["manager"].apply(lambda x: True if str(x).endswith(" ‡") else False)
df["incumbent_not_in_league"] = df["manager"].apply(lambda x: True if str(x).endswith(" §") else False)

# Remove the symbols from the manager names
df["manager"] = df["manager"].str.replace(" †", "")
df["manager"] = df["manager"].str.replace(" ‡", "")
df["manager"] = df["manager"].str.replace(" §", "")

# Convert the start and end dates to datetime
df["start"] = pd.to_datetime(df["start"], errors="coerce")
df["end"] = pd.to_datetime(df["end"], errors="coerce")

# Save the data to the database
with sqlite3.connect("data.db") as conn:
    cursor = conn.cursor()

    # Save the data to the database
    df.to_sql("premier_league_managers", conn, if_exists="fail", index=False)

    conn.commit()

df

Unnamed: 0,manager,club,start,end,duration_days,years,incumbent,caretaker,incumbent_not_in_league
0,George Graham,Arsenal,1986-05-14,1995-02-21,3205,1992–1995,False,False,False
1,Stewart Houston,Arsenal,1995-02-22,1995-06-08,106,1995,False,True,False
2,Bruce Rioch,Arsenal,1995-06-08,1996-08-12,431,1995–1996,False,False,False
3,Stewart Houston,Arsenal,1996-08-12,1996-09-13,32,1996,False,True,False
4,Pat Rice,Arsenal,1996-09-13,1996-09-30,17,1996,False,True,False
...,...,...,...,...,...,...,...,...,...
469,Nuno Espírito Santo,Wolverhampton Wanderers,2017-05-31,2021-05-23,1453,2018–2021,False,False,False
470,Bruno Lage,Wolverhampton Wanderers,2021-06-09,2022-10-02,1104,,False,False,False
471,Steve Davis,Wolverhampton Wanderers,2022-10-03,2022-11-13,41,,False,True,False
472,Julen Lopetegui,Wolverhampton Wanderers,2022-11-14,2023-08-08,267,,False,False,False
