In [17]:
import sqlite3
import pandas as pd

# Relational Database Management System (RDBMS)

We chose SQLite as our DBMS to store and manage the extracted data because it lets us store both our CSV and JSON data in tables that are well structured, organized, and cleaned ahead of time. The code blocks below show how we import and save all of our files into our `.db` file, and then query the database to read a specific table into a pandas DataFrame.

## Creating the Database

In [18]:
# Read the acquired data from the extracted CSV files.
# This is done before the database is opened so that a missing or malformed
# CSV cannot leave an open connection behind.
player_stats = pd.read_csv('../data/premier_league_player_stats.csv')
future_matches = pd.read_csv('../data/premier_league_future_matches_05_01_2024.csv')
past_matches = pd.read_csv('../data/premier_league_past_matches_05_01_2024.csv')
articles = pd.read_csv('../data/articles_premier_leagues.csv')
bets_Betclic = pd.read_csv('../data/betclic_bets_PL_matches.csv')
bets_Winamax = pd.read_csv('../data/winamax_bets_PL_matches.csv')

# Map each SQLite table name to the DataFrame that populates it.
tables_to_write = {
    'player_stats': player_stats,
    'future_matches': future_matches,
    'past_matches': past_matches,
    'articles': articles,
    'bets_Betclic': bets_Betclic,
    'bets_Winamax': bets_Winamax,
}

# Create (or open) our SQLite database file.
conn = sqlite3.connect('../Premier_League_Data.db')
try:
    # Write every DataFrame to its own table. if_exists='replace' drops and
    # recreates an existing table, so this cell can be re-run safely;
    # index=False keeps the pandas row index out of the stored table.
    for table_name, frame in tables_to_write.items():
        frame.to_sql(table_name, conn, if_exists='replace', index=False)
finally:
    # Always release the connection, even if one of the writes fails.
    conn.close()

## Querying the Database

In [20]:
# Open the database written by the previous section.
conn = sqlite3.connect('../Premier_League_Data.db')
try:
    # Query sqlite_master to list all tables in the database.
    all_tables = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", conn)
    print(all_tables)

    # Read specific tables into pandas DataFrames.
    df_player_stats = pd.read_sql_query("SELECT * FROM player_stats", conn)
    df_betclic = pd.read_sql_query("SELECT * FROM bets_Betclic", conn)

    print(df_player_stats)
    print(df_betclic)
finally:
    # Close the connection even if one of the queries above raises
    # (e.g. a table is missing), so the handle is never leaked.
    conn.close()

             name
0    player_stats
1  future_matches
2    past_matches
3        articles
4    bets_Betclic
5    bets_Winamax
                  name              team  age    position   country  \
0               Adrian         Liverpool   37  Goalkeeper     Spain   
1              Alisson         Liverpool   31  Goalkeeper    Brazil   
2    Caoimhín Kelleher         Liverpool   25  Goalkeeper   Ireland   
3        Fabian Mrozek         Liverpool   20  Goalkeeper    Poland   
4     Marcelo Pitaluga         Liverpool   21  Goalkeeper    Brazil   
..                 ...               ...  ...         ...       ...   
659    Daniel Jebbison  Sheffield United   20     Forward    Canada   
660    Oliver McBurnie  Sheffield United   27     Forward  Scotland   
661     Rhian Brewster  Sheffield United   23     Forward   England   
662           Ryan Oné  Sheffield United   17     Forward  Scotland   
663      William Osula  Sheffield United   20     Forward   Denmark   

     height_cm  weigh