In [48]:
import sqlite3
import pandas as pd
from datetime import datetime

In [2]:
# Connection à la db, crée le fichier s'il n'existe pas déjà
conn = sqlite3.connect('db_foot.sqlite3') 
c = conn.cursor()

In [3]:
# Requètes de creation des différentes tables
championships = '''CREATE TABLE IF NOT EXISTS championships (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name VARCHAR,
    country VARCHAR,
    start_year INTEGER,
    end_year INTEGER,
    created_at TIMESTAMP,
    updated_at TIMESTAMP)'''
teams = '''CREATE TABLE IF NOT EXISTS teams (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    championship_id INTEGER,
    name VARCHARD,
    city VARCHAR,
    coach_name VARCHAR,
    rank INTEGER,
    created_at TIMESTAMP,
    updated_at TIMESTAMP,
    FOREIGN KEY(championship_id) REFERENCES championships(id))'''
players = '''CREATE TABLE IF NOT EXISTS players (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    full_name VARCHAR,
    birthdate DATE,
    team_id INTEGER,
    position VARCHAR,
    nationality VARCHAR,
    created_at TIMESTAMP,
    updated_at TIMESTAMP,
    FOREIGN KEY(team_id) REFERENCES teams(id))'''
goals = '''CREATE TABLE IF NOT EXISTS goals (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    match_id INTEGER,
    player_id INTEGER,
    goal_type VARCHAR,
    created_at TIMESTAMP,
    updated_at TIMESTAMP,
    FOREIGN KEY (player_id) REFERENCES players(id),
    FOREIGN KEY (match_id) REFERENCES matchs(id))'''
matchs = '''CREATE TABLE IF NOT EXISTS matchs(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    date DATIME,
    place VARCHAR,
    rainfall FLOAT,
    temperature FLOAT,
    created_at TIMESTAMP,
    updated_at TIMESTAMP)'''
teams_matches = '''CREATE TABLE IF NOT EXISTS teams_matches (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    match_id INTEGER,
    team_id INTEGER,
    home BOOLEAN,
    team_goals INTEGER,
    points INTEGER,
    created_at TIMESTAMP,
    updated_at TIMESTAMP,
    FOREIGN KEY (team_id) REFERENCES teams(id)
    FOREIGN KEY (match_id) REFERENCES matchs(id))'''


In [4]:
# Fonctions de création et de suppression de table
def create_table(query):
    '''Create table from query specified'''
    c.execute(query)
    conn.commit()

def drop_table(table_name):
    '''Drop table specified'''
    c.execute('''DROP TABLE %s''' % (table_name,))
    conn.commit()

In [5]:
# Fonction d'insertion 
def add_championship(name, country, start_year, end_year):
    '''Add championship in table championships with values specified'''
    c.execute('''INSERT INTO championships (name, country, start_year, end_year, created_at, updated_at) VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)''', (name,country,start_year,end_year))
    conn.commit()

In [6]:
# Version pandas
def insert_table(my_df, my_table):
  '''Insert Dataframe into table specified'''
  my_df.to_sql(my_table, conn, if_exists='append', index=False)

def read_db(my_query):
  '''Read sql query, read the db an return response in a dataframe'''
  df = pd.read_sql(my_query, conn)
  return df

In [7]:
# Sandbox
# drop_table('players')
# create_table(championships)
# create_table(matchs)
# create_table(teams)
# create_table(players)
# create_table(goals)
# create_table(teams_matches)

#add_championship('Ligue 1', 'France', 2020, 2021)

In [8]:
# Lis le contenu de la table
df_championships = read_db('''SELECT * FROM championships''')
df_championships

Unnamed: 0,id,name,country,start_year,end_year,created_at,updated_at
0,1,Ligue 1,France,2020,2021,2021-06-28 12:56:07,2021-06-28 12:56:07


In [9]:
# Recup id et nom dans un dict
championships_dict = {}
for championship in range(len(df_championships)):
    championship_dict = {df_championships.iloc[championship,1] : df_championships.iloc[championship, 0]}
    championships_dict.update(championship_dict)
championships_dict

{'Ligue 1': 1}

In [10]:
df_matchs = pd.read_csv('match_table.csv')
df_matchs.drop(['Unnamed: 0'], axis=1, inplace=True)

In [11]:
df_matchs

Unnamed: 0,date,place,rainfall,temperature,created_at,updated_at
0,2020-08-21 19:00:00+02:00,Bordeaux,5.000000,24.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
1,2020-08-22 17:00:00+02:00,Dijon,19.000000,26.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
2,2020-08-22 21:00:00+02:00,Lille,1.000000,21.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
3,2020-08-23 13:00:00+02:00,Monaco,1.000000,27.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
4,2020-08-23 15:00:00+02:00,Lorient,1.000000,19.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
...,...,...,...,...,...,...
375,2021-05-23 21:00:00+02:00,Rennes,2.000000,15.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
376,2021-05-23 21:00:00+02:00,Nantes,1.000000,16.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
377,2021-05-23 21:00:00+02:00,Saint-Étienne,4.277397,13.279778,2021-06-30 12:01:37,2021-06-30 12:06:46
378,2021-05-23 21:00:00+02:00,Reims,1.000000,14.000000,2021-06-30 12:01:37,2021-06-30 12:06:46


In [12]:
#insert_table(df_matchs, 'matchs')

read_db('''SELECT * FROM matchs''')

Unnamed: 0,id,date,place,rainfall,temperature,created_at,updated_at
0,1,2020-08-21 19:00:00+02:00,Bordeaux,5.000000,24.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
1,2,2020-08-22 17:00:00+02:00,Dijon,19.000000,26.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
2,3,2020-08-22 21:00:00+02:00,Lille,1.000000,21.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
3,4,2020-08-23 13:00:00+02:00,Monaco,1.000000,27.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
4,5,2020-08-23 15:00:00+02:00,Lorient,1.000000,19.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
...,...,...,...,...,...,...,...
375,376,2021-05-23 21:00:00+02:00,Rennes,2.000000,15.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
376,377,2021-05-23 21:00:00+02:00,Nantes,1.000000,16.000000,2021-06-30 12:01:37,2021-06-30 12:06:46
377,378,2021-05-23 21:00:00+02:00,Saint-Étienne,4.277397,13.279778,2021-06-30 12:01:37,2021-06-30 12:06:46
378,379,2021-05-23 21:00:00+02:00,Reims,1.000000,14.000000,2021-06-30 12:01:37,2021-06-30 12:06:46


In [13]:
df_teams = pd.read_csv('table_teams.csv', index_col=0)
df_teams

Unnamed: 0,championship_id,name,city,coach_name,rank,created_at,updated_at
0,Ligue 1,Angers Sporting Club de l'Ouest,Angers,Gérald BATICLE,44,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
1,Ligue 1,Football Club des Girondins de Bordeaux,Bordeaux,Jean-Louis GASSET,45,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
2,Ligue 1,Stade Brestois 29,Brest,Michel DER ZAKARIAN,41,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
3,Ligue 1,Dijon Football Côte-d'Or,Dijon,David LINARES,21,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
4,Ligue 1,Racing Club de Lens,Lens,Franck HAISE,57,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
5,Ligue 1,LOSC Lille,Lille,Christophe GALTIER,83,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
6,Ligue 1,Football Club Lorient-Bretagne Sud,Lorient,Christophe PELISSIER,42,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
7,Ligue 1,Olympique Lyonnais,Lyon,Peter BOSZ,76,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
8,Ligue 1,Olympique de Marseille,Marseille,Jorge SAMPAOLI,60,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
9,Ligue 1,Football Club de Metz,Metz,Frédéric ANTONETTI,47,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113


In [14]:
#map colonne avec dictionnaire des ids
df_teams['championship_id'] = df_teams['championship_id'].map(championships_dict)
df_teams

Unnamed: 0,championship_id,name,city,coach_name,rank,created_at,updated_at
0,1,Angers Sporting Club de l'Ouest,Angers,Gérald BATICLE,44,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
1,1,Football Club des Girondins de Bordeaux,Bordeaux,Jean-Louis GASSET,45,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
2,1,Stade Brestois 29,Brest,Michel DER ZAKARIAN,41,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
3,1,Dijon Football Côte-d'Or,Dijon,David LINARES,21,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
4,1,Racing Club de Lens,Lens,Franck HAISE,57,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
5,1,LOSC Lille,Lille,Christophe GALTIER,83,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
6,1,Football Club Lorient-Bretagne Sud,Lorient,Christophe PELISSIER,42,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
7,1,Olympique Lyonnais,Lyon,Peter BOSZ,76,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
8,1,Olympique de Marseille,Marseille,Jorge SAMPAOLI,60,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
9,1,Football Club de Metz,Metz,Frédéric ANTONETTI,47,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113


In [15]:
#insert_table(df_teams, 'teams')

df_teams_table = read_db('''SELECT * FROM teams''')
df_teams_table

Unnamed: 0,id,championship_id,name,city,coach_name,rank,created_at,updated_at
0,1,1,Angers Sporting Club de l'Ouest,Angers,Gérald BATICLE,44,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
1,2,1,Football Club des Girondins de Bordeaux,Bordeaux,Jean-Louis GASSET,45,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
2,3,1,Stade Brestois 29,Brest,Michel DER ZAKARIAN,41,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
3,4,1,Dijon Football Côte-d'Or,Dijon,David LINARES,21,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
4,5,1,Racing Club de Lens,Lens,Franck HAISE,57,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
5,6,1,LOSC Lille,Lille,Christophe GALTIER,83,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
6,7,1,Football Club Lorient-Bretagne Sud,Lorient,Christophe PELISSIER,42,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
7,8,1,Olympique Lyonnais,Lyon,Peter BOSZ,76,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
8,9,1,Olympique de Marseille,Marseille,Jorge SAMPAOLI,60,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113
9,10,1,Football Club de Metz,Metz,Frédéric ANTONETTI,47,2021-07-01 11:24:36.283421,2021-07-01 11:24:36.285113


In [16]:
# Recup id et nom dans un dict
teams_dict = {}
for team in range(len(df_teams_table)):
    team_dict = {
        df_teams_table.iloc[team, 2]: df_teams_table.iloc[team, 0]}
    teams_dict.update(team_dict)
teams_dict


{"Angers Sporting Club de l'Ouest": 1,
 'Football Club des Girondins de Bordeaux': 2,
 'Stade Brestois 29': 3,
 "Dijon Football Côte-d'Or": 4,
 'Racing Club de Lens': 5,
 'LOSC Lille': 6,
 'Football Club Lorient-Bretagne Sud': 7,
 'Olympique Lyonnais': 8,
 'Olympique de Marseille': 9,
 'Football Club de Metz': 10,
 'Association sportive de Monaco FC': 11,
 'Montpellier-Herault Sport Club': 12,
 'Football Club de Nantes': 13,
 "Olympique Gymnaste Club Nice Côte d'Azur": 14,
 'Nîmes Olympique': 15,
 'Paris-Saint-Germain Football Club': 16,
 'Stade de Reims': 17,
 'Stade Rennais Football Club': 18,
 'Association Sportive de St-Etienne': 19,
 'Racing Club de Strasbourg Alsace': 20}

In [17]:
df_players = pd.read_csv('table_players.csv', index_col=0)
df_players

Unnamed: 0,full_name,birthdate,team_id,position,nationality,created_at,updated_at
0,P. Bernardoni,18/04/97,Angers Sporting Club de l'Ouest,Gar.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
1,L. Butelle,3/04/83,Angers Sporting Club de l'Ouest,Gar.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
2,D. Petkovic,25/05/93,Angers Sporting Club de l'Ouest,Gar.,MNE,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
3,A. Bamba,25/04/90,Angers Sporting Club de l'Ouest,Déf.,CIV,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
4,K. Boma,20/11/02,Angers Sporting Club de l'Ouest,Déf.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
...,...,...,...,...,...,...,...
642,L. Mothiba,28/01/96,Racing Club de Strasbourg Alsace,Att.,AFS,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
643,I. Saadi,8/02/92,Racing Club de Strasbourg Alsace,Att.,ALG,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
644,M. Sahi,20/12/01,Racing Club de Strasbourg Alsace,Att.,MLI,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
645,A. Waris,19/09/91,Racing Club de Strasbourg Alsace,Att.,GHA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645


In [18]:
df_players['team_id'] = df_players['team_id'].map(teams_dict)
df_players

Unnamed: 0,full_name,birthdate,team_id,position,nationality,created_at,updated_at
0,P. Bernardoni,18/04/97,1,Gar.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
1,L. Butelle,3/04/83,1,Gar.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
2,D. Petkovic,25/05/93,1,Gar.,MNE,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
3,A. Bamba,25/04/90,1,Déf.,CIV,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
4,K. Boma,20/11/02,1,Déf.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
...,...,...,...,...,...,...,...
642,L. Mothiba,28/01/96,20,Att.,AFS,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
643,I. Saadi,8/02/92,20,Att.,ALG,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
644,M. Sahi,20/12/01,20,Att.,MLI,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
645,A. Waris,19/09/91,20,Att.,GHA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645


In [19]:
# insert_table(df_players, 'players')

df_players_table = read_db('''SELECT * FROM players''')
df_players_table.head(40)


Unnamed: 0,id,full_name,birthdate,team_id,position,nationality,created_at,updated_at
0,1,P. Bernardoni,18/04/97,1,Gar.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
1,2,L. Butelle,3/04/83,1,Gar.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
2,3,D. Petkovic,25/05/93,1,Gar.,MNE,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
3,4,A. Bamba,25/04/90,1,Déf.,CIV,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
4,5,K. Boma,20/11/02,1,Déf.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
5,6,E. Diaw,31/12/94,1,Déf.,SEN,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
6,7,S. Doumbia,24/09/96,1,Déf.,CIV,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
7,8,I. Dramé,,1,Déf.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
8,9,E. Ebosse,11/03/99,1,Déf.,FRA,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645
9,10,O. Falouh,12/07/99,1,Déf.,MAR,2021-07-01 15:52:08.103040,2021-07-01 15:52:10.274645


In [28]:
df_matchs_table = read_db('SELECT * FROM matchs')
df_matchs

Unnamed: 0,id,date,place,rainfall,temperature,created_at,updated_at,match
0,1,2020-08-21 19:00:00+02:00,Bordeaux,5.000000,24.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-21 19:00:00+02:00 Bordeaux
1,2,2020-08-22 17:00:00+02:00,Dijon,19.000000,26.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-22 17:00:00+02:00 Dijon
2,3,2020-08-22 21:00:00+02:00,Lille,1.000000,21.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-22 21:00:00+02:00 Lille
3,4,2020-08-23 13:00:00+02:00,Monaco,1.000000,27.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-23 13:00:00+02:00 Monaco
4,5,2020-08-23 15:00:00+02:00,Lorient,1.000000,19.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-23 15:00:00+02:00 Lorient
...,...,...,...,...,...,...,...,...
375,376,2021-05-23 21:00:00+02:00,Rennes,2.000000,15.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Rennes
376,377,2021-05-23 21:00:00+02:00,Nantes,1.000000,16.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Nantes
377,378,2021-05-23 21:00:00+02:00,Saint-Étienne,4.277397,13.279778,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Saint-Étienne
378,379,2021-05-23 21:00:00+02:00,Reims,1.000000,14.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Reims


In [30]:
df_matchs_table['match'] = df_matchs_table['date'].astype(str) + ' ' + df_matchs_table['place']
df_matchs_table

Unnamed: 0,id,date,place,rainfall,temperature,created_at,updated_at,match
0,1,2020-08-21 19:00:00+02:00,Bordeaux,5.000000,24.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-21 19:00:00+02:00 Bordeaux
1,2,2020-08-22 17:00:00+02:00,Dijon,19.000000,26.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-22 17:00:00+02:00 Dijon
2,3,2020-08-22 21:00:00+02:00,Lille,1.000000,21.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-22 21:00:00+02:00 Lille
3,4,2020-08-23 13:00:00+02:00,Monaco,1.000000,27.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-23 13:00:00+02:00 Monaco
4,5,2020-08-23 15:00:00+02:00,Lorient,1.000000,19.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-23 15:00:00+02:00 Lorient
...,...,...,...,...,...,...,...,...
375,376,2021-05-23 21:00:00+02:00,Rennes,2.000000,15.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Rennes
376,377,2021-05-23 21:00:00+02:00,Nantes,1.000000,16.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Nantes
377,378,2021-05-23 21:00:00+02:00,Saint-Étienne,4.277397,13.279778,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Saint-Étienne
378,379,2021-05-23 21:00:00+02:00,Reims,1.000000,14.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 21:00:00+02:00 Reims


In [34]:
matchs_dict = {}
for match in range(len(df_matchs_table)):
    match_dict = {
        df_matchs_table.iloc[match, 7]: df_matchs_table.iloc[match, 0]}
    matchs_dict.update(match_dict)
matchs_dict

{'2020-08-21 19:00:00+02:00 Bordeaux': 1,
 '2020-08-22 17:00:00+02:00 Dijon': 2,
 '2020-08-22 21:00:00+02:00 Lille': 3,
 '2020-08-23 13:00:00+02:00 Monaco': 4,
 '2020-08-23 15:00:00+02:00 Lorient': 5,
 '2020-08-23 15:00:00+02:00 Nîmes': 6,
 '2020-08-23 17:00:00+02:00 Nice': 7,
 '2020-09-15 21:00:00+02:00 Montpellier': 8,
 '2020-09-16 21:00:00+02:00 Paris': 9,
 '2020-09-17 21:00:00+02:00 Marseille': 10,
 '2020-08-28 21:00:00+02:00 Lyon': 11,
 '2020-08-29 17:00:00+02:00 Rennes': 12,
 '2020-08-29 21:00:00+02:00 Strasbourg': 13,
 '2020-08-30 13:00:00+02:00 Reims': 14,
 '2020-08-30 15:00:00+02:00 Nantes': 15,
 '2020-08-30 15:00:00+02:00 Saint-Étienne': 16,
 '2020-08-30 15:00:00+02:00 Angers': 17,
 '2020-08-30 15:00:00+02:00 Metz': 18,
 '2020-08-30 21:00:00+02:00 Brest': 19,
 '2020-09-10 21:00:00+02:00 Lens': 20,
 '2020-09-11 21:00:00+02:00 Bordeaux': 21,
 '2020-09-12 17:00:00+02:00 Montpellier': 22,
 '2020-09-12 21:00:00+02:00 Saint-Étienne': 23,
 '2020-09-13 13:00:00+02:00 Lille': 24,
 '20

In [35]:
teams_matchs_dict = {}
for team in range(len(df_teams_table)):
    team_dict = {
        df_teams_table.iloc[team, 3]: df_teams_table.iloc[team, 0]}
    teams_matchs_dict.update(team_dict)
teams_matchs_dict

{'Angers': 1,
 'Bordeaux': 2,
 'Brest': 3,
 'Dijon': 4,
 'Lens': 5,
 'Lille': 6,
 'Lorient': 7,
 'Lyon': 8,
 'Marseille': 9,
 'Metz': 10,
 'Monaco': 11,
 'Montpellier': 12,
 'Nantes': 13,
 'Nice': 14,
 'Nîmes': 15,
 'Paris': 16,
 'Reims': 17,
 'Rennes': 18,
 'Saint-Étienne': 19,
 'Strasbourg': 20}

In [38]:
df_teams_matches = pd.read_csv('teams_matches_table.csv', index_col=0)
df_teams_matches

Unnamed: 0,match_id,team_id,home,team_goals,points,created_at,updated_at
0,2020-08-21 19:00:00+02:00 Bordeaux,Bordeaux,True,0,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
1,2020-08-22 17:00:00+02:00 Dijon,Dijon,True,0,0,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
2,2020-08-22 21:00:00+02:00 Lille,Lille,True,1,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
3,2020-08-23 13:00:00+02:00 Monaco,Monaco,True,2,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
4,2020-08-23 15:00:00+02:00 Lorient,Lorient,True,3,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
...,...,...,...,...,...,...,...
755,2021-05-23 21:00:00+02:00 Rennes,Nîmes,False,0,0,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
756,2021-05-23 21:00:00+02:00 Nantes,Montpellier,False,2,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
757,2021-05-23 21:00:00+02:00 Saint-Étienne,Dijon,False,1,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
758,2021-05-23 21:00:00+02:00 Reims,Bordeaux,False,2,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565


In [39]:
df_teams_matches['match_id'] = df_teams_matches['match_id'].map(matchs_dict)
df_teams_matches['team_id'] = df_teams_matches['team_id'].map(teams_matchs_dict)
df_teams_matches

Unnamed: 0,match_id,team_id,home,team_goals,points,created_at,updated_at
0,1,2,True,0,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
1,2,4,True,0,0,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
2,3,6,True,1,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
3,4,11,True,2,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
4,5,7,True,3,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
...,...,...,...,...,...,...,...
755,376,15,False,0,0,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
756,377,12,False,2,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
757,378,4,False,1,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
758,379,2,False,2,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565


In [40]:
#insert_table(df_teams_matches, 'teams_matches')

read_db('SELECT * FROM teams_matches')


Unnamed: 0,id,match_id,team_id,home,team_goals,points,created_at,updated_at
0,1,1,2,1,0,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
1,2,2,4,1,0,0,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
2,3,3,6,1,1,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
3,4,4,11,1,2,1,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
4,5,5,7,1,3,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
...,...,...,...,...,...,...,...,...
755,756,376,15,0,0,0,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
756,757,377,12,0,2,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
757,758,378,4,0,1,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565
758,759,379,2,0,2,3,2021-07-02 11:07:34.093920,2021-07-02 11:07:34.571565


In [61]:
df_matchs_table_bis = read_db('SELECT * FROM matchs')
df_matchs_table_bis['date'] = pd.to_datetime(
    df_matchs_table_bis['date'], format="%Y-%m-%d %H:%M:%S%z").apply(lambda x: x.date())

df_matchs_table_bis['match'] = df_matchs_table_bis['date'].astype(
    str) + ' ' + df_matchs_table_bis['place']
df_matchs_table_bis


Unnamed: 0,id,date,place,rainfall,temperature,created_at,updated_at,match
0,1,2020-08-21,Bordeaux,5.000000,24.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-21 Bordeaux
1,2,2020-08-22,Dijon,19.000000,26.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-22 Dijon
2,3,2020-08-22,Lille,1.000000,21.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-22 Lille
3,4,2020-08-23,Monaco,1.000000,27.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-23 Monaco
4,5,2020-08-23,Lorient,1.000000,19.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2020-08-23 Lorient
...,...,...,...,...,...,...,...,...
375,376,2021-05-23,Rennes,2.000000,15.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 Rennes
376,377,2021-05-23,Nantes,1.000000,16.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 Nantes
377,378,2021-05-23,Saint-Étienne,4.277397,13.279778,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 Saint-Étienne
378,379,2021-05-23,Reims,1.000000,14.000000,2021-06-30 12:01:37,2021-06-30 12:06:46,2021-05-23 Reims


In [62]:
matchs_goals_dict = {}
for match in range(len(df_matchs_table_bis)):
    match_dict = {
        df_matchs_table_bis.iloc[match, 7]: df_matchs_table_bis.iloc[match, 0]}
    matchs_goals_dict.update(match_dict)
matchs_goals_dict


{'2020-08-21 Bordeaux': 1,
 '2020-08-22 Dijon': 2,
 '2020-08-22 Lille': 3,
 '2020-08-23 Monaco': 4,
 '2020-08-23 Lorient': 5,
 '2020-08-23 Nîmes': 6,
 '2020-08-23 Nice': 7,
 '2020-09-15 Montpellier': 8,
 '2020-09-16 Paris': 9,
 '2020-09-17 Marseille': 10,
 '2020-08-28 Lyon': 11,
 '2020-08-29 Rennes': 12,
 '2020-08-29 Strasbourg': 13,
 '2020-08-30 Reims': 14,
 '2020-08-30 Nantes': 15,
 '2020-08-30 Saint-Étienne': 16,
 '2020-08-30 Angers': 17,
 '2020-08-30 Metz': 18,
 '2020-08-30 Brest': 19,
 '2020-09-10 Lens': 20,
 '2020-09-11 Bordeaux': 21,
 '2020-09-12 Montpellier': 22,
 '2020-09-12 Saint-Étienne': 23,
 '2020-09-13 Lille': 24,
 '2020-09-13 Nîmes': 25,
 '2020-09-13 Lorient': 26,
 '2020-09-13 Angers': 27,
 '2020-09-13 Dijon': 28,
 '2020-09-13 Monaco': 29,
 '2020-09-13 Paris': 30,
 '2020-09-18 Lyon': 31,
 '2020-09-19 Lens': 32,
 '2020-09-19 Rennes': 33,
 '2020-09-20 Nice': 34,
 '2020-09-20 Montpellier': 35,
 '2020-09-20 Brest': 36,
 '2020-09-20 Metz': 37,
 '2020-09-20 Strasbourg': 38,
 '

In [65]:
df_players_table_bis = read_db('SELECT players.id, players.full_name, teams.city FROM players JOIN teams ON players.team_id = teams.id')
df_players_table_bis


Unnamed: 0,id,full_name,city
0,1,P. Bernardoni,Angers
1,2,L. Butelle,Angers
2,3,D. Petkovic,Angers
3,4,A. Bamba,Angers
4,5,K. Boma,Angers
...,...,...,...
642,643,L. Mothiba,Strasbourg
643,644,I. Saadi,Strasbourg
644,645,M. Sahi,Strasbourg
645,646,A. Waris,Strasbourg


In [68]:
df_players_table_bis['players'] = df_players_table_bis['full_name'] + ' ' + df_players_table_bis['city']
df_players_table_bis


Unnamed: 0,id,full_name,city,players
0,1,P. Bernardoni,Angers,P. Bernardoni Angers
1,2,L. Butelle,Angers,L. Butelle Angers
2,3,D. Petkovic,Angers,D. Petkovic Angers
3,4,A. Bamba,Angers,A. Bamba Angers
4,5,K. Boma,Angers,K. Boma Angers
...,...,...,...,...
642,643,L. Mothiba,Strasbourg,L. Mothiba Strasbourg
643,644,I. Saadi,Strasbourg,I. Saadi Strasbourg
644,645,M. Sahi,Strasbourg,M. Sahi Strasbourg
645,646,A. Waris,Strasbourg,A. Waris Strasbourg


In [69]:
players_goals_dict = {}
for player in range(len(df_players_table_bis)):
    player_dict = {
        df_players_table_bis.iloc[player, 3]: df_players_table_bis.iloc[player, 0]}
    players_goals_dict.update(player_dict)
players_goals_dict


{'P. Bernardoni Angers': 1,
 'L. Butelle Angers': 2,
 'D. Petkovic Angers': 3,
 'A. Bamba Angers': 4,
 'K. Boma Angers': 5,
 'E. Diaw Angers': 6,
 'S. Doumbia Angers': 7,
 'I. Dramé Angers': 8,
 'E. Ebosse Angers': 9,
 'O. Falouh Angers': 10,
 'V. Manceau Angers': 11,
 'M. Pavlovic Angers': 12,
 'R. Thomas Angers': 13,
 'I. Traoré Angers': 14,
 'I. Amadou Angers': 15,
 'K. Bemanga Angers': 16,
 'A. Bobichon Angers': 17,
 'S. Boufal Angers': 18,
 'J. Cabot Angers': 19,
 'P. Capelle Angers': 20,
 'M. Cho Angers': 21,
 'L. Coulibaly Angers': 22,
 'A. Fulgini Angers': 23,
 'T. Mangani Angers': 24,
 'K. Mouanga Angers': 25,
 'Z. Ould Khaled Angers': 26,
 'M. Pereira Lage Angers': 27,
 'W. Taïbi Angers': 28,
 'R. Alioui Angers': 29,
 'S. Bahoken Angers': 30,
 'L. Diony Angers': 31,
 'F. El-Melali Angers': 32,
 'N. Fatar Angers': 33,
 'Y. Fortuné Angers': 34,
 'S. Thioub Angers': 35,
 'B. Costil Bordeaux': 36,
 'D. Lima Semedo Bordeaux': 37,
 'C. Michel Bordeaux': 38,
 'G. Poussin Bordeaux': 

In [90]:
df_goals = pd.read_csv('goals_table_final.csv', index_col=0)
df_goals.head(40)

Unnamed: 0,match_id,player_id,goal_type,created_at,updated_at
0,2020-08-22 Dijon,I. Traoré Angers,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
1,2020-08-22 Lille,J. Bamba Lille,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
2,2020-08-22 Lille,D. Da Silva Rennes,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
3,2020-08-23 Monaco,A. Disasi Monaco,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
4,2020-08-23 Monaco,B. Badiashile Monaco,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
5,2020-08-23 Monaco,B. Dia Reims,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
6,2020-08-23 Monaco,E. Touré Reims,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
7,2020-08-23 Lorient,Y. Wissa Lorient,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
8,2020-08-23 Lorient,A. Grbic Lorient,penalty,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
9,2020-08-23 Lorient,P. Hamel Lorient,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047


In [91]:
df_goals['match_id'] = df_goals['match_id'].map(matchs_goals_dict)
df_goals['player_id'] = df_goals['player_id'].map(players_goals_dict)
df_goals.head(40)

Unnamed: 0,match_id,player_id,goal_type,created_at,updated_at
0,2.0,14.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
1,3.0,176.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
2,3.0,549.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
3,4.0,322.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
4,4.0,320.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
5,4.0,539.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
6,4.0,544.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
7,5.0,224.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
8,5.0,220.0,penalty,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047
9,5.0,221.0,normal,2021-07-02 19:37:47.834072,2021-07-02 19:37:48.190047


In [84]:
pd.set_option('display.max_rows', None)
df_goals['player_id'].isna()


0       False
1       False
2       False
3       False
4       False
5       False
6       False
7       False
8       False
9       False
10      False
11       True
12      False
13       True
14      False
15      False
16      False
17      False
18      False
19      False
20      False
21      False
22      False
23      False
24      False
25       True
26      False
27      False
28       True
29       True
30      False
31      False
32      False
33      False
34      False
35      False
36      False
37      False
38      False
39      False
40       True
41      False
42      False
43      False
44      False
45      False
46      False
47      False
48      False
49      False
50      False
51      False
52      False
53      False
54      False
55      False
56      False
57      False
58      False
59      False
60      False
61      False
62      False
63      False
64      False
65      False
66      False
67      False
68      False
69      False
70      False
71    