In [167]:
import sqlite3 as sql
import pandas as pd

In [167]:
# Open the SQLite file through a URI with mode=rw. With a plain path, sqlite3
# silently creates an empty database when the file is missing and the mistake
# only surfaces later as "no such table" in the first query; mode=rw opens an
# existing file exactly as before but raises immediately if it does not exist.
conn = sql.connect("file:data/database.sqlite?mode=rw", uri=True)

In [167]:
# Load one row per match together with the country name, the league name and
# the long names of the home and away teams. Country and League use inner
# joins; the two Team joins are LEFT JOINs, so a match whose team id has no
# Team row is kept with a NULL team name. Rows are returned ordered by date.
matches = pd.read_sql("""SELECT Match.id, 
                                        Country.name AS country_name, 
                                        League.name AS league_name, 
                                        season, 
                                        stage, 
                                        date,
                                        HT.team_long_name AS  home_team,
                                        AT.team_long_name AS away_team,
                                        home_team_goal, 
                                        away_team_goal
                                FROM Match
                                JOIN Country on Country.id = Match.country_id
                                JOIN League on League.id = Match.league_id
                                LEFT JOIN Team AS HT on HT.team_api_id = Match.home_team_api_id
                                LEFT JOIN Team AS AT on AT.team_api_id = Match.away_team_api_id
                                ORDER by date;""", conn)

# Outcome flags (0/1) and league points for each match, computed column-wise.
#
# The flags come from direct goal comparisons rather than pd.get_dummies():
# get_dummies only emits a column for outcomes that actually occur, so the
# three-column assignment fails as soon as one outcome is absent (for example
# on a filtered subset without draws). Comparisons always yield all three
# columns and avoid a Python-level call per row.

matches['home_team_win'] = (matches['home_team_goal'] > matches['away_team_goal']).astype(int)
matches['away_team_win'] = (matches['home_team_goal'] < matches['away_team_goal']).astype(int)
# Anything that is neither a home win nor an away win is a draw (this also
# covers rows with missing goals, where both comparisons are False).
matches['draw'] = 1 - matches['home_team_win'] - matches['away_team_win']

# 3 points for a win, 1 for a draw, 0 for a loss.
matches['home_team_points'] = 3 * matches['home_team_win'] + matches['draw']
matches['away_team_points'] = 3 * matches['away_team_win'] + matches['draw']

# Melt so we can retrieve stats by team: every match appears twice, once from
# the home side (home_away == 'home_team') and once from the away side
# (home_away == 'away_team'), with `team` holding that side's name.
#
# The per-side columns are picked with Series.where instead of row-wise
# apply + pd.get_dummies(). get_dummies only creates columns for values that
# occur, so the two-column win/loss assignment fails when a class is missing
# (e.g. a subset that contains only draws); where() is also column-wise
# rather than one Python call per row.

matches_melt = (
    matches
    .merge(
        matches.melt(value_vars=['home_team', 'away_team'], var_name='home_away', value_name='team', ignore_index=False),
        left_index=True,
        right_index=True,
    )
    .assign(
        # Opponent of `team` in this row
        other_team=lambda df: df['away_team'].where(df['home_away'] == 'home_team', df['home_team']),
        # The row's team wins when its own side's win flag is set
        win=lambda df: df['home_team_win'].where(df['home_away'] == 'home_team', df['away_team_win']).astype(int),
        # A loss is any match that is neither a win for this team nor a draw
        loss=lambda df: (~df['win'].astype(bool) & ~df['draw'].astype(bool)).astype(int),
        points=lambda df: df['home_team_points'].where(df['home_away'] == 'home_team', df['away_team_points']),
        goal=lambda df: df['home_team_goal'].where(df['home_away'] == 'home_team', df['away_team_goal']),
    )
)

# Per-team season table: points, games played and win/loss/draw counts, plus
# each of those as a share (points relative to the 3-per-game maximum, outcome
# counts relative to games played).
# Add 'home_away' to the groupby keys to split the stats into home and away.

teams_by_season = (
    matches_melt
    .groupby(['country_name', 'league_name', 'season', 'team'])
    .agg(
        total_points=('points', 'sum'),
        n_games=('points', 'count'),
        wins=('win', 'sum'),
        losses=('loss', 'sum'),
        draws=('draw', 'sum'),
    )
)
# Place perc_points right after n_games
teams_by_season.insert(2, 'perc_points', teams_by_season['total_points'] / (3 * teams_by_season['n_games']))
teams_by_season = teams_by_season.join(
    teams_by_season[['wins', 'losses', 'draws']].div(teams_by_season['n_games'], axis=0).add_prefix('perc_')
)

# Matchups: for every (team, other_team) pairing within a league, the number
# of games and how many of them the team won, lost or drew, plus the same
# counts as a fraction of the games played.

matchups = (
    matches_melt
    .groupby(['country_name', 'league_name', 'team', 'other_team'])
    .agg(
        n_games=('win', 'count'),
        wins=('win', 'sum'),
        losses=('loss', 'sum'),
        draws=('draw', 'sum'),
    )
)
matchups = matchups.join(
    matchups[['wins', 'losses', 'draws']].div(matchups['n_games'], axis=0).add_prefix('perc_')
)

In [168]:
# Render the season table first, then the matchup table.
display(teams_by_season, matchups)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,total_points,n_games,perc_points,wins,losses,draws,perc_wins,perc_losses,perc_draws
country_name,league_name,season,team,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Belgium,Belgium Jupiler League,2008/2009,Beerschot AC,42,34,0.411765,11,14,9,0.323529,0.411765,0.264706
Belgium,Belgium Jupiler League,2008/2009,Club Brugge KV,59,34,0.578431,18,11,5,0.529412,0.323529,0.147059
Belgium,Belgium Jupiler League,2008/2009,FCV Dender EH,35,34,0.343137,9,17,8,0.264706,0.500000,0.235294
Belgium,Belgium Jupiler League,2008/2009,KAA Gent,59,34,0.578431,17,9,8,0.500000,0.264706,0.235294
Belgium,Belgium Jupiler League,2008/2009,KRC Genk,50,34,0.490196,15,14,5,0.441176,0.411765,0.147059
...,...,...,...,...,...,...,...,...,...,...,...,...
Switzerland,Switzerland Super League,2015/2016,FC Thun,41,36,0.379630,10,15,11,0.277778,0.416667,0.305556
Switzerland,Switzerland Super League,2015/2016,FC Vaduz,36,36,0.333333,7,14,15,0.194444,0.388889,0.416667
Switzerland,Switzerland Super League,2015/2016,FC Zürich,34,36,0.314815,7,16,13,0.194444,0.444444,0.361111
Switzerland,Switzerland Super League,2015/2016,Grasshopper Club Zürich,53,36,0.490741,15,13,8,0.416667,0.361111,0.222222


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,n_games,wins,losses,draws,perc_wins,perc_losses,perc_draws
country_name,league_name,team,other_team,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Belgium,Belgium Jupiler League,Beerschot AC,Club Brugge KV,10,3,6,1,0.300,0.600,0.10
Belgium,Belgium Jupiler League,Beerschot AC,FCV Dender EH,2,1,0,1,0.500,0.000,0.50
Belgium,Belgium Jupiler League,Beerschot AC,KAA Gent,10,3,2,5,0.300,0.200,0.50
Belgium,Belgium Jupiler League,Beerschot AC,KAS Eupen,2,1,0,1,0.500,0.000,0.50
Belgium,Belgium Jupiler League,Beerschot AC,KRC Genk,10,3,5,2,0.300,0.500,0.20
...,...,...,...,...,...,...,...,...,...,...
Switzerland,Switzerland Super League,Servette FC,FC St. Gallen,4,0,3,1,0.000,0.750,0.25
Switzerland,Switzerland Super League,Servette FC,FC Thun,8,1,5,2,0.125,0.625,0.25
Switzerland,Switzerland Super League,Servette FC,FC Zürich,8,3,3,2,0.375,0.375,0.25
Switzerland,Switzerland Super League,Servette FC,Grasshopper Club Zürich,8,4,4,0,0.500,0.500,0.00


### Old tables with ideas for other statistics

In [143]:
# Per (country, league, season) summary: number of distinct stages, number of
# distinct home-team names, average home/away goals, average goal difference,
# and average and total goals per match. Only groups with more than 10
# distinct stages are kept; rows are ordered by country, league and then
# season descending.
# NOTE(review): the alias `avg_home_team_scors` looks like a typo for
# `avg_home_team_goals` (compare `avg_away_team_goals`) - confirm before
# renaming, since it is the resulting column name.
# NOTE(review): the `AT` join is not referenced by any selected column.
season_stats = pd.read_sql("""SELECT Country.name AS country_name, 
                                        League.name AS league_name, 
                                        season,
                                        count(distinct stage) AS number_of_stages,
                                        count(distinct HT.team_long_name) AS number_of_teams,
                                        avg(home_team_goal) AS avg_home_team_scors, 
                                        avg(away_team_goal) AS avg_away_team_goals, 
                                        avg(home_team_goal-away_team_goal) AS avg_goal_dif, 
                                        avg(home_team_goal+away_team_goal) AS avg_goals, 
                                        sum(home_team_goal+away_team_goal) AS total_goals                                       
                                FROM Match
                                JOIN Country on Country.id = Match.country_id
                                JOIN League on League.id = Match.league_id
                                LEFT JOIN Team AS HT on HT.team_api_id = Match.home_team_api_id
                                LEFT JOIN Team AS AT on AT.team_api_id = Match.away_team_api_id
                                GROUP BY Country.name, League.name, season
                                HAVING count(distinct stage) > 10
                                ORDER BY Country.name, League.name, season DESC
                                ;""", conn)

# Head-to-head totals for every (home team, away team) pairing, aggregated
# over all seasons: games played, distinct seasons, goals scored by each side
# and the home-win / draw / away-win counts.
#
# Country.name and League.name are selected, so they are now part of the
# GROUP BY as well. Without that they are "bare" columns and SQLite fills them
# from an arbitrary row of each group. They are appended after the team keys
# so the grouping is unchanged whenever a pairing belongs to a single league.
#
# NOTE(review): this rebinds `matchups`, which the per-team matchup cell above
# also assigns with a different schema, so the result depends on cell order.
# NOTE(review): the aliases `away_team_goal` and `laway_wins` look like typos
# for `away_team_goals` / `away_wins`; they are kept because they are the
# resulting column names and other cells may rely on them.
matchups = pd.read_sql("""SELECT Country.name AS country_name, 
                                        League.name AS league_name, 
                                        HT.team_long_name AS  home_team,
                                        AT.team_long_name AS away_team,
                                        COUNT(*) AS games,
                                        COUNT(DISTINCT season) AS seasons,
                                        SUM(home_team_goal) AS home_team_goals, 
                                        SUM(away_team_goal) AS away_team_goal,
                                        SUM(CASE WHEN home_team_goal > away_team_goal THEN 1 ELSE 0 END) AS home_wins,
                                        SUM(CASE WHEN home_team_goal = away_team_goal THEN 1 ELSE 0 END) AS draws,
                                        SUM(CASE WHEN home_team_goal < away_team_goal THEN 1 ELSE 0 END) AS laway_wins
                                FROM Match
                                JOIN Country on Country.id = Match.country_id
                                JOIN League on League.id = Match.league_id
                                LEFT JOIN Team AS HT on HT.team_api_id = Match.home_team_api_id
                                LEFT JOIN Team AS AT on AT.team_api_id = Match.away_team_api_id
                                GROUP BY
                                    home_team,
                                    away_team,
                                    Country.name,
                                    League.name;""", conn)

In [145]:
# Real Madrid's home pairings, showing the ten with the most seasons played.
(
    matchups
    .loc[matchups["home_team"] == "Real Madrid CF"]
    .sort_values(by="seasons", ascending=False)
    .head(10)
)

Unnamed: 0,country_name,league_name,home_team,away_team,games,seasons,home_team_goals,away_team_goal,home_wins,draws,laway_wins
4852,Spain,Spain LIGA BBVA,Real Madrid CF,Athletic Club de Bilbao,8,8,34,9,8,0,0
4859,Spain,Spain LIGA BBVA,Real Madrid CF,FC Barcelona,8,8,12,22,2,1,5
4881,Spain,Spain LIGA BBVA,Real Madrid CF,Valencia CF,8,8,13,7,4,4,0
4878,Spain,Spain LIGA BBVA,Real Madrid CF,Sevilla FC,8,8,27,11,7,0,1
4853,Spain,Spain LIGA BBVA,Real Madrid CF,Atlético Madrid,8,8,13,8,4,1,3
4864,Spain,Spain LIGA BBVA,Real Madrid CF,Málaga CF,8,8,25,7,6,2,0
4860,Spain,Spain LIGA BBVA,Real Madrid CF,Getafe CF,8,8,32,9,8,0,0
4868,Spain,Spain LIGA BBVA,Real Madrid CF,RCD Espanyol,8,8,27,5,6,2,0
4882,Spain,Spain LIGA BBVA,Real Madrid CF,Villarreal CF,7,7,22,7,6,1,0
4863,Spain,Spain LIGA BBVA,Real Madrid CF,Levante UD,6,6,19,3,6,0,0
