# Data analysis of the European Football Dataset using SQL

#### by Denilson Panzo
##### November 2024

#  DataBase Schema 
![1 IF83MVgOooX5EEm-Fateww.png](attachment:2170bab1-cdb9-4f68-91bf-994f69f086fd.png)

**Note:- For visual Simplicity we have downsized the columns o few tables (match_table,team_attributes,player_table,team_attributes) and, if you're interested for the complete schema base, please drop me a note.**

In [7]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import sqlite3


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('.'):  # Searches in the current directory
    for filename in filenames:
        if filename == 'database.sqlite':  # Look specifically for the file
            print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

./database.sqlite


In [9]:
## Map the dataset to a DB path and connect to sqlite base

path = "./database.sqlite/"
conn = sqlite3.connect(path)

In [13]:
## Read Master File
conn = sqlite3.connect(path)
master_table = pd.read_sql("""SELECT * 
                    FROM sqlite_master
                    WHERE type='table';""", conn)
print(master_table.tbl_name.to_list())

['sqlite_sequence', 'Player_Attributes', 'Player', 'Match', 'League', 'Country', 'Team', 'Team_Attributes']


### the columns of each table

In [17]:
## print all the colums of each table

sqlite_sequence_table = pd.read_sql("""SELECT *
                        FROM sqlite_sequence;""", conn)
player_attributes_table = pd.read_sql("""SELECT *
                        FROM player_attributes;""", conn)
player_table = pd.read_sql("""SELECT *
                        FROM player;""", conn)
match_table = pd.read_sql("""SELECT *
                        FROM match;""", conn)
league_table = pd.read_sql("""SELECT *
                        FROM league;""", conn)
country_table = pd.read_sql("""SELECT *
                        FROM country;""", conn)
team_table = pd.read_sql("""SELECT *
                        FROM team;""", conn)
team_attributes = pd.read_sql("""SELECT *
                        FROM team_attributes;""", conn)

print('The columns for sqlite_sequence_table_table are :',sqlite_sequence_table.columns.to_list())
print('\n')
print('The columns for player_attributes_table are :',player_attributes_table.columns.to_list())
print('\n')
print('The columns for player_table are :',player_table.columns.to_list())
print('\n')
print('The columns for match_table are :',match_table.columns.to_list())
print('\n')
print('The columns for league_table are :',league_table.columns.to_list())
print('\n')
print('The columns for country_table are :',country_table.columns.to_list())
print('\n')
print('The columns for team_table are :',team_table.columns.to_list())
print('\n')
print('The columns for team_attributes are :',team_attributes.columns.to_list())

The columns for sqlite_sequence_table_table are : ['name', 'seq']


The columns for player_attributes_table are : ['id', 'player_fifa_api_id', 'player_api_id', 'date', 'overall_rating', 'potential', 'preferred_foot', 'attacking_work_rate', 'defensive_work_rate', 'crossing', 'finishing', 'heading_accuracy', 'short_passing', 'volleys', 'dribbling', 'curve', 'free_kick_accuracy', 'long_passing', 'ball_control', 'acceleration', 'sprint_speed', 'agility', 'reactions', 'balance', 'shot_power', 'jumping', 'stamina', 'strength', 'long_shots', 'aggression', 'interceptions', 'positioning', 'vision', 'penalties', 'marking', 'standing_tackle', 'sliding_tackle', 'gk_diving', 'gk_handling', 'gk_kicking', 'gk_positioning', 'gk_reflexes']


The columns for player_table are : ['id', 'player_api_id', 'player_name', 'player_fifa_api_id', 'birthday', 'height', 'weight']


The columns for match_table are : ['id', 'country_id', 'league_id', 'season', 'stage', 'date', 'match_api_id', 'home_team_api_id', 'awa

### count of Columns of all table

In [20]:
## count of Columns of all table

print('No of columns for sqlite_sequence_table_table are :',len(sqlite_sequence_table.columns))
print('\n')
print('No of columns for player_attributes_table are :',len(player_attributes_table.columns))
print('\n')
print('No of columns for player_table are :',len(player_table.columns))
print('\n')
print('No of columns for match_table are :',len(match_table.columns))
print('\n')
print('No of columns for league_table are :',len(match_table.columns))
print('\n')
print('No of columns for country_table are :',len(country_table.columns))
print('\n')
print('No of columns for team_table are :',len(team_table.columns))
print('\n')
print('No of columns for team_attributes are :',len(team_attributes.columns))

No of columns for sqlite_sequence_table_table are : 2


No of columns for player_attributes_table are : 42


No of columns for player_table are : 7


No of columns for match_table are : 115


No of columns for league_table are : 115


No of columns for country_table are : 2


No of columns for team_table are : 5


No of columns for team_attributes are : 25


# EDA

### Query 1:- Types of leagues

In [37]:
##Query 1:- Types of leagues

sql = pd.read_sql(""" select id,name from league
group by name
""", conn)
sql.head()

Unnamed: 0,id,name
0,1,Belgium Jupiler League
1,1729,England Premier League
2,4769,France Ligue 1
3,7809,Germany 1. Bundesliga
4,10257,Italy Serie A


### Query 2:- League vs Country Name

In [41]:
##Query 2:- League vs Country Name

sql = pd.read_sql(""" 
select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name""", conn)
sql.head(100)

Unnamed: 0,id,leagueName,countryName
0,1,Belgium Jupiler League,Belgium
1,1729,England Premier League,England
2,4769,France Ligue 1,France
3,7809,Germany 1. Bundesliga,Germany
4,10257,Italy Serie A,Italy
5,13274,Netherlands Eredivisie,Netherlands
6,15722,Poland Ekstraklasa,Poland
7,17642,Portugal Liga ZON Sagres,Portugal
8,19694,Scotland Premier League,Scotland
9,21518,Spain LIGA BBVA,Spain


### Query 3:- Teams associated with the leagues (Country vs League vs Teams)

In [53]:
##Query 3:- Teams associated with the leagues (Country vs League vs Teams)

sql = pd.read_sql(""" 

select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename

""", conn)
sql.head()

Unnamed: 0,countryName,LeagueName,TeamInTheLeague
0,Belgium,Belgium Jupiler League,Beerschot AC
1,Belgium,Belgium Jupiler League,Club Brugge KV
2,Belgium,Belgium Jupiler League,FCV Dender EH
3,Belgium,Belgium Jupiler League,KAA Gent
4,Belgium,Belgium Jupiler League,KAS Eupen


### Query 4:-  Player Info

In [57]:
##Query 4:-  Player Info
sql = pd.read_sql(""" 
select 
pa.player_api_id as Player_api_id,
p.player_name as Player_Name, 
strftime('%d-%m-%Y',p.birthday) as DOB,
max(p.height) as Height,
max(p.weight) as Weight,
max(pa.overall_rating) as Rating,
max(pa.potential) as Potenital,
pa.preferred_foot as Preferred_Foot,
pa.attacking_work_rate as Attacking_Work_Rate,
pa.defensive_work_rate as Defensive_Work_Rate
from player p 
join player_attributes pa on p.player_api_id=pa.player_api_id and p.player_fifa_api_id=pa.player_fifa_api_id
group by p.player_name

""", conn)
sql.head()

Unnamed: 0,Player_api_id,Player_Name,DOB,Height,Weight,Rating,Potenital,Preferred_Foot,Attacking_Work_Rate,Defensive_Work_Rate
0,505942,Aaron Appindangoye,29-02-1992,182.88,187,67,71,right,medium,medium
1,155782,Aaron Cresswell,15-12-1989,170.18,146,74,80,left,medium,medium
2,162549,Aaron Doran,13-05-1991,170.18,163,71,78,right,medium,medium
3,30572,Aaron Galindo,08-05-1982,182.88,198,75,82,right,medium,high
4,23780,Aaron Hughes,08-11-1979,182.88,154,78,81,right,medium,medium


### Query 5: Home and Away Team Info

In [61]:
##Query 5: Home Team Info
sql = pd.read_sql(""" 

select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id
""", conn)
sql.head()

Unnamed: 0,match_api_id,home_team_api_id,team_long_name,team_short_name
0,483129,8583,AJ Auxerre,AUX
1,483130,9827,Girondins de Bordeaux,BOR
2,483131,9746,Le Havre AC,LEH
3,483132,8682,Le Mans FC,LEM
4,483133,9748,Olympique Lyonnais,LYO


In [66]:
##Query 6:  Away Team Info
sql = pd.read_sql(""" 

select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id
""", conn)
sql.head()

Unnamed: 0,match_api_id,away_team_api_id,team_long_name,team_short_name
0,483129,9830,FC Nantes,NAN
1,483130,7819,SM Caen,CAE
2,483131,9831,OGC Nice,NIC
3,483132,8689,FC Lorient,LOR
4,483133,9941,Toulouse FC,TOU


### Query 6: Match By Match RAW

In [69]:
##Query 6:-  Match By Match RAW
sql = pd.read_sql(""" 
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id

""", conn)
sql.head()

Unnamed: 0,match_api_id,countryname,leaguename,Home_Team_Long_Name,Home_Team_Short_Name,Away_Team_Long_Name,Away_Team_Short_Name,season,Date,stage,home_team_goal,away_team_goal,Match_Winner
0,483129,France,France Ligue 1,AJ Auxerre,AUX,FC Nantes,NAN,2008/2009,09-08-2008,1,2,1,AUX
1,483130,France,France Ligue 1,Girondins de Bordeaux,BOR,SM Caen,CAE,2008/2009,09-08-2008,1,2,1,BOR
2,483131,France,France Ligue 1,Le Havre AC,LEH,OGC Nice,NIC,2008/2009,09-08-2008,1,1,0,LEH
3,483132,France,France Ligue 1,Le Mans FC,LEM,FC Lorient,LOR,2008/2009,09-08-2008,1,0,1,LOR
4,483133,France,France Ligue 1,Olympique Lyonnais,LYO,Toulouse FC,TOU,2008/2009,10-08-2008,1,3,0,LYO


### Query 7:Goals scored 

In [73]:
##Query 7:Goals scored by home team vs away team Season on Season
sql = pd.read_sql(""" 

select
at_table.season,
at_table.leaguename,
ht_table.HT_goals as Home_Team_Goals,
at_table.AT_goals as Away_Team_Goals
from (
select 
b.season,
b.leaguename,
sum(home_team_goal) as HT_goals
from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id) b

group by b.season,b.leaguename ) ht_table join 
(select 
b.season,
b.leaguename,
sum(away_team_goal) as AT_goals
from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id) b

group by b.season,b.leaguename) at_Table on ht_table.leaguename=at_table.leaguename and ht_table.season=at_table.season
""", conn)
sql.head()

Unnamed: 0,season,leaguename,Home_Team_Goals,Away_Team_Goals
0,2008/2009,Belgium Jupiler League,499,356
1,2008/2009,England Premier League,532,410
2,2008/2009,France Ligue 1,489,369
3,2008/2009,Germany 1. Bundesliga,520,374
4,2008/2009,Italy Serie A,578,410


### Query 8: Total Games played by team

In [79]:
##Query 8:-Total Games played by team
sql = pd.read_sql(""" 
select 
ht.season as Season,
ht.team as Team ,
ht.home_games_played,
at.away_games_played,
ht.home_games_played + at.away_games_played as Total_games_played_for_the_season
from (
select b.season as Season,b.home_team_long_name as Team ,count(b.match_api_id) as home_games_played from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id ) b
group by b.season,b.home_team_long_name ) ht
join (select b.season as Season,b.away_team_long_name as team,count(b.match_api_id) as away_games_played from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id ) b
group by b.season,b.home_team_long_name) at on ht.season=at.season and ht.team=at.team
""", conn)
sql.head()

Unnamed: 0,Season,Team,home_games_played,away_games_played,Total_games_played_for_the_season
0,2008/2009,1. FC Köln,17,17,34
1,2008/2009,AC Bellinzona,18,18,36
2,2008/2009,ADO Den Haag,17,17,34
3,2008/2009,AJ Auxerre,19,19,38
4,2008/2009,AS Monaco,19,19,38


### Query 9: Matches won by Teams

In [82]:
##Query 9:-  Matches won by Teams Season on Season
sql = pd.read_sql(""" 
select 
a.season,a.match_winner,count(a.match_winner) as Number_of_Wins
from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_long_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_long_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id) a
group by a.season,a.match_winner

""", conn)
sql.head()

Unnamed: 0,season,Match_Winner,Number_of_Wins
0,2008/2009,1. FC Köln,11
1,2008/2009,AC Bellinzona,11
2,2008/2009,ADO Den Haag,8
3,2008/2009,AJ Auxerre,16
4,2008/2009,AS Monaco,11


### Query 10:-  Most Successful Teams

In [85]:
##Query 11:-  Most Successful Teams compared for all seasons

sql = pd.read_sql(""" 
select * from (
select 
a.team,
sum(a.total_games),
sum(a.wins),
100*round(round(a.wins,2)/round(a.total_games,2),4) as Win_Percentage from
(select 
total_matches.season,
total_matches.team,
Total_games_played_for_the_season as total_games,
win_matches.number_of_wins as wins
from (
select 
ht.season as Season,
ht.team as Team ,
ht.home_games_played,
at.away_games_played,
ht.home_games_played + at.away_games_played as Total_games_played_for_the_season
from (
select b.season as Season,b.home_team_long_name as Team ,count(b.match_api_id) as home_games_played from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id ) b
group by b.season,b.home_team_long_name ) ht
join (select b.season as Season,b.away_team_long_name as team,count(b.match_api_id) as away_games_played from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_short_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_short_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id ) b
group by b.season,b.home_team_long_name) at on ht.season=at.season and ht.team=at.team ) total_matches join 
(select 
a.season,a.match_winner,count(a.match_winner) as Number_of_Wins
from (
select
a.match_api_id,
a.countryname,
a.leaguename,
ht.team_long_name as Home_Team_Long_Name,
ht.team_short_name as Home_Team_Short_Name,
at.team_long_name as Away_Team_Long_Name,
at.team_short_name as Away_Team_Short_Name,
a.season,
strftime('%d-%m-%Y',a.date) as Date,
a.stage,
a.home_team_goal,
a.away_team_goal,
case 
when a.home_team_goal - a.away_team_goal > 0 then ht.team_long_name
when a.home_team_goal - a.away_team_goal < 0 then at.team_long_name else 'Tie' end as Match_Winner
from (
select 
match_api_id,
country_team_league_table.countryname,
country_team_league_table.leaguename,
home_team_goal,
away_team_goal,
season,
date,
stage
from match m 
join team t on t.team_api_id=m.home_team_api_id or t.team_api_id=m.away_team_api_id
join (select 
country_league_table.countryname,
league_teams_table.LeagueName,
league_teams_table.Teamintheleague from (
select
l.name as LeagueName,
t.team_long_name as TeamInTheLeague
from league l 
join match m on l.country_id=m.country_id
join team t on  m.home_team_api_id=t.team_api_id
group by l.name,t.team_long_name) league_teams_table join (select c.id,
l.name as leagueName,
c.name as countryName
from league l
join country c on c.id=l.id
group by c.name,l.name) country_league_table on league_teams_table.leaguename=country_league_table.leaguename) country_team_league_table on t.team_long_name=country_team_league_table.teamintheleague
group by match_api_id ) a join (select  
match_api_id,
home_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.home_team_api_id
group by match_api_id) ht on a.match_api_id=ht.match_api_id 
join (select  
match_api_id,
away_team_api_id,
team_long_name,
team_short_name
from match m
join team t on t.team_api_id=m.away_team_api_id
group by match_api_id) at on a.match_api_id=at.match_api_id) a
group by a.season,a.match_winner) win_matches on total_matches.team=win_matches.match_winner) a
group by a.team)b
group by b.team
order by b.win_percentage desc
""", conn)
sql.head()

Unnamed: 0,team,sum(a.total_games),sum(a.wins),Win_Percentage
0,Manchester United,2432,1536,73.68
1,AZ,2176,1120,73.53
2,FC Barcelona,2432,1872,71.05
3,Standard de Liège,1484,742,70.59
4,RSC Anderlecht,1484,952,70.59


In [116]:
# Correct SQL query
sql = """
SELECT 
    pa.player_fifa_api_id,
    p.player_name,
    MAX(pa.overall_rating) AS max_overall_rating,
    AVG(pa.overall_rating) AS avg_overall_rating,
    pa.potential,
    pa.preferred_foot,
    AVG(pa.attacking_work_rate) AS avg_attacking_work_rate,
    AVG(pa.defensive_work_rate) AS avg_defensive_work_rate,
    AVG(pa.dribbling) AS avg_dribbling,
    AVG(pa.ball_control) AS avg_ball_control,
    AVG(pa.short_passing) AS avg_short_passing,
    AVG(pa.sprint_speed) AS avg_sprint_speed,
    AVG(pa.reactions) AS avg_reactions,
    AVG(pa.stamina) AS avg_stamina
FROM 
    player_attributes pa
JOIN 
    player p
ON 
    pa.player_fifa_api_id = p.player_fifa_api_id
GROUP BY 
    pa.player_fifa_api_id, p.player_name, pa.potential, pa.preferred_foot
ORDER BY 
    max_overall_rating DESC
LIMIT 1;
"""

# Execute the query and fetch the result
best_player = pd.read_sql(sql, conn)

# Display the result
best_player.head()

Unnamed: 0,player_fifa_api_id,player_name,max_overall_rating,avg_overall_rating,potential,preferred_foot,avg_attacking_work_rate,avg_defensive_work_rate,avg_dribbling,avg_ball_control,avg_short_passing,avg_sprint_speed,avg_reactions,avg_stamina
0,158023,Lionel Messi,94,90.6,94,left,0.0,0.0,96.0,95.2,88.0,91.0,90.0,75.8


# MESSI is the GOAT 😁