# La Liga Dataset Analysis
The La Liga dataset contains match data from the 1995 to 2020 seasons. We will analyze different seasons and draw some conclusions through visualizations.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Notebook-wide settings: wide figures, up to 42 rows shown when a frame is
# displayed, and pandas' chained-assignment warning switched off.
plt.rc('figure', figsize=[18, 5])
pd.options.display.max_rows = 42
pd.set_option('mode.chained_assignment', None)  # pandas' default is 'warn'

In [None]:
# Load the dataset from a CSV path relative to the current working directory.
laliga = pd.read_csv('dataset/laliga.csv')

In [None]:
# Show the loaded DataFrame as the cell output.
laliga

In [None]:
# Print pandas' summary of the frame (columns, dtypes, non-null counts).
laliga.info()

## Analysis of the 95-96 season

In [None]:
# Rows of the 1995-96 season. An explicit copy is taken because later cells
# convert a column and sort this frame in place; those writes should act on an
# independent frame rather than on a slice of `laliga`.
season_95 = laliga[laliga['Season'] == '1995-96'].copy()

##### How many teams were in the 1995-96 season?

In [None]:
# Number of distinct values in the HomeTeam column (a missing value, if
# present, is counted as one value too).
season_95['HomeTeam'].nunique(dropna=False)

#### Who won the league?

In [None]:
# Maps each team name to its accumulated league points (a plain integer);
# it is filled in by the loop in the next cell.
team_records = {}

In [None]:
# Award league points match by match: 3 to the winner, 1 to each side otherwise.
# Any FTR value other than 'H' or 'A' is treated as a draw.
for _, row in season_95.iterrows():
    home = row['HomeTeam']
    away = row['AwayTeam']
    result = row['FTR']

    if result == 'H':
        awarded = {home: 3}
    elif result == 'A':
        awarded = {away: 3}
    else:
        # Away team first, so teams enter `team_records` in a stable order
        # (this order decides ties when picking the maximum later on).
        awarded = {away: 1, home: 1}

    # dict.get with a default of 0 covers a team's first appearance without a
    # separate membership check.
    for team, points in awarded.items():
        team_records[team] = team_records.get(team, 0) + points

In [None]:
def get_max(record):
    """Return the key of ``record`` that has the largest value.

    If several keys share the largest value, the one that appears first in
    the dictionary's iteration order is returned.

    Args:
        record: Dictionary mapping keys to comparable values.

    Returns:
        The key whose value is the maximum.

    Raises:
        ValueError: If ``record`` is empty.
    """
    # max() walks the keys once and keeps the first maximal one, which is
    # exactly what the keys/values/index lookup did with three list passes.
    return max(record, key=record.get)

### Who won the league in the 1995-96 season?

In [None]:
# The team with the most accumulated points is reported as the league winner.
champion_95 = get_max(team_records)
print(f'{champion_95}  won the 1995-1996 league')

## Analyzing the home, away wins and draws throughout the season for all teams

In [None]:
# Separate per-team counts for home wins, away wins and draws.
home_wins_95 = season_95[season_95['FTR'] == 'H'].groupby('HomeTeam').agg({'FTR': 'count'})
away_wins_95 = season_95[season_95['FTR'] == 'A'].groupby('AwayTeam').agg({'FTR': 'count'})

# A drawn match is a draw for both sides, so each one is tallied under its home
# team as well as its away team. Grouping by 'AwayTeam' alone would leave out
# every draw a team got at home.
drawn_matches_95 = season_95[season_95['FTR'] == 'D']
draw_95 = (
    pd.concat([drawn_matches_95['HomeTeam'], drawn_matches_95['AwayTeam']])
    .value_counts()
    .rename_axis('Team')
    .to_frame('FTR')  # same single 'FTR' count column as the two frames above
)

In [None]:
# Give each frame's count column a descriptive name before they are combined.
for counts, label in (
    (home_wins_95, 'Home_wins'),
    (away_wins_95, 'Away_wins'),
    (draw_95, 'draws'),
):
    counts.rename(columns={'FTR': label}, inplace=True)

In [None]:
# Combine the three count frames on their team index and name the index.
# Outer joins keep a team that is absent from one of the frames (for example a
# team without a single away win); its missing counts are filled with 0 so the
# team still shows up in the results instead of being dropped.
home_away_combined_df = (
    home_wins_95
    .merge(away_wins_95, left_index=True, right_index=True, how='outer')
    .fillna(0)
    .astype(int)
)
results_95 = (
    home_away_combined_df
    .merge(draw_95, left_index=True, right_index=True, how='outer')
    .fillna(0)
    .astype(int)
)
results_95.index.names = ['Team']
# Order teams by home wins, then away wins, then draws (all descending).
results_95.sort_values(['Home_wins', 'Away_wins', 'draws'], ascending=False, inplace=True)

In [None]:
# Grouped bar chart: one group of bars per team, one bar per result column.
results_95.plot.bar()
plt.show()

In [None]:
# Convert the Date column to pandas datetimes so the matches can be ordered
# chronologically.
# NOTE(review): no format/dayfirst is passed, so parsing relies on pandas'
# inference; if the CSV stores day-first dates, pass dayfirst=True -- confirm.
season_95['Date'] = pd.to_datetime(season_95['Date'])

##### Analysis of the championship race between the top two teams, Valencia and Atlético Madrid

In [None]:
# Order the season's matches chronologically (oldest first), in place.
season_95.sort_values(by='Date', ascending=True, inplace=True)

In [None]:
# All matches each team played, home or away. Copies are taken because the
# following cells reset the index and add a column to these frames; those
# writes should not target a slice of `season_95`.
played_by_valencia = (season_95['HomeTeam'] == 'Valencia') | (season_95['AwayTeam'] == 'Valencia')
valencia = season_95[played_by_valencia].copy()

played_by_ath_madrid = (season_95['HomeTeam'] == 'Ath Madrid') | (season_95['AwayTeam'] == 'Ath Madrid')
athletico_madrid = season_95[played_by_ath_madrid].copy()

In [None]:
# Renumber the rows 0..n-1 so the index matches the order of the matches.
# drop=True discards the old index directly instead of first adding it as an
# "index" column and then deleting that column.
valencia.reset_index(drop=True, inplace=True)

athletico_madrid.reset_index(drop=True, inplace=True)

In [None]:
# Add a "league_score" column, initially 0 for every match. It holds the points
# earned per match and is made cumulative later on to trace the title race.
for team_matches in (valencia, athletico_madrid):
    team_matches["league_score"] = 0

In [None]:
# Fill "league_score" with the points each team earned in every match.
def match_points(row, team):
    """Return the league points `team` earned in the match described by `row`.

    A win for `team` (away with FTR "A", or at home with FTR "H") is worth 3,
    a draw (FTR "D") is worth 1, anything else 0.
    """
    result = row["FTR"]
    won_away = row["AwayTeam"] == team and result == "A"
    won_home = row["HomeTeam"] == team and result == "H"
    if won_away or won_home:
        return 3
    if result == "D":
        return 1
    return 0


# One shared helper replaces the two copy-pasted loops; only the team name
# differs between the two frames.
valencia["league_score"] = [
    match_points(row, "Valencia") for _, row in valencia.iterrows()
]
athletico_madrid["league_score"] = [
    match_points(row, "Ath Madrid") for _, row in athletico_madrid.iterrows()
]

In [None]:
# Turn the per-match points into a running total. The column is overwritten,
# so running this cell a second time would accumulate the totals again.
for team_matches in (valencia, athletico_madrid):
    team_matches["league_score"] = team_matches["league_score"].cumsum()

In [None]:
# X-axis labels "Week1" .. "Week42", one per match plotted below.
lis = [f"Week{week}" for week in range(1, 43)]

In [None]:
# Plot both teams' cumulative points against the match week.
fig, ax = plt.subplots()
# The legend entries come from each line's `label`. The former ax.set_label()
# calls labelled the Axes object itself, which the legend never shows, so they
# have been removed.
ax.plot(lis, valencia.league_score, label="Valencia")
ax.plot(lis, athletico_madrid.league_score, label="Athletico Madrid")
plt.xticks(rotation=90)  # week labels are long; rotate them to avoid overlap
ax.legend()
plt.yticks(list(range(5, 95, 5)))  # ticks at 5, 10, ..., 90
plt.show()