In [1]:
# Compute the cards and fouls in favor of each team for every season from 2005/06 to 2012/13, using data from FootballUK
# By Aaron Zaragoza
# Contact: aaronzmk@gmail.com

In [2]:
# Import libraries
import pandas as pd
import os

# Set working directory; the relative data paths used by the loading code are
# resolved against it. The default is the author's local checkout; set the
# THESIS_PYTHON_DIR environment variable to run the notebook from another location.
os.chdir(os.environ.get('THESIS_PYTHON_DIR', 'C:\\Users\\aaron\\Desktop\\GitHub\\Thesis\\Python'))

In [3]:
# Import data for every season file (2005/06 through 2021/22) and, for each one,
# aggregate the opponents' fouls and cards per team and season.
time = ['0506', '0607', '0708', '0809', '0910', '1011', '1112', '1213', '1314', '1415',
        '1516', '1617', '1718', '1819', '1920', '2021', '2122']

# Columns of interest in the raw files and the names they are given here
STAT_COLUMNS = {'HF': 'homefouls', 'AF': 'awayfouls', 'HY': 'homeyellow', 'AY': 'awayyellow',
                'HR': 'homered', 'AR': 'awayred'}

# Raw team spelling -> standardized spelling, applied in this order.
# NOTE: these are plain-text *substring* replacements, so a raw value that already
# contained a long form (e.g. 'Real Sociedad') would get its prefix duplicated.
TEAM_NAME_FIXES = {
    'Alaves': 'Alavés',
    'Almeria': 'Almería',
    'Ath Bilbao': 'Athletic Club',
    'Ath Madrid': 'Atlético Madrid',
    'Cadiz': 'Cádiz',
    'Celta': 'Celta Vigo',
    'Córdoba CF': 'Córdoba',
    'Espanol': 'Espanyol',
    'Gimnastic': 'Gimnàstic',
    'Hercules': 'Hércules',
    'La Coruna': 'La Coruña',
    'Leganes': 'Leganés',
    'Malaga': 'Málaga',
    'Murcia': 'Real Murcia',
    'Real Oviedo': 'Oviedo',
    'Santander': 'Racing Sant',
    'Sociedad': 'Real Sociedad',
    'Sp Gijon': 'Sporting Gijón',
    'Vallecano': 'Rayo Vallecano',
}

# Dictionaries to store DataFrames, all keyed by csv file name
dataframes = {}  # match-level rows with standardized team names
dataAway = {}    # per away team: sums of the home side's fouls / yellows / reds
dataHome = {}    # per home team: sums of the away side's fouls / yellows / reds

# Iterate over the list of seasons
for season in time:
    filename = f"SP1{season}.csv"

    # Build the path component by component so no path separator is hard-coded
    filepath = os.path.join('Data', 'FootballUK', filename)

    # Read csv with the data
    raw_matches = pd.read_csv(filepath)

    # Keep variables of interest, rename them and tag every row with its season
    matches = (
        raw_matches[['HomeTeam', 'AwayTeam', *STAT_COLUMNS]]
        .rename(columns=STAT_COLUMNS)
        .assign(Season=season)
    )

    # Standardize the names of the teams in both team columns.
    # regex=False pins literal matching regardless of the pandas version's default.
    for team_column in ('HomeTeam', 'AwayTeam'):
        for raw_name, clean_name in TEAM_NAME_FIXES.items():
            matches[team_column] = matches[team_column].str.replace(raw_name, clean_name, regex=False)

    dataframes[filename] = matches  # Store the DataFrame in the dictionary

    dataAway[filename] = matches.groupby(['AwayTeam', 'Season'], as_index=False).agg(
        {'homefouls': 'sum', 'homeyellow': 'sum', 'homered': 'sum'})
    dataHome[filename] = matches.groupby(['HomeTeam', 'Season'], as_index=False).agg(
        {'awayfouls': 'sum', 'awayyellow': 'sum', 'awayred': 'sum'})
    

In [4]:
# Display the keys of one of the dictionaries created above (one entry per season csv file)
dataAway.keys()

dict_keys(['SP10506.csv', 'SP10607.csv', 'SP10708.csv', 'SP10809.csv', 'SP10910.csv', 'SP11011.csv', 'SP11112.csv', 'SP11213.csv', 'SP11314.csv', 'SP11415.csv', 'SP11516.csv', 'SP11617.csv', 'SP11718.csv', 'SP11819.csv', 'SP11920.csv', 'SP12021.csv', 'SP12122.csv'])

In [5]:
# Stack the per-season frames of each dictionary into a single DataFrame
# (one for Away, one for Home) and name the team column 'Equipo' in both
dAway = (
    pd.concat(dataAway.values(), ignore_index=True)
    .rename(columns={'AwayTeam': 'Equipo'})
)
dHome = (
    pd.concat(dataHome.values(), ignore_index=True)
    .rename(columns={'HomeTeam': 'Equipo'})
)

In [6]:
# Display the dAway dataframe: per team and season, the home side's fouls,
# yellow cards and red cards summed over the matches that team played away
dAway

Unnamed: 0,Equipo,Season,homefouls,homeyellow,homered
0,Alavés,0506,415,37,1
1,Athletic Club,0506,322,44,6
2,Atlético Madrid,0506,362,47,1
3,Barcelona,0506,389,56,4
4,Betis,0506,361,47,4
...,...,...,...,...,...
335,Real Madrid,2122,273,49,3
336,Real Sociedad,2122,248,43,1
337,Sevilla,2122,256,44,3
338,Valencia,2122,299,68,2


In [7]:
# Display the dHome dataframe: per team and season, the away side's fouls,
# yellow cards and red cards summed over the matches that team played at home
dHome

Unnamed: 0,Equipo,Season,awayfouls,awayyellow,awayred
0,Alavés,0506,394,56,2
1,Athletic Club,0506,349,57,3
2,Atlético Madrid,0506,371,56,5
3,Barcelona,0506,412,58,6
4,Betis,0506,362,58,2
...,...,...,...,...,...
335,Real Madrid,2122,228,34,0
336,Real Sociedad,2122,245,52,1
337,Sevilla,2122,242,52,1
338,Valencia,2122,303,64,4


In [8]:
# Join the Home and Away aggregates on team ('Equipo') and 'Season';
# only (team, season) pairs present in both frames are kept
CardFoul = dHome.merge(dAway, how='inner', on=['Season', 'Equipo'])
CardFoul

Unnamed: 0,Equipo,Season,awayfouls,awayyellow,awayred,homefouls,homeyellow,homered
0,Alavés,0506,394,56,2,415,37,1
1,Athletic Club,0506,349,57,3,322,44,6
2,Atlético Madrid,0506,371,56,5,362,47,1
3,Barcelona,0506,412,58,6,389,56,4
4,Betis,0506,362,58,2,361,47,4
...,...,...,...,...,...,...,...,...
335,Real Madrid,2122,228,34,0,273,49,3
336,Real Sociedad,2122,245,52,1,248,43,1
337,Sevilla,2122,242,52,1,256,44,3
338,Valencia,2122,303,64,4,299,68,2


In [9]:
# Add up the home-match and away-match figures into season totals per team,
# then keep only the identifiers and those totals
CardFoul = CardFoul.assign(
    TA_favor=lambda frame: frame['awayyellow'] + frame['homeyellow'],
    TR_favor=lambda frame: frame['awayred'] + frame['homered'],
    Fls_recibidasContra=lambda frame: frame['awayfouls'] + frame['homefouls'],
)[['Equipo', 'Season', 'TA_favor', 'TR_favor', 'Fls_recibidasContra']]

In [10]:
# Work on a fresh frame (assign returns a new DataFrame rather than writing into
# a possible slice) and add 'Aux', the season code converted to a number
CardFoul = CardFoul.assign(Aux=CardFoul['Season'].astype('float'))

In [11]:
# Filter rows to keep only the seasons before 2013/14 (season code < 1314).
# The comparison is made on 'Season' itself and the helper column 'Aux' is dropped
# only if present, so re-running this cell does not raise a KeyError; no in-place
# mutation of a filtered frame is involved.
CardFoul = (
    CardFoul[CardFoul['Season'].astype('float') < 1314]
    .drop(columns=['Aux'], errors='ignore')
)
CardFoul

Unnamed: 0,Equipo,Season,TA_favor,TR_favor,Fls_recibidasContra
0,Alavés,0506,93,3,809
1,Athletic Club,0506,101,9,671
2,Atlético Madrid,0506,103,6,733
3,Barcelona,0506,114,10,801
4,Betis,0506,105,6,723
...,...,...,...,...,...
155,Real Sociedad,1213,127,12,613
156,Sevilla,1213,119,8,537
157,Valencia,1213,103,12,538
158,Valladolid,1213,68,7,421


In [12]:
# Final sanity check (replaces a stray expression that raised NameError and
# stopped "Run All"): only seasons before 2013/14 remain in the result
assert CardFoul['Season'].astype('float').max() < 1314