In [1]:
# This project is aimed at examining the effect of an early red card on the outcome of a game
# Here you can find a link to the underlying database, which includes game stats from over 60.000 games: https://www.kaggle.com/datasets/mexwell/football-data-from-transfermarkt

# Hypothesis testing is used to get a reslut on the underlying question: Makes a red card in the first half a draw less probable?
# Null Hypothesis (H0): Receiving a red card in the first half increases the probability that there is a winning / losing team and, thereby, no draw.
# Alternative Hypothesis (H1): Receiving a red card in the first half does not affect the probability of a draw.

In [73]:
import pandas as pd
import numpy as np

# Read CSV files
appearances = pd.read_csv('appearances.csv')
games = pd.read_csv('games.csv')

In [75]:
# Merge the dataframes on game_id
merged_df = pd.merge(appearances, games, on='game_id')
merged_df.sort_values(by='game_id', ascending = True)

Unnamed: 0,appearance_id,game_id,player_id,player_club_id,player_current_club_id,date_x,player_name,competition_id_x,yellow_cards,red_cards,...,stadium,attendance,referee,url,home_club_formation,away_club_formation,home_club_name,away_club_name,aggregate,competition_type
3363,2211607_95755,2211607,95755,610,1096,2012-08-05,Viktor Fischer,NLSC,0,0,...,Johan Cruijff ArenA,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other
3337,2211607_111184,2211607,111184,610,1269,2012-08-05,Dico Koppers,NLSC,0,0,...,Johan Cruijff ArenA,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other
3362,2211607_72462,2211607,72462,383,234,2012-08-05,Luciano Narsingh,NLSC,0,0,...,Johan Cruijff ArenA,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other
3338,2211607_12282,2211607,12282,610,12321,2012-08-05,Daley Blind,NLSC,0,0,...,Johan Cruijff ArenA,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other
3340,2211607_124891,2211607,124891,610,114,2012-08-05,Aras Özbiliz,NLSC,0,0,...,Johan Cruijff ArenA,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1582990,4352472_257097,4352472,257097,124,124,2024-05-25,Ben Davies,SFA,0,0,...,Celtic Park,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup
1582989,4352472_156941,4352472,156941,124,124,2024-05-25,John Lundstram,SFA,0,0,...,Celtic Park,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup
1582988,4352472_128899,4352472,128899,124,124,2024-05-25,Jack Butland,SFA,0,0,...,Celtic Park,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup
1583001,4352472_420931,4352472,420931,371,371,2024-05-25,Daizen Maeda,SFA,1,0,...,Celtic Park,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup


In [77]:
# Determine the winning team
def determine_winner(row):
    if row['home_club_goals'] > row['away_club_goals']:
        return row['home_club_id']
    elif row['home_club_goals'] < row['away_club_goals']:
        return row['away_club_id']
    else:
        return None  # Draw

merged_df['winning_club_id'] = merged_df.apply(determine_winner, axis=1)

merged_df.sort_values(by='game_id', ascending = True)

Unnamed: 0,appearance_id,game_id,player_id,player_club_id,player_current_club_id,date_x,player_name,competition_id_x,yellow_cards,red_cards,...,attendance,referee,url,home_club_formation,away_club_formation,home_club_name,away_club_name,aggregate,competition_type,winning_club_id
3363,2211607_95755,2211607,95755,610,1096,2012-08-05,Viktor Fischer,NLSC,0,0,...,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other,383.0
3337,2211607_111184,2211607,111184,610,1269,2012-08-05,Dico Koppers,NLSC,0,0,...,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other,383.0
3362,2211607_72462,2211607,72462,383,234,2012-08-05,Luciano Narsingh,NLSC,0,0,...,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other,383.0
3338,2211607_12282,2211607,12282,610,12321,2012-08-05,Daley Blind,NLSC,0,0,...,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other,383.0
3340,2211607_124891,2211607,124891,610,114,2012-08-05,Aras Özbiliz,NLSC,0,0,...,50000.0,Björn Kuipers,https://www.transfermarkt.co.uk/spielbericht/i...,,,Eindhovense Voetbalvereniging Philips Sport Ve...,AFC Ajax Amsterdam,4:2,other,383.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1582990,4352472_257097,4352472,257097,124,124,2024-05-25,Ben Davies,SFA,0,0,...,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup,371.0
1582989,4352472_156941,4352472,156941,124,124,2024-05-25,John Lundstram,SFA,0,0,...,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup,371.0
1582988,4352472_128899,4352472,128899,124,124,2024-05-25,Jack Butland,SFA,0,0,...,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup,371.0
1583001,4352472_420931,4352472,420931,371,371,2024-05-25,Daizen Maeda,SFA,1,0,...,48556.0,Nick Walsh,https://www.transfermarkt.co.uk/spielbericht/i...,4-3-3 Attacking,4-2-3-1,The Celtic Football Club,Rangers Football Club,1:0,domestic_cup,371.0


In [78]:
# Identify red cards in the first half
merged_df['red_card_first_half'] = (merged_df['red_cards'] == 1) & (merged_df['minutes_played'] <= 45)

# Create won_games DataFrame
won_games = merged_df[merged_df['winning_club_id'].notnull()]

# Filter won_games Data Frame for matches where there was a red_card_first_half
red_card_with_winner = won_games[won_games['red_card_first_half'] == True]

red_card_with_winner

Unnamed: 0,appearance_id,game_id,player_id,player_club_id,player_current_club_id,date_x,player_name,competition_id_x,yellow_cards,red_cards,...,referee,url,home_club_formation,away_club_formation,home_club_name,away_club_name,aggregate,competition_type,winning_club_id,red_card_first_half
2051,2222974_44357,2222974,44357,660,2477,2012-07-29,Oleksandr Kucher,UKR1,0,1,...,Sergiy Boyko,https://www.transfermarkt.co.uk/shakhtar-donet...,,,FC Shakhtar Donetsk,FK Kryvbas Kryvyi Rig,1:0,domestic_league,660.0,True
2967,2222638_36903,2222638,36903,3725,1083,2012-08-04,Anton Amelchenko,RU1,0,1,...,Sergey Lapochkin,https://www.transfermarkt.co.uk/alania-vladika...,,,Spartak Vladikavkaz (-2020),RFK Akhmat Grozny,5:0,domestic_league,2703.0,True
3133,2229922_35813,2229922,35813,2282,1245,2012-08-04,Víctor Vázquez,BE1,0,1,...,Joeri van de Velde,https://www.transfermarkt.co.uk/rsc-charleroi_...,,,Royal Charleroi Sporting Club,Club Brugge Koninklijke Voetbalvereniging,0:1,domestic_league,2282.0,True
3222,2230282_45931,2230282,45931,2578,2578,2012-08-04,Grégory Tadé,SC1,0,1,...,Stevie O'Reilly,https://www.transfermarkt.co.uk/heart-of-midlo...,,,Heart of Midlothian Football Club,Saint Johnstone Football Club,2:0,domestic_league,43.0,True
3396,2222599_15597,2222599,15597,2439,2439,2012-08-05,Artur Tlisov,RU1,0,1,...,Vladimir Kazmenko,https://www.transfermarkt.co.uk/krylya-sovetov...,,,PFK Krylya Sovetov Samara,Kuban Krasnodar (-2018),2:1,domestic_league,2696.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1578188,4129768_182079,4129768,182079,2381,2381,2024-05-12,İbrahim Akdağ,TR1,0,1,...,Bahattin Şimşek,https://www.transfermarkt.co.uk/sivasspor_basa...,3-4-3,4-3-3 Attacking,Sivasspor Kulübü,İstanbul Başakşehir Futbol Kulübü,0:1,domestic_league,6890.0,True
1578836,4095411_472423,4095411,472423,631,631,2024-05-15,Reece James,GB1,0,1,...,Michael Salisbury,https://www.transfermarkt.co.uk/brighton-amp-h...,4-2-3-1,4-2-3-1,Brighton and Hove Albion Football Club,Chelsea Football Club,1:2,domestic_league,631.0,True
1578986,4112923_30321,4112923,30321,367,367,2024-05-15,Óscar Trejo,ES1,0,1,...,Isidro Díaz de Mera Escuderos,https://www.transfermarkt.co.uk/rayo-vallecano...,4-2-3-1,4-2-3-1,Rayo Vallecano de Madrid S.A.D.,Granada Club de Fútbol S.A.D.,2:1,domestic_league,367.0,True
1579823,4109588_314969,4109588,314969,3325,3325,2024-05-18,Júnior Pius,PO1,0,1,...,Manuel Oliveira,https://www.transfermarkt.co.uk/sporting-cp_gd...,3-4-3,4-2-3-1,Sporting Clube de Portugal,Grupo Desportivo de Chaves,3:0,domestic_league,336.0,True


In [79]:
# For hypothesis testing 4 values are needed

# 1. How many games have there been where there was a red card in the first half?
v1 = merged_df[merged_df['red_card_first_half'] == True].shape[0]

# 2. How many games that had a red card in the first half had a winning / losing team?
v2 = red_card_with_winner.shape[0]

# 3. How many games have there been where there was no red card in the first half?
v3 = merged_df[merged_df['red_card_first_half'] == False].shape[0]

# 4. How many games that had no red card in the first half had a winning / losing team?
v4 = won_games[won_games['red_card_first_half'] == False].shape[0]

# Print for debugging
print(f"Total games where a red card was received in the first half: {v1}")
print(f"Total games where no red card was received in the first half: {v3}")
print(f"Number of games drawn with a red card in the first half: {v1-v2}")
print(f"Number of games drawn without a red card in the first half: {v3-v4}")

Total games where a red card was received in the first half: 2625
Total games where no red card was received in the first half: 1581007
Number of games drawn with a red card in the first half: 629
Number of games drawn without a red card in the first half: 371793


In [80]:
from scipy.stats import chi2_contingency

# Observed frequencies
# Format: [Drawn, Not Drawn]
observed = np.array([
    [(v1-v2), (v1-(v1-v2))],  # Red card in the first half
    [(v3-v4), (v3-(v3-v4))]  # No red card in the first half
])

# Perform chi-square test
chi2_stat, p_value, dof, expected = chi2_contingency(observed)

# Print results
print(f"Chi-Square Statistic: {chi2_stat:.4f}")
print(f"P-Value: {p_value:.4f}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequencies:")
print(expected)
print(" ")

# Interpret the p-value
alpha = 0.05  # significance level
if p_value < alpha:
    print("Fail to reject the null hypothesis.")
    print("There is evidence to suggest that receiving a red card in the first half affects the probability of a draw.")
else:
    print("Reject the null hypothesis.")
    print("There is not enough evidence to suggest that receiving a red card in the first half affects the probability of a draw.")

Chi-Square Statistic: 0.2652
P-Value: 0.6066
Degrees of Freedom: 1
Expected Frequencies:
[[6.17320028e+02 2.00767997e+03]
 [3.71804680e+05 1.20920232e+06]]
 
Reject the null hypothesis.
There is not enough evidence to suggest that receiving a red card in the first half affects the probability of a draw.
