In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from linearmodels.panel import PanelOLS

# Read the prepared panel and key every row by (Player, Season), the
# two-level entity/time index that the PanelOLS fits below operate on.
data_panel = (
    pd.read_csv('../data/final/data_panel.csv')
    .set_index(['Player', 'Season'])
)

In [2]:
# Baseline fixed-effects regression of Log_Return_MV.
# Regressors are chosen by exclusion: every column of data_panel other than the
# ones listed here enters X. The later cells add columns to data_panel, so this
# cell only reproduces the summary below when run before them on a fresh kernel.
baseline_excluded = ['Log_Return_MV', 'Log_MV2', 'MP', 'Age_Original']

Y = data_panel['Log_Return_MV']
X = data_panel.drop(columns=baseline_excluded)

# entity_effects=True adds entity fixed effects (entity = first index level);
# cov_type="robust" requests the robust covariance estimator.
fe_model = PanelOLS(Y, X, entity_effects=True)
fe_results = fe_model.fit(cov_type="robust")
print(fe_results)

                          PanelOLS Estimation Summary                           
Dep. Variable:          Log_Return_MV   R-squared:                        0.7001
Estimator:                   PanelOLS   R-squared (Between):             -1.0287
No. Observations:                4488   R-squared (Within):               0.7001
Date:                Sun, Oct 20 2024   R-squared (Overall):              0.2722
Time:                        21:27:49   Log-likelihood                   -637.11
Cov. Estimator:                Robust                                           
                                        F-statistic:                      1120.2
Entities:                        1122   P-value                           0.0000
Avg Obs:                       4.0000   Distribution:                  F(7,3359)
Min Obs:                       4.0000                                           
Max Obs:                       4.0000   F-statistic (robust):             590.93
                            

In [3]:
# Underperformance magnitude: the negated Overall_Performance_Index where the
# index is below zero, and 0 wherever the index is >= 0 (missing values stay missing).
performance_index = data_panel['Overall_Performance_Index']
data_panel['Underperformance_Magnitude'] = (-performance_index).mask(performance_index >= 0, 0)

# Interaction of the underperformance magnitude with Log_MV1.
data_panel['Underperformer_Log_MV1_Interaction'] = (
    data_panel['Underperformance_Magnitude'] * data_panel['Log_MV1']
)

# Fixed-effects regression in which the two underperformance columns replace
# Overall_Performance_Index and Perf_MV1_Interaction. X is again defined by
# exclusion, so any column added to data_panel by a later cell would be picked
# up as a regressor if this cell were re-run afterwards.
underperf_excluded = [
    'Log_Return_MV',
    'Log_MV2',
    'Perf_MV1_Interaction',
    'Overall_Performance_Index',
    'MP',
    'Age_Original',
]
Y = data_panel['Log_Return_MV']
X = data_panel.drop(columns=underperf_excluded)

fe_model = PanelOLS(Y, X, entity_effects=True)
fe_results = fe_model.fit(cov_type="robust")
print(fe_results)

                          PanelOLS Estimation Summary                           
Dep. Variable:          Log_Return_MV   R-squared:                        0.7004
Estimator:                   PanelOLS   R-squared (Between):             -1.0920
No. Observations:                4488   R-squared (Within):               0.7004
Date:                Sun, Oct 20 2024   R-squared (Overall):              0.2568
Time:                        21:27:52   Log-likelihood                   -634.71
Cov. Estimator:                Robust                                           
                                        F-statistic:                      1121.9
Entities:                        1122   P-value                           0.0000
Avg Obs:                       4.0000   Distribution:                  F(7,3359)
Min Obs:                       4.0000                                           
Max Obs:                       4.0000   F-statistic (robust):             598.59
                            

In [4]:
# Bucket every observation into a quintile of Log_MV1. The tier labels live in
# a local Series rather than on data_panel, so re-running this cell does not
# leave a stray 'MV1_Tier' column behind.
mv1_tier = pd.qcut(
    data_panel['Log_MV1'],
    q=5,
    labels=['Lowest', 'Low', 'Medium', 'High', 'Highest'],
)

# One dummy column per tier; drop_first=True omits 'Lowest', which therefore
# acts as the reference category.
tier_dummies = pd.get_dummies(mv1_tier, prefix='MV1_Tier', drop_first=True)

# Drop any MV1_Tier_* columns left by a previous run before attaching the fresh
# ones. Without this, a second execution would append duplicate column names and
# data_panel['MV1_Tier_Low'] would become a DataFrame instead of a Series.
# On a first run nothing is dropped and the resulting frame is unchanged from
# calling get_dummies on the whole frame.
data_panel = pd.concat(
    [data_panel.drop(columns=tier_dummies.columns, errors='ignore'), tier_dummies],
    axis=1,
)

In [5]:
# Interaction column -> tier dummy it is built from. 'Lowest' has no dummy
# (it was dropped as the reference tier), so it has no interaction either.
tier_interactions = {
    'Underperf_Low': 'MV1_Tier_Low',
    'Underperf_Medium': 'MV1_Tier_Medium',
    'Underperf_High': 'MV1_Tier_High',
    'Underperf_Highest': 'MV1_Tier_Highest',
}

# Each interaction is Underperformance_Magnitude times the matching tier dummy.
for interaction_col, tier_col in tier_interactions.items():
    data_panel[interaction_col] = data_panel['Underperformance_Magnitude'] * data_panel[tier_col]

# Dependent variable
Y = data_panel['Log_Return_MV']

# Regressors, listed explicitly: main effect, tier dummies, tier interactions,
# then the control variables.
regressor_columns = [
    'Underperformance_Magnitude',
    *tier_interactions.values(),
    *tier_interactions.keys(),
    'Age',
    'Min',
    'Team_Rating',
    'Latest_Transfer_Fee',
]
X = data_panel[regressor_columns]

# Fixed-effects fit with entity effects and the robust covariance estimator.
fe_model = PanelOLS(Y, X, entity_effects=True)
fe_results = fe_model.fit(cov_type="robust")
print(fe_results)

                          PanelOLS Estimation Summary                           
Dep. Variable:          Log_Return_MV   R-squared:                        0.5710
Estimator:                   PanelOLS   R-squared (Between):             -10.538
No. Observations:                4488   R-squared (Within):               0.5710
Date:                Sun, Oct 20 2024   R-squared (Overall):             -2.1784
Time:                        21:27:53   Log-likelihood                   -1440.7
Cov. Estimator:                Robust                                           
                                        F-statistic:                      343.25
Entities:                        1122   P-value                           0.0000
Avg Obs:                       4.0000   Distribution:                 F(13,3353)
Min Obs:                       4.0000                                           
Max Obs:                       4.0000   F-statistic (robust):             201.01
                            

In [6]:
# Write the panel, including every column added to data_panel in the cells
# above, to a new CSV. The (Player, Season) index is written out as well,
# since to_csv includes the index by default.
data_panel.to_csv('../data/final/data_panel_hyp1.csv')
