In [2]:
# Import necessary libraries
import pandas as pd
from linearmodels.panel import PanelOLS
import statsmodels.api as sm

In [3]:
# Read the hypothesis-1 panel dataset from the project's final-data folder
data_panel = pd.read_csv(
    filepath_or_buffer='../data/final/data_panel_hyp1.csv',
)

In [3]:
# Create future performance variable: for every row, the same player's
# Overall_Performance_Index from their next observed season.
#
# shift(-1) is purely positional within each group, so the rows of each player
# must be in chronological order. Sort explicitly by (Player, Season) rather
# than relying on whatever row order the CSV happens to have. The shifted
# Series keeps the original row labels, so the assignment aligns back onto
# data_panel by index and leaves data_panel's own row order untouched.
# NOTE(review): assumes 'Season' sorts chronologically and that each player's
# seasons are consecutive (no gaps) — confirm against the data.
data_panel['Future_Performance'] = (
    data_panel
    .sort_values(['Player', 'Season'], kind='stable')
    .groupby('Player')['Overall_Performance_Index']
    .shift(-1)
)

# Drop rows where Future_Performance is NaN (e.g. each player's last season,
# which has no following row to shift from)
data_stage1 = data_panel.dropna(subset=['Future_Performance'])

In [4]:
# --- Stage 1: regress next-season performance on current-season covariates ---

# Target: the shifted performance index built in the previous step
Y_stage1 = data_stage1['Future_Performance']

# Regressor column names
X_stage1_vars = [
    'Log_MV2',
    'Overall_Performance_Index',
    'Age',
    'Team_Rating',
    'Min',
    'Latest_Transfer_Fee',
]

# Design matrix: the selected regressors plus an intercept column
X_stage1 = sm.add_constant(data_stage1[X_stage1_vars])

# Pooled OLS with statsmodels' default covariance estimator
stage1_model = sm.OLS(Y_stage1, X_stage1)
stage1_results = stage1_model.fit()

# Print the regression table
print(stage1_results.summary())

                            OLS Regression Results                            
Dep. Variable:     Future_Performance   R-squared:                       0.273
Model:                            OLS   Adj. R-squared:                  0.272
Method:                 Least Squares   F-statistic:                     210.4
Date:                Thu, 10 Oct 2024   Prob (F-statistic):          1.89e-228
Time:                        13:21:30   Log-Likelihood:                -4217.9
No. Observations:                3366   AIC:                             8450.
Df Residuals:                    3359   BIC:                             8493.
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

In [5]:
# Attach the Stage 1 fitted values as the expected future performance.
# .assign returns a fresh DataFrame, so the frame produced by dropna is not
# modified through a possible view.
data_stage1 = data_stage1.assign(
    Expected_Future_Performance=stage1_results.predict(X_stage1)
)

In [6]:
# Stage 2 works on the rows that carry Expected_Future_Performance.
# Index by (Player, Season) — entity first, time second — as PanelOLS expects.
# set_index returns a new DataFrame, so data_stage1 itself is left unchanged.
data_stage2 = data_stage1.set_index(['Player', 'Season'])

In [7]:
# Interaction term: expected future performance x Log_MV1 (element-wise product)
data_stage2['Expected_Future_Performance_Log_MV1'] = (
    data_stage2['Expected_Future_Performance'].mul(data_stage2['Log_MV1'])
)

In [8]:
# --- Stage 2: fixed-effects regression of Log_Return_MV ---

# Regressor column names: the Stage 1 prediction, its interaction with
# Log_MV1, and the remaining covariates
X_stage2_vars = [
    'Expected_Future_Performance',
    'Expected_Future_Performance_Log_MV1',
    'Log_MV1',
    'Age',
    'Team_Rating',
    'Min',
    'Latest_Transfer_Fee',
]

# Dependent and exogenous blocks, both carrying the (Player, Season) index
Y_stage2_panel = data_stage2['Log_Return_MV']
X_stage2_panel = data_stage2[X_stage2_vars]

# Entity (player) fixed effects; no explicit constant column is added here.
# NOTE(review): Expected_Future_Performance is a linear function of Log_MV2
# (a Stage 1 regressor); if Log_Return_MV is itself derived from Log_MV2 this
# would inflate the within R-squared — confirm how Log_Return_MV is built.
stage2_model = PanelOLS(
    Y_stage2_panel,
    X_stage2_panel,
    entity_effects=True,
)
stage2_results = stage2_model.fit(cov_type='robust')

# Print the estimation summary (`summary` is accessed as an attribute, not called)
print(stage2_results.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:          Log_Return_MV   R-squared:                        0.7856
Estimator:                   PanelOLS   R-squared (Between):             -1.0144
No. Observations:                3366   R-squared (Within):               0.7856
Date:                Thu, Oct 10 2024   R-squared (Overall):              0.1912
Time:                        13:21:30   Log-likelihood                    50.235
Cov. Estimator:                Robust                                           
                                        F-statistic:                      1170.8
Entities:                        1122   P-value                           0.0000
Avg Obs:                       3.0000   Distribution:                  F(7,2237)
Min Obs:                       3.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             785.25
                            