In [1]:
# Import necessary libraries
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS
from sklearn.preprocessing import StandardScaler

# Read the panel dataset that every later cell works on
panel_csv_path = '../data/final/data_panel_hyp1.csv'
data_panel = pd.read_csv(panel_csv_path)

In [2]:
# Create variable for performance in the previous season.
# groupby(...).shift(1) lags by *row position* inside each player group, so the
# rows must be in chronological order before shifting; otherwise the "previous
# season" value is simply whatever row preceded it in the CSV.
# NOTE(review): assumes Season values sort chronologically (numeric year or a
# consistently formatted label) - confirm against the data file.
data_panel = data_panel.sort_values(['Player', 'Season'], kind='stable')
data_panel['Performance_Prev_Season'] = (
    data_panel.groupby('Player')['Overall_Performance_Index'].shift(1)
)

# Drop every row that has a missing value in ANY column. This removes each
# player's first row (it has no lag) together with any row that was already
# incomplete in another column. Rebinding instead of inplace=True keeps the
# same rows while avoiding in-place mutation of the frame.
data_panel = data_panel.dropna()

In [3]:
# Step 1: Calculate Overvaluation Measure

# Pooled OLS of Log_MV1 on the lagged performance index plus controls.
# The residuals of this fit serve as the overvaluation measure.
X_overvaluation_vars = [
    'Performance_Prev_Season',
    'Age',
    'Team_Rating',
    'Min',
    'Latest_Transfer_Fee',
]

# Dependent variable, then the regressor matrix with an intercept column added
Y_overvaluation = data_panel['Log_MV1']
X_overvaluation = sm.add_constant(data_panel[X_overvaluation_vars])

# Build the model and estimate it
overvaluation_model = sm.OLS(endog=Y_overvaluation, exog=X_overvaluation)
overvaluation_results = overvaluation_model.fit()

In [4]:
# Overvaluation measure = residual of the market-value regression
data_panel['Overvaluation'] = overvaluation_results.resid

# Rescale the measure with StandardScaler (centres on the mean and divides by
# the standard deviation), writing the result back over the raw residuals
scaler = StandardScaler()
overvaluation_column = data_panel[['Overvaluation']]
data_panel['Overvaluation'] = scaler.fit_transform(overvaluation_column)

In [5]:
# Interaction term: Overvaluation multiplied element-wise by Underperformance_Magnitude
data_panel['Overvaluation_x_Underperformance'] = (
    data_panel['Overvaluation'].mul(data_panel['Underperformance_Magnitude'])
)

In [6]:
# Regressors for the interaction specification
X_hypB_int_vars = [
    'Overvaluation',
    'Underperformance_Magnitude',
    'Overvaluation_x_Underperformance',
    'Age',
    'Team_Rating',
    'Min',
    'Latest_Transfer_Fee',
]

# PanelOLS expects an (entity, time) MultiIndex: Player is the entity level,
# Season the time level
data_hypB_int = data_panel.set_index(['Player', 'Season'])

Y_hypB_int_panel = data_hypB_int['Log_Return_MV']
X_hypB_int_panel = data_hypB_int[X_hypB_int_vars]

# Fixed-effects model with entity (player) effects and a heteroskedasticity-
# robust covariance estimator.
# NOTE(review): with repeated observations per player, entity-clustered
# standard errors (cov_type='clustered', cluster_entity=True) may be the more
# conservative choice - confirm which is intended before reporting inference.
hypB_int_model = PanelOLS(
    dependent=Y_hypB_int_panel,
    exog=X_hypB_int_panel,
    entity_effects=True,
)
hypB_int_results = hypB_int_model.fit(cov_type='robust')

# Print the estimation summary
print(hypB_int_results.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:          Log_Return_MV   R-squared:                        0.4911
Estimator:                   PanelOLS   R-squared (Between):             -1.6811
No. Observations:                3366   R-squared (Within):               0.4911
Date:                Thu, Oct 10 2024   R-squared (Overall):             -0.3459
Time:                        13:21:33   Log-likelihood                    246.04
Cov. Estimator:                Robust                                           
                                        F-statistic:                      308.37
Entities:                        1122   P-value                           0.0000
Avg Obs:                       3.0000   Distribution:                  F(7,2237)
Min Obs:                       3.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             200.30
                            