In [None]:
# Before running, install Python together with pandas, numpy and statsmodels.
import zipfile
import pickle
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [None]:
# Open the zipped, preprocessed data set and unpickle its first archive member into `df`.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# load an archive that comes from a trusted source.
with zipfile.ZipFile('../Data/df_preproccessed.zip', mode='r') as archive:
    first_member = archive.namelist()[0]  # first entry listed in the archive
    with archive.open(first_member) as pickled:
        df = pickle.load(pickled)

# define logit model function (exogenous regime-switching logit)
def _fit_regime_logit(df_regime, regressor, deviation_value, maxiter, method):
    """Fit one binary logit of a deviation indicator on a constant plus one regressor."""
    X = sm.add_constant(df_regime[[regressor]])
    # dependent variable: 1 where the deviation equals `deviation_value`, otherwise 0
    y = (df_regime['deviation_discretized'] == deviation_value).astype(int)
    return sm.Logit(y, X).fit(maxiter=maxiter, method=method)


def regime_switching_logit(df: pd.DataFrame, maxiter: int = 1000, method: str = 'nm'):
    """Fit the two regime-specific logit models and return both fitted results.

    The sample is split on ``competitive_dispatch_status_discretized`` and a
    separate logit is estimated on each part:

    * status == 1 (suspected withholding): indicator of
      ``deviation_discretized == -1`` regressed on a constant and
      ``net_profit_withhold_marginal``.
    * status == 0 (suspected push-in): indicator of
      ``deviation_discretized == 1`` regressed on a constant and
      ``net_profit_pushin_marginal``.

    Parameters
    ----------
    df : pd.DataFrame
        Data containing the four columns named above. The argument is
        required; the previous signature accidentally used the ``DataFrame``
        class itself as a default value instead of as a type annotation.
    maxiter : int, default 1000
        Iteration limit handed to ``Logit.fit``.
    method : str, default 'nm'
        Optimizer name handed to ``Logit.fit``.

    Returns
    -------
    list
        ``[result_withholding, result_pushin]`` -- the two objects returned
        by ``Logit.fit``, in that order.
    """
    status = df['competitive_dispatch_status_discretized']

    # separate logit models for suspected withholding and suspected push-in
    result1 = _fit_regime_logit(
        df.loc[status == 1], 'net_profit_withhold_marginal', -1, maxiter, method
    )
    result2 = _fit_regime_logit(
        df.loc[status == 0], 'net_profit_pushin_marginal', 1, maxiter, method
    )

    return [result1, result2]

## Run model: main specification, market-level

In [None]:
# Estimate both regime models on the full sample, then print each summary in turn.
results = regime_switching_logit(df)

for model_no, result in enumerate(results, start=1):
    print(f"Model {model_no} Summary:\n")
    print(result.summary())

## Run model: slice by company

In [None]:
# Rank companies by size; size is the largest absolute net exposure a company reaches.
comp_size = df.groupby('company')['net_exposure'].agg(['min', 'max'])
largest_abs_exposure = comp_size[['min', 'max']].abs().max(axis=1)
comp_size = comp_size.assign(abs=largest_abs_exposure).sort_values('abs', ascending=False)
comp_size = comp_size.assign(rank=range(1, len(comp_size) + 1)).reset_index()  # rank 1 = largest
rank_df = comp_size[['company', 'rank']].copy()

# Estimate both regime models company by company, in the order given by rank_df.
results = []
for comp in rank_df['company']:
    print(f'{comp}')
    company_mask = df['company'] == comp
    df_comp = df.loc[company_mask]
    result = regime_switching_logit(df_comp)
    withholding_fit, pushin_fit = result  # the function returns exactly two fits
    print(withholding_fit.summary(), pushin_fit.summary())
    results.append(result)