In [57]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from os.path import join

import warnings
warnings.filterwarnings(action='ignore')

from sklearn.preprocessing import StandardScaler

# --- Paths -------------------------------------------------------------------
# Parent of the current working directory; every data path below is resolved
# against it.
directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Built with os.path.join rather than a hard-coded backslash so the notebook
# also runs on POSIX systems (on Windows the resulting strings are unchanged).
cambodia_file = os.path.join('Game Data', 'joinedDataCambodia.csv')
vietnam_file = os.path.join('Game Data', 'joinedDataVietnam.csv')
merged_file = os.path.join('Game Data', 'mergedData.csv')

sensitivity_folder = r'Sensitivity Analysis'
sensitivity_file = r'sensitivity_analysis_processed.csv'
sensitivity_results = r'Results'

clusters_folder = "Clusters"
subsidy_max = 10

# --- Data loading ------------------------------------------------------------
cambodia_csv = pd.read_csv(os.path.join(directory, cambodia_file), engine='python')
vietnam_csv = pd.read_csv(os.path.join(directory, vietnam_file), engine='python')
merged_csv = pd.read_csv(os.path.join(directory, merged_file), engine='python')
sensitivity_csv = pd.read_csv(
    os.path.join(directory, sensitivity_folder, sensitivity_file),
    engine='python',
)

countries = ['Vietnam', 'Cambodia']

# --- Metric names ------------------------------------------------------------
# Plain metric names (no unit suffix).
metrics_title = [
    'Income Per Patch',
    'Net Production Per Patch',
    'NCH Patches',
    'Non-HS Strategy Adoption',
    'Production Benefit Per NCH Patch',
    'Income Deviation',
    'HS Patches',
]

# Same seven metrics, with "(%)" appended to the percentage-valued ones.
metrics = [
    'Income Per Patch',
    'Net Production Per Patch',
    'NCH Patches (%)',
    'Non-HS Strategy Adoption (%)',
    'Production Benefit Per NCH Patch',
    'Income Deviation',
    'HS Patches (%)',
]

# Extended metric list; its first seven entries are identical to metrics_title.
metrics_extended = [
    'Income Per Patch',
    'Net Production Per Patch',
    'NCH Patches',
    'Non-HS Strategy Adoption',
    'Production Benefit Per NCH Patch',
    'Income Deviation',
    'HS Patches',
    'Strategy Switches',
    'Switches to NCH Strategy',
    'Popular Strategy Adoption',
    #  'HS Cells (Non-NCH Strategy)',
    #  'HS Cells (NCH Strategy)',
    'NCH Patches (HS Strategy)',
    'NCH Patches (Non-HS Strategy)',
    'NCH Efficiency (HS Strategy)',
    'NCH Efficiency (Non-HS Strategy)',
]

# Labels for metrics_extended, position by position, with "(%)" on the
# percentage-valued entries.
metrics_extended_label = [
    'Income Per Patch',
    'Net Production Per Patch',
    'NCH Patches (%)',
    'Non-HS Strategy Adoption (%)',
    'Production Benefit Per NCH Patch',
    'Income Deviation',
    'HS Patches (%)',
    'Strategy Switches',
    'Switches to NCH Strategy',
    'Popular Strategy Adoption (%)',
    #  'HS Cells (Non-NCH Strategy)',
    #  'HS Cells (NCH Strategy)',
    'NCH Patches (HS Strategy) (%)',
    'NCH Patches (Non-HS Strategy) (%)',
    'NCH Efficiency (HS Strategy)',
    'NCH Efficiency (Non-HS Strategy)',
]

# Positions in metrics_extended_label whose label ends in "(%)".
set_percent_inds = [2, 3, 6, 9, 10, 11]

# NOTE(review): presumably the column offset where metric columns start in one
# of the game-data frames -- it is not used in the visible cells; confirm.
metrics_index = 16

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
def to_percentage(x, pos):
    """Format the fraction ``x`` as a whole-number percentage, e.g. 0.5 -> '50%'.

    ``pos`` is accepted but not used; the two-argument signature matches what
    ``matplotlib.ticker.FuncFormatter`` passes to its callback.
    """
    scaled = x * 100
    return '{:.0f}%'.format(scaled)
def to_percentage_decimal(x, pos):
    """Format the fraction ``x`` as a percentage with one decimal, e.g. 0.5 -> '50.0%'.

    ``pos`` is accepted but not used; the two-argument signature matches what
    ``matplotlib.ticker.FuncFormatter`` passes to its callback.
    """
    scaled = x * 100
    return '{:.1f}%'.format(scaled)

In [63]:
# Standardized OLS regression of every outcome on the model parameters and
# their pairwise interaction terms. All inputs and outputs are z-scored, so
# the fitted coefficients are directly comparable in magnitude.

import statsmodels.api as sm
from itertools import combinations

# Terms are reported only when their p-value is below this threshold.
P_VALUE_CUTOFF = 0.1


def _zscore(frame):
    """Return a new DataFrame with every column of ``frame`` standardized.

    StandardScaler standardizes each column independently, so scaling the whole
    matrix at once matches scaling column by column. A fresh frame is returned;
    ``frame`` itself is never modified.
    """
    scaled = StandardScaler().fit_transform(frame.to_numpy(dtype=float))
    return pd.DataFrame(scaled, index=frame.index, columns=frame.columns)


# Parameters are the first ten columns and outcomes start at column 11
# (column 10 is skipped by these slices). Building new frames instead of
# assigning into the iloc slices keeps sensitivity_csv untouched and avoids
# chained-assignment / in-place dtype-upcast problems.
parameters_df = _zscore(sensitivity_csv.iloc[:, :10])
outcomes_df = _zscore(sensitivity_csv.iloc[:, 11:])
outcomes_df.columns = metrics_title

# Pairwise interaction terms: product of the two standardized parameters,
# then standardized again. Column names are "<param_a>*<param_b>".
interaction_terms = {
    '*'.join(pair): parameters_df[list(pair)].prod(axis=1)
    for pair in combinations(parameters_df.columns, 2)
}
interactions_df = _zscore(pd.DataFrame(interaction_terms, index=parameters_df.index))

# Design matrix: intercept, main effects, then interactions. It is a separate
# object, so parameters_df keeps only the ten main-effect columns.
df = sm.add_constant(pd.concat([parameters_df, interactions_df], axis=1))

# One table per outcome (same order as outcomes_df.columns), holding the
# coefficients sorted by absolute size and filtered by P_VALUE_CUTOFF.
coeff_dfs = []

for column in outcomes_df.columns:
    y = outcomes_df[column].values
    model = sm.OLS(y, df).fit()

    print(model.summary())

    # Order terms by decreasing absolute coefficient.
    order = model.params.abs().sort_values(ascending=False).index
    temp_df = pd.DataFrame({
        'Coefficient': model.params.reindex(order),
        'P-value': model.pvalues.reindex(order),
    })

    coeff_dfs.append(temp_df[temp_df['P-value'] < P_VALUE_CUTOFF])

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.734
Model:                            OLS   Adj. R-squared:                  0.734
Method:                 Least Squares   F-statistic:                 2.595e+04
Date:                Sat, 22 Jun 2024   Prob (F-statistic):               0.00
Time:                        12:35:15   Log-Likelihood:            -3.9119e+05
No. Observations:              517000   AIC:                         7.825e+05
Df Residuals:                  516944   BIC:                         7.831e+05
Df Model:                          55                                         
Covariance Type:            nonrobust                                         
                                       coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------------
const   

In [64]:
# Print each coefficient table under the metric name at the same position in
# metrics_extended.
for position, coeff_table in enumerate(coeff_dfs):
    heading = metrics_extended[position]
    print("\n", heading)
    print(coeff_table)


 Income Per Patch
                                  Coefficient        P-value
subsidy                              0.672735   0.000000e+00
initialExperience                    0.286570   0.000000e+00
nchPercent                           0.264440   0.000000e+00
initialExperience*nchPercent        -0.264394   0.000000e+00
subsidy*nchPercent                   0.140221   0.000000e+00
carePatchMean                       -0.118356   0.000000e+00
farmSize                             0.088418   0.000000e+00
subsidy*carePatchMean                0.079547   0.000000e+00
trustIndexMean                       0.071602   0.000000e+00
careFarmerMean                       0.064167   0.000000e+00
subsidy*careOthersMean              -0.034198   0.000000e+00
giniCoeff                            0.026849  7.914413e-246
trustIndexMean*initialExperience    -0.025976  2.033786e-286
careOthersMean                       0.025027  3.680054e-266
farmSize*subsidy                    -0.021952  2.387048e-172
subsi

In [65]:
# Write each coefficient table to
# <directory>/<sensitivity_folder>/<sensitivity_results>/<metric name>.csv.
results_dir = os.path.join(directory, sensitivity_folder, sensitivity_results)
# Create the output folder if needed so the export works on a fresh checkout.
os.makedirs(results_dir, exist_ok=True)

for i, coeff_table in enumerate(coeff_dfs):
    coeff_table.to_csv(os.path.join(results_dir, metrics_extended[i] + ".csv"))

In [66]:
# Regroup the per-metric coefficient tables by parameter: for every parameter,
# collect each regression term that involves it (main effect or interaction)
# across all metrics. Row labels are "<term>, <metric name>".
param_dict = {}
for parameter in parameters_df.columns:
    # Compare whole "*"-separated terms rather than substrings, so a parameter
    # whose name happens to be contained in another parameter's name is not
    # attributed that parameter's coefficients.
    wanted_terms = set(parameter.split('*'))

    labels = []
    records = []
    for i, coeff_table in enumerate(coeff_dfs):
        for row, stats in coeff_table.iterrows():
            if wanted_terms <= set(row.split('*')):
                labels.append(row + ', ' + metrics_extended[i])
                records.append({
                    'Coefficient': stats['Coefficient'],
                    'P-value': stats['P-value'],
                })

    # Build the table once instead of growing it row by row with .loc.
    table = pd.DataFrame(records, index=labels, columns=['Coefficient', 'P-value'])
    if not table.empty:
        # Largest absolute coefficient first.
        table = table.sort_values(by='Coefficient', key=abs, ascending=False)
    param_dict[parameter] = table


In [67]:
# Write each per-parameter table to
# <directory>/<sensitivity_folder>/<sensitivity_results>_metrics/<parameter>.csv.
metrics_dir = os.path.join(directory, sensitivity_folder, sensitivity_results+"_metrics")
# Create the output folder if needed so the export works on a fresh checkout.
os.makedirs(metrics_dir, exist_ok=True)

for col, table in param_dict.items():
    table.to_csv(os.path.join(metrics_dir, col+".csv"))