# Statistical Tests for Accuracy Differences

Run McNemar's test on pairs of models to check whether the difference in their accuracy is statistically significant.

In [1]:
import pandas as pd
from statsmodels.stats.contingency_tables import mcnemar

In [2]:
# Empty table that accumulates the p-values of each model comparison
results = pd.DataFrame(
    columns=[
        'Model A',
        'Model B',
        'p, Non-Exact',
        'p, Non-Exact, Continuity Corrected',
        'p, Exact',
    ]
)

## Functions

In [3]:
def run_test(merged_data, model_1_name, model_2_name):
    '''
    Run McNemar's Test on two models' paired predictions and record the p-values.

    Prints both accuracies, the 2x2 contingency table and the outcome of three
    variants of the test (non-exact, non-exact with continuity correction, exact),
    then appends one row holding the three p-values to the module-level
    `results` DataFrame.

    Parameters:
    - merged_data: a pandas DataFrame with columns 'correct_model_1' and 'correct_model_2', which are boolean values indicating whether the prediction was correct for each model. Each row is one observation scored by both models; no other column is read here.
    - model_1_name: label written to the 'Model A' column of `results`.
    - model_2_name: label written to the 'Model B' column of `results`.

    Returns:
    - None. The outcome is the printed report and the new row in `results`.
    '''

    m1_correct = merged_data['correct_model_1']
    m2_correct = merged_data['correct_model_2']

    print('model 1 accuracy: ' + str(m1_correct.mean()))
    print('model 2 accuracy: ' + str(m2_correct.mean()))

    # Compare against True/False explicitly (instead of negating with ~) so a
    # value that is neither True nor False is counted in no cell.
    m1_right, m1_wrong = m1_correct == True, m1_correct == False
    m2_right, m2_wrong = m2_correct == True, m2_correct == False

    # Create a contingency table
    # Rows for m1 correct and incorrect
    # Columns for m2 correct and incorrect
    # int(...) keeps the cells plain Python ints so the printed table is unchanged.
    contingency_table = [[int((m1_right & m2_right).sum()), int((m1_right & m2_wrong).sum())],
                         [int((m1_wrong & m2_right).sum()), int((m1_wrong & m2_wrong).sum())]]

    print('McNemar\'s Test contingency table:')
    print(contingency_table)

    # Heading to print and mcnemar options for each variant, in the order of the
    # p-value columns of `results`.
    variants = [
        ('Test results, non-exact, no continuity correction:', {'exact': False, 'correction': False}),
        ('Test results, non-exact, with continuity correction:', {'exact': False, 'correction': True}),
        ('Test results, exact, no continuity correction:', {'exact': True, 'correction': False}),
    ]

    # Each variant is computed once; the same result object is printed and
    # queried for its p-value.
    # NOTE: when both off-diagonal cells are zero the non-exact statistic divides
    # by zero and a RuntimeWarning is emitted; the p-values are stored as returned.
    p_values = []
    for heading, options in variants:
        print(heading)
        outcome = mcnemar(contingency_table, **options)
        print(outcome)
        p_values.append(outcome.pvalue)

    # Add results to the results dataframe
    results.loc[len(results)] = [model_1_name, model_2_name] + p_values

def test_models(model_1_short_path, model_2_short_path, model_1_name, model_2_name):
    '''
    Load two prediction files, score each row and compare the models with run_test.

    Parameters:
    - model_1_short_path, model_2_short_path: Excel file paths relative to '../../Data/Predictions/'. In each file the third column is read as the true label and the fourth as the prediction.
    - model_1_name, model_2_name: labels passed through to run_test.
    '''

    print('running for ' + model_1_short_path + ' and ' + model_2_short_path)

    scored_frames = []
    for short_path in (model_1_short_path, model_2_short_path):
        # Load the predictions and show a preview
        predictions = pd.read_excel('../../Data/Predictions/' + short_path)
        print(predictions.head())
        # A row is 'correct' when the true label (third column) equals the prediction (fourth column)
        predictions['correct'] = predictions.iloc[:, 2] == predictions.iloc[:, 3]
        scored_frames.append(predictions)

    first_model, second_model = scored_frames

    # Pair the rows of both models on 'ticker' and 'fixed_quarter_date';
    # the suffixes turn the two 'correct' columns into 'correct_model_1' / 'correct_model_2'
    merged_data = pd.merge(
        first_model,
        second_model,
        on=['ticker', 'fixed_quarter_date'],
        suffixes=('_model_1', '_model_2'),
    )

    # Run test
    run_test(merged_data, model_1_name, model_2_name)

## Run Tests

In [4]:
# Logistic regression, previous rating included: model 2 vs. model 3
test_models(
    model_1_short_path='Logistic Regression/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx',
    model_1_name='Logistic - Include Previous Rating Model 2',
    model_2_name='Logistic - Include Previous Rating Model 3',
)

running for Logistic Regression/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx and Logistic Regression/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating include_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating include_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                     

  statistic = (np.abs(n1 - n2) - corr)**2 / (1. * (n1 + n2))
  statistic = (np.abs(n1 - n2) - corr)**2 / (1. * (n1 + n2))


In [5]:
# Logistic regression, previous rating excluded: model 2 vs. model 3
test_models(
    model_1_short_path='Logistic Regression/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx',
    model_1_name='Logistic - Exclude Previous Rating Model 2',
    model_2_name='Logistic - Exclude Previous Rating Model 3',
)

running for Logistic Regression/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx and Logistic Regression/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating exclude_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                          AA
2   ABBV         2016-04-01      A                                          AA
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                          AA
2   ABBV         2016-04-01      A                                     

In [6]:
# Logistic regression, SMOTE rating-change task: model 2 vs. model 3
test_models(
    model_1_short_path='Logistic Regression/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx',
    model_1_name='Logistic - SMOTE Rating Change Model 2',
    model_2_name='Logistic - SMOTE Rating Change Model 3',
)

running for Logistic Regression/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx and Logistic Regression/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter Date   
1   ABBV         2015-04-01                Same As Last Fixed Quarter Date   
2   ABBV         2016-04-01                Same As Last Fixed Quarter Date   
3    ABC         2012-04-01                Same As Last Fixed Quarter Date   
4    ABC         2013-01-01                Same As Last Fixed Quarter Date   

   smote_rating_change_model_2_predictions  
0          Same As Last Fixed Quarter Date  
1  Downgrade Since Last Fixed Quarter Date  
2          Same As Last Fixed Quarter Date  
3          Same As Last Fixed Quarter Date  
4          Same As Last Fixed Quarter Date  
  ticker fixed_quarter_date Change Direction Since Last Fix

In [7]:
# XGBoost, previous rating included: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx',
    model_1_name='XGBoost - Include Previous Rating Model 2',
    model_2_name='XGBoost - Include Previous Rating Model 3',
)

running for XGBoost/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx and XGBoost/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating include_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating include_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC        

  statistic = (np.abs(n1 - n2) - corr)**2 / (1. * (n1 + n2))
  statistic = (np.abs(n1 - n2) - corr)**2 / (1. * (n1 + n2))


In [8]:
# XGBoost, previous rating excluded: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx',
    model_1_name='XGBoost - Exclude Previous Rating Model 2',
    model_2_name='XGBoost - Exclude Previous Rating Model 3',
)

running for XGBoost/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx and XGBoost/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating exclude_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                          BB
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC        

In [9]:
# XGBoost, SMOTE rating-change task: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx',
    model_1_name='XGBoost - SMOTE Rating Change Model 2',
    model_2_name='XGBoost - SMOTE Rating Change Model 3',
)

running for XGBoost/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx and XGBoost/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter Date   
1   ABBV         2015-04-01                Same As Last Fixed Quarter Date   
2   ABBV         2016-04-01                Same As Last Fixed Quarter Date   
3    ABC         2012-04-01                Same As Last Fixed Quarter Date   
4    ABC         2013-01-01                Same As Last Fixed Quarter Date   

  smote_rating_change_model_2_predictions  
0         Same As Last Fixed Quarter Date  
1         Same As Last Fixed Quarter Date  
2         Same As Last Fixed Quarter Date  
3         Same As Last Fixed Quarter Date  
4         Same As Last Fixed Quarter Date  
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL   

### Graph NN Tests

In [10]:
# Reformatted GNN data

# Example: preview one raw GNN prediction file before it is reformatted
# (it is read from the 'Graph Neural Network/Inductive' predictions folder)
inductive_gnn_test = pd.read_excel('../../Data/Predictions/Graph Neural Network/Inductive/exclude_previous_rating_model_2_predictions.xlsx')
print(inductive_gnn_test.head())

def reformat_gnn_data(short_path):
    '''
    Save a copy of a GNN prediction file with separate 'ticker' and 'fixed_quarter_date' columns.

    The file is read from '../../Data/Predictions/Graph Neural Network/' + short_path.
    Its 'node' column is split on ' : ' into 'ticker' and 'fixed_quarter_date', and
    only the columns 'ticker', 'fixed_quarter_date', 'target' and 'pred' (in that
    order) are written next to the input, with 'reformatted_' prefixed to the file name.

    Parameters:
    - short_path: path of the Excel file relative to the GNN predictions folder, using '/' as separator.
    '''
    gnn_dir = '../../Data/Predictions/Graph Neural Network/'

    # Load GNN data
    gnn_df = pd.read_excel(gnn_dir + short_path)
    # Split node on ' : ' into 'ticker' and 'fixed_quarter_date'
    gnn_df[['ticker', 'fixed_quarter_date']] = gnn_df['node'].str.split(' : ', expand=True)
    # Keep only 'ticker', 'fixed_quarter_date', 'target', pred
    gnn_df = gnn_df[['ticker', 'fixed_quarter_date', 'target', 'pred']]

    # Prefix only the file name. Splitting on the last '/' keeps nested folders
    # intact and still renames a bare file name, so the input is never overwritten.
    directory, separator, file_name = short_path.rpartition('/')
    new_path = directory + separator + 'reformatted_' + file_name

    # Save to new file
    gnn_df.to_excel(gnn_dir + new_path, index=False)

# Reformat the transductive and inductive prediction files of models 2 and 3
for gnn_short_path in (
    'Transductive/exclude_previous_rating_model_2_predictions.xlsx',
    'Transductive/exclude_previous_rating_model_3_predictions.xlsx',
    'Inductive/exclude_previous_rating_model_2_predictions.xlsx',
    'Inductive/exclude_previous_rating_model_3_predictions.xlsx',
):
    reformat_gnn_data(gnn_short_path)

  target pred               node
0     AA   AA  AAPL : 2016-07-01
1      A    A  ABBV : 2015-04-01
2      A    A  ABBV : 2016-04-01
3      A    A   ABC : 2012-04-01
4      A  BBB   ABC : 2013-01-01


#### Internal GNN Comparisons

In [11]:
# Transductive GNN: model 2 vs. model 3
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 2',
    model_2_name='GNN - Transductive - Model 3',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx and Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A    A
model 1 accuracy: 0.6632653061224489
model 2 accuracy: 0.676530612244898
McNemar's Test contingency table:
[[611, 39], [52, 278]]
Test results, non-exact, no continuity correction:
pvalue      0.17295491798841672
statistic   1.8571428571428572
Test results, non-exact, with continuity correction:
pvalue      0.208

In [12]:
# Inductive GNN: model 2 vs. model 3
test_models(
    model_1_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_1_name='GNN - Inductive - Model 2',
    model_2_name='GNN - Inductive - Model 3',
)

running for Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx and Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
model 1 accuracy: 0.6653061224489796
model 2 accuracy: 0.6836734693877551
McNemar's Test contingency table:
[[600, 52], [70, 258]]
Test results, non-exact, no continuity correction:
pvalue      0.1031768800930285
statistic   2.6557377049180326
Test results, non-exact, with continuity correction:
pvalue      0.123778009

#### Transductive versus Inductive

In [13]:
# GNN model 2: transductive vs. inductive
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 2',
    model_2_name='GNN - Inductive - Model 2',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx and Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_2_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
model 1 accuracy: 0.6632653061224489
model 2 accuracy: 0.6653061224489796
McNemar's Test contingency table:
[[605, 45], [47, 283]]
Test results, non-exact, no continuity correction:
pvalue      0.8348273291852392
statistic   0.043478260869565216
Test results, non-exact, with continuity correction:
pvalue      0.9169

In [14]:
# GNN model 3: transductive vs. inductive
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 3',
    model_2_name='GNN - Inductive - Model 3',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A    A
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
model 1 accuracy: 0.676530612244898
model 2 accuracy: 0.6836734693877551
McNemar's Test contingency table:
[[617, 46], [53, 264]]
Test results, non-exact, no continuity correction:
pvalue      0.48172771631662636
statistic   0.494949494949495
Test results, non-exact, with continuity correction:
pvalue      0.5464935

#### GNN and Other Classifiers

In [15]:
# Inductive GNN model 3 vs. logistic regression retrained on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/lr_retrain_predictions.xlsx',
    model_1_name='GNN - Inductive - Model 3',
    model_2_name='Logistic Regression - Retrain on GNN Data',
)

running for Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/lr_retrain_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date Rating prediction
0   AAPL         2016-07-01     AA         AA
1   ABBV         2015-04-01      A         AA
2   ABBV         2016-04-01      A         AA
3    ABC         2012-04-01      A          A
4    ABC         2013-01-01      A          A
model 1 accuracy: 0.6836734693877551
model 2 accuracy: 0.639795918367347
McNemar's Test contingency table:
[[511, 159], [116, 194]]
Test results, non-exact, no continuity correction:
pvalue      0.00951434446647381
statistic   6.723636363636364
Test results, non-exact, with continuity correction:

In [16]:
# Inductive GNN model 3 vs. XGBoost retrained on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/xgb_retrain_predictions.xlsx',
    model_1_name='GNN - Inductive - Model 3',
    model_2_name='XGBoost - Retrain on GNN Data',
)

running for Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/xgb_retrain_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date Rating prediction
0   AAPL         2016-07-01     AA         AA
1   ABBV         2015-04-01      A          A
2   ABBV         2016-04-01      A          A
3    ABC         2012-04-01      A          A
4    ABC         2013-01-01      A          A
model 1 accuracy: 0.6836734693877551
model 2 accuracy: 0.9030612244897959
McNemar's Test contingency table:
[[648, 22], [237, 73]]
Test results, non-exact, no continuity correction:
pvalue      1.0433221550712604e-40
statistic   178.47490347490347
Test results, non-exact, with continuity correct

In [17]:
# Transductive GNN model 3 vs. logistic regression retrained on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/lr_retrain_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 3',
    model_2_name='Logistic Regression - Retrain on GNN Data',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/lr_retrain_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A    A
  ticker fixed_quarter_date Rating prediction
0   AAPL         2016-07-01     AA         AA
1   ABBV         2015-04-01      A         AA
2   ABBV         2016-04-01      A         AA
3    ABC         2012-04-01      A          A
4    ABC         2013-01-01      A          A
model 1 accuracy: 0.676530612244898
model 2 accuracy: 0.639795918367347
McNemar's Test contingency table:
[[515, 148], [112, 205]]
Test results, non-exact, no continuity correction:
pvalue      0.025573669368214657
statistic   4.984615384615385
Test results, non-exact, with continuity correcti

In [18]:
# Transductive GNN model 3 vs. XGBoost retrained on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/xgb_retrain_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 3',
    model_2_name='XGBoost - Retrain on GNN Data',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/xgb_retrain_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A    A
  ticker fixed_quarter_date Rating prediction
0   AAPL         2016-07-01     AA         AA
1   ABBV         2015-04-01      A          A
2   ABBV         2016-04-01      A          A
3    ABC         2012-04-01      A          A
4    ABC         2013-01-01      A          A
model 1 accuracy: 0.676530612244898
model 2 accuracy: 0.9030612244897959
McNemar's Test contingency table:
[[639, 24], [246, 71]]
Test results, non-exact, no continuity correction:
pvalue      1.3561625012089296e-41
statistic   182.53333333333333
Test results, non-exact, with continuity corre

In [19]:
# Inductive GNN model 3 vs. the pretrained logistic regression on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/lr_pretrained_predictions.xlsx',
    model_1_name='GNN - Inductive - Model 3',
    model_2_name='Logistic Regression - Pretrained on GNN Data',
)

running for Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/lr_pretrained_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                          AA
2   ABBV         2016-04-01      A                                         AAA
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
model 1 accuracy: 0.6836734693877551
model 2 accuracy: 0.6469387755102041
McNemar's Test conting

In [20]:
# Inductive GNN model 3 vs. the pretrained XGBoost on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/xgb_pretrained_predictions.xlsx',
    model_1_name='GNN - Inductive - Model 3',
    model_2_name='XGBoost - Pretrained on GNN Data',
)

running for Graph Neural Network/Inductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/xgb_pretrained_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A    A
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A  BBB
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
model 1 accuracy: 0.6836734693877551
model 2 accuracy: 0.9071428571428571
McNemar's Test contin

In [21]:
# Transductive GNN model 3 vs. the pretrained logistic regression on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/lr_pretrained_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 3',
    model_2_name='Logistic Regression - Pretrained on GNN Data',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/lr_pretrained_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A    A
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                          AA
2   ABBV         2016-04-01      A                                         AAA
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
model 1 accuracy: 0.676530612244898
model 2 accuracy: 0.6469387755102041
McNemar's Test conti

In [22]:
# Transductive GNN model 3 vs. the pretrained XGBoost on the GNN data
test_models(
    model_1_short_path='Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx',
    model_2_short_path='Graph Neural Network/Other Classifiers on GNN Data/xgb_pretrained_predictions.xlsx',
    model_1_name='GNN - Transductive - Model 3',
    model_2_name='XGBoost - Pretrained on GNN Data',
)

running for Graph Neural Network/Transductive/reformatted_exclude_previous_rating_model_3_predictions.xlsx and Graph Neural Network/Other Classifiers on GNN Data/xgb_pretrained_predictions.xlsx
  ticker fixed_quarter_date target pred
0   AAPL         2016-07-01     AA   AA
1   ABBV         2015-04-01      A   AA
2   ABBV         2016-04-01      A    A
3    ABC         2012-04-01      A    A
4    ABC         2013-01-01      A    A
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
model 1 accuracy: 0.676530612244898
model 2 accuracy: 0.9071428571428571
McNemar's Test cont

## P-Values Table

In [23]:
# Work on a copy so the unrounded p-values in `results` are left as they are
p_values_table = results.copy()

p_value_columns = ['p, Non-Exact', 'p, Non-Exact, Continuity Corrected', 'p, Exact']

# Round each p-value column to two decimal places
# (anything below 0.005 therefore shows up as 0.0)
for p_value_column in p_value_columns:
    p_values_table[p_value_column] = p_values_table[p_value_column].round(2)

# When the non-exact p-value is NaN, blank all three p-value columns of that row
rows_without_p_value = p_values_table['p, Non-Exact'].isnull()
p_values_table.loc[rows_without_p_value, p_value_columns] = None

# Turn every cell into a string and display missing p-values as '-'
p_values_table = p_values_table.astype(str).replace('nan', '-')

# Export to LaTeX with five centred columns, wrapped in \tiny ... \normalsize
lt_string = p_values_table.to_latex(index=False, column_format='c' * 5, escape=False)
latex_with_font_size = "\\tiny\n" + lt_string + "\n\\normalsize"
with open('../../Output/Modelling/p_values_table.tex', 'w') as f:
    f.write(latex_with_font_size)

p_values_table

Unnamed: 0,Model A,Model B,"p, Non-Exact","p, Non-Exact, Continuity Corrected","p, Exact"
0,Logistic - Include Previous Rating Model 2,Logistic - Include Previous Rating Model 3,-,-,-
1,Logistic - Exclude Previous Rating Model 2,Logistic - Exclude Previous Rating Model 3,0.31,0.35,0.35
2,Logistic - SMOTE Rating Change Model 2,Logistic - SMOTE Rating Change Model 3,0.17,0.19,0.19
3,XGBoost - Include Previous Rating Model 2,XGBoost - Include Previous Rating Model 3,-,-,-
4,XGBoost - Exclude Previous Rating Model 2,XGBoost - Exclude Previous Rating Model 3,0.0,0.0,0.0
5,XGBoost - SMOTE Rating Change Model 2,XGBoost - SMOTE Rating Change Model 3,0.06,0.11,0.11
6,GNN - Transductive - Model 2,GNN - Transductive - Model 3,0.17,0.21,0.21
7,GNN - Inductive - Model 2,GNN - Inductive - Model 3,0.1,0.12,0.12
8,GNN - Transductive - Model 2,GNN - Inductive - Model 2,0.83,0.92,0.92
9,GNN - Transductive - Model 3,GNN - Inductive - Model 3,0.48,0.55,0.55
