# Statistical Tests for Accuracy Differences

Run McNemar's test on the paired predictions of two models to check whether the difference in their accuracy is statistically significant.

In [1]:
import pandas as pd
from statsmodels.stats.contingency_tables import mcnemar

## Function

In [2]:
def test_models(model_1_short_path, model_2_short_path):

    print('running for ' + model_1_short_path + ' and ' + model_2_short_path)

    # Load first model from '../../Data/Predictions/' + model_1_short_path
    model_1_data = pd.read_excel('../../Data/Predictions/' + model_1_short_path)
    print(model_1_data.head())
    # Third column is true, last is predicted, add column for 'correct'
    model_1_data['correct'] = model_1_data.iloc[:, 2] == model_1_data.iloc[:, 3]
    # Load second model from '../../Data/Predictions/' + model_2_short_path
    model_2_data = pd.read_excel('../../Data/Predictions/' + model_2_short_path)
    print(model_2_data.head())
    # Third column is true, last is predicted, add column for 'correct'
    model_2_data['correct'] = model_2_data.iloc[:, 2] == model_2_data.iloc[:, 3]
    # Merge the two datasets on 'ticker' and 'fixed_quarter_date'
    merged_data = pd.merge(model_1_data, model_2_data, on=['ticker', 'fixed_quarter_date'], suffixes=('_model_1', '_model_2'))

    # Fill contingency table cells
    m1_correct_m2_correct = merged_data[(merged_data['correct_model_1'] == True) & (merged_data['correct_model_2'] == True)].shape[0]
    m1_correct_m2_incorrect = merged_data[(merged_data['correct_model_1'] == True) & (merged_data['correct_model_2'] == False)].shape[0]
    m1_incorrect_m2_correct = merged_data[(merged_data['correct_model_1'] == False) & (merged_data['correct_model_2'] == True)].shape[0]
    m1_incorrect_m2_incorrect = merged_data[(merged_data['correct_model_1'] == False) & (merged_data['correct_model_2'] == False)].shape[0]
    
    # Create a contingency table
    # Rows for m1 correct and incorrect
    # Columns for m2 correct and incorrect
    contingency_table = [[m1_correct_m2_correct, m1_correct_m2_incorrect], 
                         [m1_incorrect_m2_correct, m1_incorrect_m2_incorrect]]
  
    print('McNemar\'s Test (Exact) contingency table:')
    print(contingency_table)

    # McNemar's Test, exact, without any continuity correction 
    print('Test results, no continuity correction:')
    print(mcnemar(contingency_table, exact=True, correction=False)) 
    # ncc_result = mcnemar(contingency_table, exact=True, correction=False)
    # ncc_statistic = ncc_result.statistic
    # ncc_p_value = ncc_result.pvalue

## Run Tests

In [3]:
# Logistic Regression, include_previous_rating: model 2 vs. model 3
test_models(
    model_1_short_path='Logistic Regression/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx',
)

running for Logistic Regression/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx and Logistic Regression/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating include_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating include_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                     

In [4]:
# Logistic Regression, exclude_previous_rating: model 2 vs. model 3
test_models(
    model_1_short_path='Logistic Regression/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx',
)

running for Logistic Regression/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx and Logistic Regression/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating exclude_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                          AA
2   ABBV         2016-04-01      A                                          AA
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                          AA
2   ABBV         2016-04-01      A                                     

In [5]:
# Logistic Regression, smote_rating_change: model 2 vs. model 3
test_models(
    model_1_short_path='Logistic Regression/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx',
)

running for Logistic Regression/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx and Logistic Regression/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter Date   
1   ABBV         2015-04-01                Same As Last Fixed Quarter Date   
2   ABBV         2016-04-01                Same As Last Fixed Quarter Date   
3    ABC         2012-04-01                Same As Last Fixed Quarter Date   
4    ABC         2013-01-01                Same As Last Fixed Quarter Date   

   smote_rating_change_model_2_predictions  
0          Same As Last Fixed Quarter Date  
1  Downgrade Since Last Fixed Quarter Date  
2          Same As Last Fixed Quarter Date  
3          Same As Last Fixed Quarter Date  
4          Same As Last Fixed Quarter Date  
  ticker fixed_quarter_date Change Direction Since Last Fix

In [6]:
# Logistic Regression, smote_rating_change: model 2 vs. model 3
# NOTE(review): these are the same two files as the preceding cell's comparison.
# Possibly the non-SMOTE 'rating_change' Logistic Regression predictions were
# intended here (the XGBoost cells compare both variants) - confirm the paths.
test_models(
    model_1_short_path='Logistic Regression/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx',
    model_2_short_path='Logistic Regression/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx',
)

running for Logistic Regression/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx and Logistic Regression/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter Date   
1   ABBV         2015-04-01                Same As Last Fixed Quarter Date   
2   ABBV         2016-04-01                Same As Last Fixed Quarter Date   
3    ABC         2012-04-01                Same As Last Fixed Quarter Date   
4    ABC         2013-01-01                Same As Last Fixed Quarter Date   

   smote_rating_change_model_2_predictions  
0          Same As Last Fixed Quarter Date  
1  Downgrade Since Last Fixed Quarter Date  
2          Same As Last Fixed Quarter Date  
3          Same As Last Fixed Quarter Date  
4          Same As Last Fixed Quarter Date  
  ticker fixed_quarter_date Change Direction Since Last Fix

In [7]:
# XGBoost, include_previous_rating: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx',
)

running for XGBoost/include_previous_rating_model_2/include_previous_rating_model_2_predictions.xlsx and XGBoost/include_previous_rating_model_3/include_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating include_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                           A
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating include_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC        

In [8]:
# XGBoost, exclude_previous_rating: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx',
)

running for XGBoost/exclude_previous_rating_model_2/exclude_previous_rating_model_2_predictions.xlsx and XGBoost/exclude_previous_rating_model_3/exclude_previous_rating_model_3_predictions.xlsx
  ticker fixed_quarter_date Rating exclude_previous_rating_model_2_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC         2012-04-01      A                                          BB
4    ABC         2013-01-01      A                                           A
  ticker fixed_quarter_date Rating exclude_previous_rating_model_3_predictions
0   AAPL         2016-07-01     AA                                          AA
1   ABBV         2015-04-01      A                                           A
2   ABBV         2016-04-01      A                                           A
3    ABC        

In [9]:
# XGBoost, rating_change: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/rating_change_model_2/rating_change_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/rating_change_model_3/rating_change_model_3_predictions.xlsx',
)

running for XGBoost/rating_change_model_2/rating_change_model_2_predictions.xlsx and XGBoost/rating_change_model_3/rating_change_model_3_predictions.xlsx
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter Date   
1   ABBV         2015-04-01                Same As Last Fixed Quarter Date   
2   ABBV         2016-04-01                Same As Last Fixed Quarter Date   
3    ABC         2012-04-01                Same As Last Fixed Quarter Date   
4    ABC         2013-01-01                Same As Last Fixed Quarter Date   

  rating_change_model_2_predictions  
0   Same As Last Fixed Quarter Date  
1   Same As Last Fixed Quarter Date  
2   Same As Last Fixed Quarter Date  
3   Same As Last Fixed Quarter Date  
4   Same As Last Fixed Quarter Date  
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter D

In [10]:
# XGBoost, smote_rating_change: model 2 vs. model 3
test_models(
    model_1_short_path='XGBoost/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx',
    model_2_short_path='XGBoost/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx',
)

running for XGBoost/smote_rating_change_model_2/smote_rating_change_model_2_predictions.xlsx and XGBoost/smote_rating_change_model_3/smote_rating_change_model_3_predictions.xlsx
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL         2016-07-01                Same As Last Fixed Quarter Date   
1   ABBV         2015-04-01                Same As Last Fixed Quarter Date   
2   ABBV         2016-04-01                Same As Last Fixed Quarter Date   
3    ABC         2012-04-01                Same As Last Fixed Quarter Date   
4    ABC         2013-01-01                Same As Last Fixed Quarter Date   

  smote_rating_change_model_2_predictions  
0         Same As Last Fixed Quarter Date  
1         Same As Last Fixed Quarter Date  
2         Same As Last Fixed Quarter Date  
3         Same As Last Fixed Quarter Date  
4         Same As Last Fixed Quarter Date  
  ticker fixed_quarter_date Change Direction Since Last Fixed Quarter Date  \
0   AAPL   