In [53]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

### Basic weak-form test results

In [60]:
# Summarise each weak-form efficiency test: load its results file and report how
# many of the tested stock time series it classifies as weak-form efficient.
# Quotes nested inside the f-strings differ from the enclosing quotes so the cell
# also parses on Python versions older than 3.12.

# Autocorrelation test: rows whose "autocorrelation" flag is set are subtracted
# from the total, i.e. they are counted as NOT efficient.
df_autocorr_test = pd.read_csv('data/results/autocorr_test.csv', sep='|')
n_autocorr_efficient = len(df_autocorr_test) - sum(df_autocorr_test["autocorrelation"])
print(f'{n_autocorr_efficient} out of {len(df_autocorr_test)} stock time series are weak-form efficient according to the autocorrelation test.')

# Runs test: rows whose "random" flag is set are counted as efficient.
df_runs_test = pd.read_csv('data/results/runs_test.csv', sep='|')
n_runs_efficient = sum(df_runs_test["random"])
print(f'{n_runs_efficient} out of {len(df_runs_test)} stock time series are weak form efficient according to the runs test.')

# Unit root test: rows with price_unit_root == True are counted as efficient.
# NOTE(review): this file and the next are read with the default separator,
# unlike the two '|'-separated files above — confirm that is intentional.
df_unit_root_test = pd.read_csv('data/results/unit_root_test.csv')
n_unit_root_efficient = len(df_unit_root_test.query("price_unit_root == True"))
print(f'{n_unit_root_efficient} out of {len(df_unit_root_test)} stock time series are weak-form efficient according to the unit root test.')

# Variance ratio test: rows whose "data_from_random_walk" flag is set are counted
# as efficient.
df_var_ratio_test = pd.read_csv('data/results/var_ratio_test.csv')
n_var_ratio_efficient = df_var_ratio_test["data_from_random_walk"].sum()
print(f'{n_var_ratio_efficient} out of {len(df_var_ratio_test)} stock time series are weak-form efficient according to the variance ratio test.')

1341 out of 3273 stock time series are weak-form efficient according to the autocorrelation test.
2172 out of 3273 stock time series are weak form efficient according to the runs test.
2555 out of 3273 stock time series are weak-form efficient according to the unit root test.
451 out of 3273 stock time series are weak-form efficient according to the variance ratio test.


### Prediction results

In [55]:
def calculate_sign_change_accuracy(actual, predicted):
    """
    Percentage of consecutive steps whose direction the prediction got right.

    For every pair of neighbouring values the step (next minus current) is
    computed for both series. A step counts as correct only when both series
    move strictly up or both move strictly down; a flat step (difference of
    zero) in either series never counts as correct.

    Parameters:
        actual: sequence of observed values.
        predicted: sequence of predicted values, same length as ``actual``.

    Returns:
        Share of correctly predicted step directions, scaled to 0-100.

    Raises:
        ValueError: if the lengths differ or fewer than two values are given.
    """
    n_actual = len(actual)
    n_predicted = len(predicted)
    if n_actual != n_predicted or n_actual < 2:
        raise ValueError("Both lists must have the same length and at least two elements.")

    # Step sizes between neighbouring observations of each series.
    actual_steps = [later - earlier for earlier, later in zip(actual[:-1], actual[1:])]
    predicted_steps = [later - earlier for earlier, later in zip(predicted[:-1], predicted[1:])]

    # Count the steps where both series move in the same strict direction.
    hits = sum(
        1
        for predicted_step, actual_step in zip(predicted_steps, actual_steps)
        if (predicted_step > 0 and actual_step > 0)
        or (predicted_step < 0 and actual_step < 0)
    )

    return hits / len(predicted_steps) * 100
# Per-file evaluation metrics: one entry per prediction file in the plain lists,
# and one entry per file whose sample row is flagged as significant in the
# *_only_significant lists.
sign_accuracies = []
sign_accuracies_only_significant = []
r2s = []
r2s_only_significant = []
corrs = []
corrs_only_significant = []
df_sample = pd.read_csv('data/results/sample_100.csv', sep='|')
# os.listdir returns entries in arbitrary order; sorting keeps the order of the
# collected metrics (and anything printed from them) reproducible across runs.
for file in sorted(os.listdir('data/predictions/')):
    if not file.endswith('.csv'):
        continue
    df = pd.read_csv(f'data/predictions/{file}', sep='|')
    # Map the prediction file back to its row in the sample table: the second
    # '_'-separated token of the file name plus '.csv' is matched against the
    # sample table's `file` column.
    filename = file.split('_')[1] + '.csv'
    df_temp = df_sample.query('file == @filename')
    if df_temp.empty:
        # Fail before any metric is appended, with a message naming the file,
        # instead of an opaque out-of-bounds error from the lookup further down.
        raise IndexError(f'No row with file == {filename!r} in data/results/sample_100.csv (needed for {file!r}).')
    accuracy = calculate_sign_change_accuracy(df["actual"], df["pred"])
    sign_accuracies.append(accuracy)
    # Pearson correlation of actual vs predicted, computed once; r^2 is its square.
    corr = np.corrcoef(df["actual"], df["pred"])[0, 1]
    r2 = corr**2
    r2s.append(r2)
    corrs.append(corr)
    # NOTE(review): the value at column position 4 of the sample row is read as
    # the "model is significant" flag — confirm and switch to a named column.
    if df_temp.iloc[0, 4]:
        sign_accuracies_only_significant.append(accuracy)
        r2s_only_significant.append(r2)
        corrs_only_significant.append(corr)

print(f'Number of Predictions: {len(sign_accuracies)}')
print(f'of which {len(sign_accuracies_only_significant)} are from significant models')

print(f'Average Sign Change Accuracy: {np.mean(sign_accuracies):.2f}%')
print(f'Average R^2: {np.mean(r2s):.2f}')
print(f'Average Correlation: {np.mean(corrs):.2f}')
print(f'Average Sign Change Accuracy (Only Significant): {np.mean(sign_accuracies_only_significant):.2f}%')
print(f'Average R^2 (Only Significant): {np.mean(r2s_only_significant):.2f}')
print(f'Average Correlation (Only Significant): {np.mean(corrs_only_significant):.2f}')

Number of Predictions: 100
of which 65 are from significant models
Average Sign Change Accuracy: 45.79%
Average R^2: 0.48
Average Correlation: 0.66
Average Sign Change Accuracy (Only Significant): 45.18%
Average R^2 (Only Significant): 0.46
Average Correlation (Only Significant): 0.64


In [56]:
# Dump the raw per-file sign accuracies (percentages), one entry per prediction
# file, in the order the files were processed.
print(sign_accuracies)

[42.10526315789473, 36.84210526315789, 42.10526315789473, 47.368421052631575, 42.10526315789473, 52.63157894736842, 42.10526315789473, 52.63157894736842, 42.10526315789473, 36.84210526315789, 47.368421052631575, 21.052631578947366, 36.84210526315789, 47.368421052631575, 52.63157894736842, 21.052631578947366, 57.89473684210527, 31.57894736842105, 36.84210526315789, 52.63157894736842, 63.1578947368421, 42.10526315789473, 52.63157894736842, 52.63157894736842, 52.63157894736842, 52.63157894736842, 57.89473684210527, 42.10526315789473, 57.89473684210527, 42.10526315789473, 52.63157894736842, 52.63157894736842, 52.63157894736842, 42.10526315789473, 36.84210526315789, 42.10526315789473, 31.57894736842105, 63.1578947368421, 36.84210526315789, 63.1578947368421, 47.368421052631575, 42.10526315789473, 42.10526315789473, 47.368421052631575, 57.89473684210527, 15.789473684210526, 47.368421052631575, 26.31578947368421, 47.368421052631575, 36.84210526315789, 36.84210526315789, 52.63157894736842, 47.3