In [119]:
import pandas as pd
# Load the earnings quotes table from disk and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='earning_prediction_quotes_parallel.csv')
data.head(n=5)

Unnamed: 0,Company,Ticker,Earning Time,Earning Score,Starting Week,Earning Date,Market Time,Left-day Open,Right-day Open,Left-day Close,Right-day Close,Left-day High,Right-day High,Left-day Low,Right-day Low
0,Citigroup,C,Tue before\rmarket,33,2020-01-13,2020-01-14,PREMARKET,74.69,76.29,75.77,76.95,75.84,78.08,74.22,76.07
1,Delta,DAL,Tue before\rmarket,-12,2020-01-13,2020-01-14,PREMARKET,58.62,61.61,59.08,61.03,59.29,61.82,58.62,60.44
2,U.S. Bank,USB,Wed before\rmarket,44,2020-01-13,2020-01-15,PREMARKET,53.43,52.28,53.33,51.81,53.91,52.61,53.22,51.35
3,IBM,IBM,Tue after\rmarket,-39,2020-01-20,2020-01-21,AFH,126.33,131.39,127.58,131.91,127.75,133.65,126.14,130.68
4,Netflix,NFLX,Tue after\rmarket,-21,2020-01-20,2020-01-21,AFH,340.0,332.55,338.11,326.0,341.0,336.3,332.59,323.6


In [122]:
# Discard every row that has at least one missing value, then report the
# row counts before and after so the amount of dropped data is visible.
df_pruned = data.dropna(axis=0, how='any')
print(f'Original has {data.shape[0]} rows')
print(f'Pruned has {df_pruned.shape[0]} rows')

Original has 1299 rows
Pruned has 1209 rows


In [194]:
# Name of the input column that holds the score fed to predict().
SCORE_COL = 'Earning Score'

# Class labels shared by the true labels and the predictions.
LABEL_BULL, LABEL_BEAR, LABEL_NEUTRAL = 'BULL', 'BEAR', 'NEUTRAL'

# Stock-move cut-offs, percentage wise; the bear cut-off mirrors the bull one.
BULL_CHANGE = 7.5
BEAR_CHANGE = -BULL_CHANGE

In [195]:
def label_diff(value, threshold):
    """Classify a percentage move against a symmetric band.

    Args:
        value: The observed move to classify.
        threshold: Half-width of the neutral band.

    Returns:
        LABEL_BULL when ``value`` is strictly above ``threshold``,
        LABEL_NEUTRAL when it is strictly above ``-threshold`` (and not
        bullish), and LABEL_BEAR for everything else.
    """
    if value > threshold:
        return LABEL_BULL
    # Not bullish: whatever is still above the lower edge is neutral.
    return LABEL_NEUTRAL if value > -threshold else LABEL_BEAR

In [196]:
def predict(score, threshold_bull, threshold_bear):
    """Turn a score into a predicted label using two cut-offs.

    Args:
        score: The score to classify.
        threshold_bull: Scores strictly above this are LABEL_BULL.
        threshold_bear: Non-bullish scores strictly above this are
            LABEL_NEUTRAL; the rest are LABEL_BEAR.

    Returns:
        One of LABEL_BULL, LABEL_NEUTRAL or LABEL_BEAR.
    """
    if score > threshold_bull:
        return LABEL_BULL
    # Not bullish: decide between neutral and bearish on the lower cut-off.
    return LABEL_NEUTRAL if score > threshold_bear else LABEL_BEAR

In [197]:
from enum import Enum

# How the price move around an earning date is measured:
#   CLOSE  (1) - selected by prediction_analysis to compare the two closes
#   HI_LOW (2) - selected by prediction_analysis to compare right-day high
#                with left-day low
DiffMethod = Enum('DiffMethod', {'CLOSE': 1, 'HI_LOW': 2})

In [262]:
def _percent(part, whole):
    """Return ``part / whole`` as a percentage rounded to 2 places (NaN if ``whole`` is 0)."""
    if whole == 0:
        return float('nan')
    return round(part / whole * 100, 2)


def prediction_analysis(df, diff_method, threshold_bull, threshold_bear, change_threshold=None):
    """Compare score-based predictions with the realised price move.

    A copy of ``df`` is labelled twice: once from the observed percentage
    move ('True Label', via ``label_diff``) and once from the score column
    ('Predict', via ``predict``). Accuracy plus per-class precision and
    recall are then derived from the two label columns.

    Args:
        df: Frame with the ``SCORE_COL`` column and, depending on
            ``diff_method``, either 'Left-day Close'/'Right-day Close' or
            'Left-day Low'/'Right-day High'. It is not modified.
        diff_method: ``DiffMethod.CLOSE`` (right close vs. left close) or
            ``DiffMethod.HI_LOW`` (right high vs. left low).
        threshold_bull: Scores strictly above this are predicted LABEL_BULL.
        threshold_bear: Non-bullish scores strictly above this are predicted
            LABEL_NEUTRAL, the rest LABEL_BEAR.
        change_threshold: Half-width, in percent, of the neutral band used
            for the true label. ``None`` (default) means "use the
            module-level ``BULL_CHANGE`` as it is at call time".

    Returns:
        ``(stats, labelled)`` where ``stats`` is the list
        ``[accuracy, bull precision, note, bear precision, note,
        neutral precision, note, bull recall, note, bear recall, note,
        neutral recall, note]`` (percentages rounded to 2 places, NaN when
        the denominator is 0; each note is ``'<hits>|<denominator>'``), and
        ``labelled`` is the copy of ``df`` with the 'DIFF', 'True Label'
        and 'Predict' columns added.

    Raises:
        ValueError: If ``diff_method`` is not a supported ``DiffMethod``.
    """
    if diff_method == DiffMethod.CLOSE:
        after_col, before_col = 'Right-day Close', 'Left-day Close'
    elif diff_method == DiffMethod.HI_LOW:
        after_col, before_col = 'Right-day High', 'Left-day Low'
    else:
        # Returning None here made the caller fail later while unpacking.
        raise ValueError(f'Unsupported diff_method: {diff_method!r}')

    if change_threshold is None:
        # Looked up at call time so a sweep that rebinds BULL_CHANGE is honoured.
        change_threshold = BULL_CHANGE

    diff_col = 'DIFF'
    true_label_col = 'True Label'
    predict_label_col = 'Predict'

    # Work on a copy: the caller's frame must not pick up the helper columns.
    data_pruned = df.copy()
    data_pruned[diff_col] = 100 * ((data_pruned[after_col] / data_pruned[before_col]) - 1)
    data_pruned[true_label_col] = data_pruned[diff_col].apply(
        lambda diff: label_diff(diff, change_threshold))
    data_pruned[predict_label_col] = data_pruned[SCORE_COL].apply(
        lambda score: predict(score, threshold_bull, threshold_bear))

    is_equal = data_pruned[true_label_col] == data_pruned[predict_label_col]
    right = int(is_equal.sum())
    total = is_equal.size
    accuracy = _percent(right, total)
    print(f'Threshold_bull: {threshold_bull}, threshold_bear: {threshold_bear},'
          f'          accuracy: {accuracy} [{right}/{total}]')

    # Same computation for each class; precision entries come first in the
    # returned list, recall entries after them.
    precision_stats, recall_stats = [], []
    for label in (LABEL_BULL, LABEL_BEAR, LABEL_NEUTRAL):
        is_actual = data_pruned[true_label_col] == label
        true_positive = int((is_actual & is_equal).sum())
        actual = int(is_actual.sum())
        predicted = int((data_pruned[predict_label_col] == label).sum())
        precision_stats += [_percent(true_positive, predicted), f'{true_positive}|{predicted}']
        recall_stats += [_percent(true_positive, actual), f'{true_positive}|{actual}']

    print('\n')
    stats = [accuracy, *precision_stats, *recall_stats]
    return stats, data_pruned

In [271]:
# Grid sweep: for each diff method, each stock-move gap and each score
# threshold, run prediction_analysis and collect one report row.
res_builder = []
for diff_method in (DiffMethod.CLOSE, DiffMethod.HI_LOW):
    print(f'diff_method:{diff_method}')
    for gap in range(0, 15, 3):
        # Rebind the module-level stock-move cut-offs for this sweep step.
        # NOTE(review): presumably consumed by the true-label step - confirm.
        BULL_CHANGE, BEAR_CHANGE = gap, -gap
        print(f'Stock change > {BULL_CHANGE}%: Bull, < {BEAR_CHANGE}%:BEAR, between: NEUTRAL')
        for threshold in range(0, 100, 20):
            threshold_bull, threshold_bear = threshold, -threshold
            stats, data_pruned = prediction_analysis(df_pruned, diff_method, threshold_bull, threshold_bear)
            # Sweep settings first, then the metrics in the order returned.
            row = [diff_method, BULL_CHANGE, BEAR_CHANGE, threshold_bull, threshold_bear, *stats]
            res_builder.append(row)
    print('\n\n')

# One header per entry of a row: five sweep settings, then the 13 stats.
columns = [
    'Diff Method',
    'Bull indicator[stock%]',
    'Bear Indicator[stock%]',
    'Bull Threshold[Score]',
    'Bear Threshold[Score]',
    'Total Accuracy[%]',
    'Bull Precision[%]',
    'Bull Precision Note',
    'Bear Precision[%]',
    'Bear Precision Note',
    'Neutral Precision[%]',
    'Neutral Precision Note',
    'Bull Recall[%]',
    'Bull Recall Note',
    'Bear Recall[%]',
    'Bear Recall Note',
    'Neutral Recall[%]',
    'Neutral Recall Note',
]
report = pd.DataFrame(data=res_builder, columns=columns)

diff_method:DiffMethod.CLOSE
Stock change > 0%: Bull, < 0%:BEAR, between: NEUTRAL
Threshold_bull: 0, threshold_bear: 0,          accuracy: 50.87 [615/1209]


Threshold_bull: 20, threshold_bear: -20,          accuracy: 27.05 [327/1209]


Threshold_bull: 40, threshold_bear: -40,          accuracy: 13.4 [162/1209]


Threshold_bull: 60, threshold_bear: -60,          accuracy: 4.88 [59/1209]


Threshold_bull: 80, threshold_bear: -80,          accuracy: 1.16 [14/1209]


Stock change > 3%: Bull, < -3%:BEAR, between: NEUTRAL
Threshold_bull: 0, threshold_bear: 0,          accuracy: 31.43 [380/1209]


Threshold_bull: 20, threshold_bear: -20,          accuracy: 34.0 [411/1209]


Threshold_bull: 40, threshold_bear: -40,          accuracy: 37.72 [456/1209]


Threshold_bull: 60, threshold_bear: -60,          accuracy: 38.96 [471/1209]






Threshold_bull: 80, threshold_bear: -80,          accuracy: 39.12 [473/1209]


Stock change > 6%: Bull, < -6%:BEAR, between: NEUTRAL
Threshold_bull: 0, threshold_bear: 0,          accuracy: 17.87 [216/1209]


Threshold_bull: 20, threshold_bear: -20,          accuracy: 38.21 [462/1209]


Threshold_bull: 40, threshold_bear: -40,          accuracy: 53.18 [643/1209]


Threshold_bull: 60, threshold_bear: -60,          accuracy: 60.71 [734/1209]


Threshold_bull: 80, threshold_bear: -80,          accuracy: 63.44 [767/1209]


Stock change > 9%: Bull, < -9%:BEAR, between: NEUTRAL
Threshold_bull: 0, threshold_bear: 0,          accuracy: 10.26 [124/1209]


Threshold_bull: 20, threshold_bear: -20,          accuracy: 40.78 [493/1209]


Threshold_bull: 40, threshold_bear: -40,          accuracy: 60.71 [734/1209]


Threshold_bull: 60, threshold_bear: -60,          accuracy: 73.12 [884/1209]


Threshold_bull: 80, threshold_bear: -80,          accuracy: 77.83 [941/1209]


Stock change > 12%: Bull, < -

In [272]:
# Persist the sweep results next to the notebook, without the row index.
report.to_csv(path_or_buf='report.csv', index=False)