In [119]:
import pandas as pd
data = pd.read_csv('earning_prediction_quotes_parallel.csv')
data.head()

Unnamed: 0,Company,Ticker,Earning Time,Earning Score,Starting Week,Earning Date,Market Time,Left-day Open,Right-day Open,Left-day Close,Right-day Close,Left-day High,Right-day High,Left-day Low,Right-day Low
0,Citigroup,C,Tue before\rmarket,33,2020-01-13,2020-01-14,PREMARKET,74.69,76.29,75.77,76.95,75.84,78.08,74.22,76.07
1,Delta,DAL,Tue before\rmarket,-12,2020-01-13,2020-01-14,PREMARKET,58.62,61.61,59.08,61.03,59.29,61.82,58.62,60.44
2,U.S. Bank,USB,Wed before\rmarket,44,2020-01-13,2020-01-15,PREMARKET,53.43,52.28,53.33,51.81,53.91,52.61,53.22,51.35
3,IBM,IBM,Tue after\rmarket,-39,2020-01-20,2020-01-21,AFH,126.33,131.39,127.58,131.91,127.75,133.65,126.14,130.68
4,Netflix,NFLX,Tue after\rmarket,-21,2020-01-20,2020-01-21,AFH,340.0,332.55,338.11,326.0,341.0,336.3,332.59,323.6


In [122]:
print(f'Original has {data.shape[0]} rows')
df_pruned = data.dropna()
print(f'Pruned has {df_pruned.shape[0]} rows')

Original has 1299 rows
Pruned has 1209 rows


In [194]:
SCORE_COL = 'Earning Score'
LABEL_BULL = 'BULL'
LABEL_BEAR = 'BEAR'
LABEL_NEUTRAL = 'NEUTRAL'

# percentage wise
BEAR_CHANGE = -7.5
BULL_CHANGE = 7.5

In [195]:
def label_diff(value, threshold):
    if value > threshold:
        return LABEL_BULL
    elif value > -threshold:
        return LABEL_NEUTRAL
    else:
        return LABEL_BEAR

In [196]:
def predict(score, threshold_bull, threshold_bear):    
    if score > threshold_bull:
        return LABEL_BULL
    elif score > threshold_bear:
        return LABEL_NEUTRAL
    else:
        return LABEL_BEAR

In [197]:
from enum import Enum

class DiffMethod(Enum):
    CLOSE = 1
    HI_LOW = 2

In [206]:
def prediction_analysis(df, diff_method, threshold_bull, threshold_bear):    
    diff_col = 'DIFF'
    if diff_method == DiffMethod.CLOSE:
        data_pruned[diff_col] = 100 * ((data_pruned['Right-day Close'] / data_pruned['Left-day Close']) - 1)
    elif diff_method == DiffMethod.HI_LOW:
        data_pruned[diff_col] = 100 * ((data_pruned['Right-day High'] / data_pruned['Left-day Low']) - 1)
    else:
        return
    true_label_col = 'True Label'
    predict_label_col = 'Predict'
    data_pruned[true_label_col] = data_pruned.apply(lambda row: label_diff(row[diff_col]), axis=1)
    data_pruned[predict_label_col] = data_pruned.apply(lambda row: predict(row[SCORE_COL], threshold_bull, threshold_bear), axis=1)
    
    is_equal = data_pruned[true_label_col] == data_pruned[predict_label_col]
    right = is_equal.sum()
    accuracy = (right / is_equal.size * 100).round(2)
    print(f'Threshold_bull: {threshold_bull}, threshold_bear: {threshold_bear},\
          accuracy: {accuracy} [{right}/{is_equal.size}]')
    
    # for true bull cases, accuracy
    
    return data_pruned

In [208]:
for diff_method in [DiffMethod.CLOSE, DiffMethod.HI_LOW]:  
    print(f'Diff_method:{diff_method}')
    for gap in range(0, 15, 2):
        
        print(f'Stock change > {BULL_CHANGE}%: Bull, < {BEAR_CHANGE}%:BEAR, between: NEUTRAL')
        for threshold in range(20, 100, 20):
            threshold_bull = threshold
            threshold_bear = -threshold
        
            res = prediction_analysis(df_pruned, diff_method, threshold_bull, threshold_bear)

Diff_method:DiffMethod.CLOSE
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Diff_method:DiffMethod.HI_LOW
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL
Stock change > 14%: Bull, < -14%:BEAR, between: NEUTRAL


In [147]:
res.to_csv('analysis.csv', index=False)