In [None]:
# import necessary libraries
import sys
import os
import msgpack
import zlib
import numpy as np
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as md
from pathlib import Path

In [None]:
# import files from src
#sys.path.insert(0,"Desktop/Fintech_Bootcamp/Project_2/Project-2/src")



In [None]:
import analysis_helper as ah
import datetime_helper as dh

In [None]:
# define the location of the input files
# DATA_DIR is the single place to edit when the data folder moves; the three
# file paths below are derived from it.
DATA_DIR = Path("../Project-2/Project_2")
filename_augmento_topics = DATA_DIR / "augmento_topics.msgpack.zlib"
filename_augmento_data = DATA_DIR / "augmento_data.msgpack.zlib"
filename_eth_data = DATA_DIR / "eth_data.msgpack.zlib"

In [None]:
# Make sure every input file can be found before trying to load anything.
# All three files are opened by load_example_data, so all three are checked;
# isExist is True only when none of them is missing.
input_files = (filename_augmento_topics, filename_augmento_data, filename_eth_data)
missing_files = [str(path) for path in input_files if not os.path.exists(path)]
isExist = not missing_files

print(isExist)
if missing_files:
    # name the offending paths so a wrong working directory is easy to spot
    print("Missing input files:", ", ".join(missing_files))

In [None]:
def strip_data_by_time(t_data, data, t_min, t_max):
	"""Drop every sample whose timestamp falls outside [t_min, t_max].

	Parameters
	----------
	t_data : iterable of timestamps, walked in step with ``data``.
	data : iterable of samples; element i belongs to timestamp i.
	t_min, t_max : inclusive lower and upper bounds of the kept range.

	Returns
	-------
	(t_data, data) : two numpy arrays holding the kept timestamps and the
	samples paired with them, in their original order.
	"""
	def within_bounds(stamp):
		# both ends of the range are inclusive
		return stamp >= t_min and stamp <= t_max

	# samples first: keep each one whose paired timestamp is in range
	kept_samples = []
	for sample, stamp in zip(data, t_data):
		if within_bounds(stamp):
			kept_samples.append(sample)
	stripped_data = np.array(kept_samples)

	# then the timestamps themselves, filtered by the same rule
	kept_stamps = []
	for stamp in t_data:
		if within_bounds(stamp):
			kept_stamps.append(stamp)
	stripped_times = np.array(kept_stamps)

	return stripped_times, stripped_data

In [None]:
def _extract_field(payload, key):
	"""Return the values stored under ``key`` in an unpacked msgpack payload.

	Two layouts are accepted: a sequence of per-sample mappings (the layout the
	augmento data file is read with below), or a single mapping from field
	name to a sequence of values.
	"""
	if isinstance(payload, dict):
		return payload[key]
	return [el[key] for el in payload]


def load_example_data(filename_augmento_topics,
                      filename_augmento_data,
                      filename_eth_data,
                      datetime_start=None,
                      datetime_end=None):
	"""Load the topic map, the sentiment counts and the price series.

	Each input is a zlib-compressed msgpack file.

	Parameters
	----------
	filename_augmento_topics : path-like
		File holding a mapping of topic id to topic name.
	filename_augmento_data : path-like
		File holding a sequence of records, each with a "t_epoch" and a
		"counts" entry.
	filename_eth_data : path-like
		File holding the price data with "Date" and "Close" fields.
	datetime_start, datetime_end : optional
		Range bounds, converted with ``dh.datetime_to_epoch``. When left as
		None, the bound is taken from the overlap of the two time series.

	Returns
	-------
	tuple
		``(augmento_topics, augmento_topics_inv, t_aug_data, aug_data,
		t_price_data, price_data)`` where the first two are the id->name and
		name->id dicts and the rest are numpy arrays restricted to the
		selected time range.
	"""

	# load the topics
	with open(filename_augmento_topics, "rb") as f:
		temp = msgpack.unpackb(zlib.decompress(f.read()))
		augmento_topics = {int(k) : v for k, v in temp.items()}
		augmento_topics_inv = {v : int(k) for k, v in temp.items()}

	# load the augmento data
	with open(filename_augmento_data, "rb") as f:
		temp = msgpack.unpackb(zlib.decompress(f.read()))
		t_aug_data = np.array([el["t_epoch"] for el in temp], dtype=np.float64)
		aug_data = np.array([el["counts"] for el in temp], dtype=np.int32)

	# load the price data
	with open(filename_eth_data, "rb") as f:
		temp = msgpack.unpackb(zlib.decompress(f.read()))
		# Read the actual field values. The previous code evaluated
		# `'Date' in temp` / `'Close' in temp`, i.e. a single membership
		# boolean, so both arrays held just 0.0 or 1.0.
		# NOTE(review): assumes "Date" holds numeric timestamps on the same
		# scale as "t_epoch"; a non-numeric value raises ValueError here
		# rather than silently producing a wrong series -- confirm the schema.
		t_price_data = np.array(_extract_field(temp, "Date"), dtype=np.float64)
		price_data = np.array(_extract_field(temp, "Close"), dtype=np.float64)

	# set the start and end times if they are specified
	if datetime_start is not None:
		t_start = dh.datetime_to_epoch(datetime_start)
	else:
		# latest of the two series' first timestamps
		t_start = max(np.min(t_aug_data), np.min(t_price_data))

	if datetime_end is not None:
		t_end = dh.datetime_to_epoch(datetime_end)
	else:
		# earliest of the two series' last timestamps
		t_end = min(np.max(t_aug_data), np.max(t_price_data))

	# strip the sentiments and prices outside the shared time range
	t_aug_data, aug_data = strip_data_by_time(t_aug_data, aug_data, t_start, t_end)
	t_price_data, price_data = strip_data_by_time(t_price_data, price_data, t_start, t_end)

	return augmento_topics, augmento_topics_inv, t_aug_data, aug_data, t_price_data, price_data

In [None]:
# read the topic map, the sentiment counts and the price series from disk
all_data = load_example_data(
    filename_augmento_topics=filename_augmento_topics,
    filename_augmento_data=filename_augmento_data,
    filename_eth_data=filename_eth_data,
)

# unpack the six results into their own names
(aug_topics, aug_topics_inv,
 t_aug_data, aug_data,
 t_price_data, price_data) = all_data

# transposed float copy of the counts
all_topics = np.transpose(aug_data).astype(float)



In [None]:
# Pick Topics
# aug_data holds one row per loaded record (time step) and one column per
# entry of that record's "counts", so a topic's time series is a column:
# take every row at the topic's index. Indexing with the topic id alone would
# return a single time step across all topics instead.
aug_signal_a = aug_data[:, aug_topics_inv["Bullish"]].astype(np.float64)
aug_signal_b = aug_data[:, aug_topics_inv["Bearish"]].astype(np.float64)

In [None]:
# calculate PNL for a given strategy
# if sentiment positive go long, else go short
# fees are assumed to be 0.75% (taker fee from BITMEX)

def strategy(price_data, signal_a, signal_b, window_1 = 24 * 7, window_2 = 24*7,buy_sell_fee = 0.0075, pnl_0 = 1.0):
    """Score the two sentiment signals and backtest the result on the prices.

    Parameters
    ----------
    price_data : price series handed to ``ah.nb_backtest_a``.
    signal_a, signal_b : the two topic signals handed to
        ``ah.nb_calc_sentiment_score_a``.
    window_1, window_2 : window sizes forwarded to the sentiment score
        (default 24 * 7 each).
    buy_sell_fee : fee forwarded to the backtest (default 0.0075).
    pnl_0 : value forwarded as the backtest's third argument (default 1.0).

    Returns
    -------
    Whatever ``ah.nb_backtest_a`` returns (callers in this notebook index it
    with ``[-1]`` to read the last value).
    """
    sent_score = ah.nb_calc_sentiment_score_a(signal_a, signal_b, window_1, window_2)
    # Forward pnl_0 instead of a hard-coded 1.0 so the parameter is honoured.
    # NOTE(review): the third positional argument of nb_backtest_a is presumed
    # to be the starting PnL -- confirm against analysis_helper.
    pnl = ah.nb_backtest_a(price_data, sent_score, pnl_0, buy_sell_fee)
    return pnl

# PNL of various moving window size for a given combination of topics
def window_combination(price_data,top_a,top_b,end_day_x,end_day_y,start_day_x=0,start_day_y=0,buy_sell_fee=0.0075):
    """Run ``strategy`` over a grid of window sizes and collect the last PnL value.

    For every ``i`` in ``[start_day_x, end_day_x)`` and ``j`` in
    ``[start_day_y, end_day_y)`` the strategy is run with
    ``window_1 = 24 * (i + 1)`` and ``window_2 = 24 * (j + 1)``.

    Parameters
    ----------
    price_data : price series forwarded to ``strategy``.
    top_a, top_b : the two topic signals forwarded to ``strategy``.
    end_day_x, end_day_y : exclusive upper bounds of the two grid axes.
    start_day_x, start_day_y : inclusive lower bounds of the two grid axes.
    buy_sell_fee : fee forwarded to every ``strategy`` call.

    Returns
    -------
    2-D numpy array of shape
    ``(end_day_x - start_day_x, end_day_y - start_day_y)`` whose entry
    ``[i - start_day_x, j - start_day_y]`` is the last element of the PnL
    returned for that window pair.
    """
    total_comb = np.zeros(shape=(end_day_x, end_day_y))
    print("Calculating...")
    for i in range(start_day_x, end_day_x):
        for j in range(start_day_y, end_day_y):
            # pass the caller's fee through; it used to be overridden by a
            # hard-coded 0.0075 here, which made the parameter a no-op
            pnl = strategy(price_data, top_a, top_b,
                           window_1=24 * (i + 1),
                           window_2=24 * (j + 1),
                           buy_sell_fee=buy_sell_fee)
            total_comb[i, j] = pnl[-1]
    print("Done.")
    # only the requested sub-grid was filled in, so return just that part
    return total_comb[start_day_x:end_day_x, start_day_y:end_day_y]