In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta, timezone
import requests
import os
from time import sleep
import pytz
from scipy.stats import pearsonr



In [None]:
# LOAD DATA


def _as_utc(moment):
    """Return ``moment`` as a timezone-aware datetime, treating naive values as UTC."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


def getData(start_interval=datetime(2023, 5, 7, tzinfo=timezone.utc), end_interval=None, filter_position=(0, 50),
            path_most_traded_json="/home/alberto/Docker/Trading/tracker/json/most_traded_coins.json",
            path_dir="/home/alberto/Docker/Trading/analysis/json"):
    '''
    Load the observations of the selected most-traded coins from the JSON dumps in ``path_dir``.

    start_interval: lower bound (inclusive) on the observation timestamp ('_id') --> DATETIME
    end_interval: upper bound (inclusive) on the observation timestamp. None means "now",
                  evaluated at call time (not once, when the function is defined) --> DATETIME or None
    filter_position: (start, end) slice applied to the 'most_traded_coins' list --> TUPLE (e.g. (0, 50))
    path_most_traded_json: JSON file holding the 'most_traded_coins' list --> STRING
    path_dir: directory whose files each hold a {'data': {coin: [observations]}} payload --> STRING

    Naive datetimes (bounds or observation timestamps) are treated as UTC, so naive and
    aware values can be mixed without raising TypeError.

    Returns a dict {coin: [observations]}. Files are read from the oldest to the newest
    modification time, and observations are appended in that order. A coin that appears in
    a file but has no observation inside the interval is still present with an empty list.
    '''
    if end_interval is None:
        end_interval = datetime.now(tz=timezone.utc)
    lower_bound = _as_utc(start_interval)
    upper_bound = _as_utc(end_interval)

    with open(path_most_traded_json, "r") as f:
        most_traded_coin = json.load(f)
    start_coin_list = filter_position[0]
    end_coin_list = filter_position[1]
    # a set makes the per-coin membership test below O(1)
    most_traded_coin_set = set(most_traded_coin['most_traded_coins'][start_coin_list:end_coin_list])

    full_paths = [path_dir + "/{0}".format(x) for x in os.listdir(path_dir)]
    print(full_paths)

    # oldest file first, so that observations are appended chronologically
    list_json_info = sorted(
        ({'path': full_path, 'time': os.path.getmtime(full_path)} for full_path in full_paths),
        key=lambda x: x['time'],
    )

    data = {}
    for json_info in list_json_info:
        path = json_info['path']
        print(f'Retrieving data from {path}')
        with open(path, "r") as f:
            temp_data_dict = json.load(f)

        for coin, observations in temp_data_dict['data'].items():
            if coin not in most_traded_coin_set:
                continue
            selected = data.setdefault(coin, [])
            for obs in observations:
                # parse once per observation and compare on a common (aware) footing
                timestamp = _as_utc(datetime.fromisoformat(obs['_id']))
                if lower_bound <= timestamp <= upper_bound:
                    selected.append(obs)

    return data
                    



In [None]:
# Time interval of the observations to load (naive datetimes).
start_interval = datetime(2023, 5, 7)
end_interval = datetime.now()

# Slice of the most-traded ranking to load, as (start, end) indexes.
# Other slices previously listed in this cell: (50, 100), (100, 150), (150, 200).
filter_position = (0, 100)

data = getData(start_interval=start_interval, end_interval=end_interval, filter_position=filter_position)

In [None]:
# Peek at the `_id` of the first ETHUSDT observation returned by getData.
data['ETHUSDT'][0]['_id']

In [None]:
# Number of observations loaded for AVAXUSDT.
len(data['AVAXUSDT'])

In [None]:
def check_correlation(data, field_volume, field_price, coin = None, limit_volume=3):
    '''
    This function checks the Pearson correlation between 2 fields chosen

    data: dict {coin: [observations]}, each observation being a dict of fields
    field_volume: first field to correlate --> STRING (e.g. buy_vol_60m)
    field_price: second field to correlate --> STRING (e.g. price_%_1d)
    coin: if given, only this coin is analyzed; otherwise every coin in data
    limit_volume: in the all-coins analysis, observations whose 'vol_<timeframe>' value is
                  below this threshold are ignored. <timeframe> is the last '_'-separated
                  token of field_volume. It is NOT applied in the single-coin analysis.

    Returns (correlations, pvalues), two lists with one entry per analyzed coin.
    Both lists are empty when no coin has enough valid observations.
    '''
    # the plain volume field shares the timeframe suffix of field_volume (e.g. 5m, 15m, ..., 6h)
    timeframe = field_volume.split('_')[-1]
    volume_field = 'vol_' + timeframe

    # analyze one coin if specified
    if coin is not None:
        volumes = []
        prices = []
        for obs in data[coin]:
            if obs[field_volume] is not None and obs[field_price] is not None:
                volumes.append(obs[field_volume])
                prices.append(obs[field_price])

        # pearsonr needs at least two points
        if len(volumes) < 2:
            print(f'{coin}: not enough observations to compute a correlation')
            return [], []

        correlation, p_value = pearsonr(volumes, prices)

        print("Correlation:", correlation)
        print("P-value:", p_value)

        # same shape as the all-coins branch, so callers can always unpack the result
        return [correlation], [p_value]

    # analyze all coins in data
    correlations = []
    pvalues = []
    n_coins = 0
    for coin_name, observations in data.items():
        volumes = []
        prices = []
        for obs in observations:
            total_volume = obs[volume_field]
            if obs[field_volume] is None or obs[field_price] is None or total_volume is None:
                continue
            if total_volume >= limit_volume:
                volumes.append(obs[field_volume])
                prices.append(obs[field_price])

        if len(volumes) <= 3:
            continue

        n_coins += 1
        correlation, p_value = pearsonr(volumes, prices)

        # NaN results (e.g. a constant series) would turn every aggregate below into NaN
        if np.isfinite(correlation) and np.isfinite(p_value):
            correlations.append(correlation)
            pvalues.append(p_value)
        else:
            type_ = type(correlation)
            print(f'{coin_name}: pvalue={p_value}, correlation={correlation}, type={type_}')

    print(len(correlations))
    print(f' {n_coins} have been analyzed')
    if not correlations:
        # max()/min() would raise on an empty list
        print('No valid correlation could be computed')
        return correlations, pvalues

    # summary statistics over all the analyzed coins
    max_corr = max(correlations)
    min_corr = min(correlations)
    std_dev_corr = np.std(correlations)
    print(f'Max corr: {max_corr}')
    print(f'Min corr: {min_corr}')
    print(f'Std corr: {std_dev_corr}')

    print("Correlation:", np.mean(correlations))
    print("P-value:", np.mean(pvalues))

    return correlations, pvalues

# `coin` is set here but not passed to the call below, so every coin in `data` is analyzed.
coin = None
correlations, pvalues = check_correlation(
    data,
    field_volume='buy_vol_60m',
    field_price='price_%_1d',
    limit_volume=2,
)

In [None]:
def analyze_events(data, buy_vol_field, vol_field, minutes_price_windows, event_buy_volume, event_volume,
                   start_interval=None, end_interval=None):
    '''
    This function analyzes what happens in terms of price changes after certain events

    data: it is the dataset --> DICT {coin: [observations]}
    buy_vol_field: it is the key that determines which buy_vol in terms of timeframe (5m, 15m, 30m, 60m, ..., 1d) --> STRING (e.g. buy_vol_5m)
    vol_field: it is the key that determines which vol in terms of timeframe (5m, 15m, 30m, 60m, ..., 1d) --> STRING (e.g. vol_5m)
    minutes_price_windows: time window in terms of minutes. how many minutes into the future I want the check the price changes? --> INTEGER (e.g. 60)
    event_buy_volume: minimum value of "buy_vol_field" that triggers an event (>=) --> FLOAT (e.g. 0.6) MUST BE BETWEEN 0 and 1
    event_volume: value of "vol_field" that must be exceeded (>) to trigger an event --> FLOAT (e.g. 2.0)
    start_interval, end_interval: only used in the printed summary. When omitted, the
        notebook-level variables with the same names are used if they exist.

    An event opens a window of "minutes_price_windows" minutes in which no other event of the
    same coin is triggered. Price changes are measured relative to the price at the event.
    The window is taken as the next "minutes_price_windows" observations, which assumes one
    observation per minute -- TODO confirm against the data source.

    The results are printed; nothing is returned.
    '''
    # keep the existing calls working: fall back to the notebook globals used previously
    if start_interval is None:
        start_interval = globals().get('start_interval')
    if end_interval is None:
        end_interval = globals().get('end_interval')

    price_changes = {}
    events = 0
    tot_n_coins = len(data)

    # analyze for each coin
    for coin, observations in data.items():
        price_changes[coin] = []
        # end of the window opened by the last event; None until the first event
        # (avoids comparing timestamps against a naive sentinel date)
        limit_window = None

        for index, obs in enumerate(observations):
            buy_volume = obs[buy_vol_field]
            volume = obs[vol_field]
            if buy_volume is None or volume is None:
                continue

            # two close events would overlap each other: skip while inside the current window
            datetime_obs = datetime.fromisoformat(obs['_id'])
            if limit_window is not None and datetime_obs <= limit_window:
                continue

            if buy_volume >= event_buy_volume and volume > event_volume:
                # baseline is the price when the event fires, not the first price of the dataset
                initial_price = obs['price']
                if not initial_price:
                    # None or 0: a relative change cannot be computed
                    continue

                events += 1
                limit_window = datetime_obs + timedelta(minutes=minutes_price_windows)
                # get all the price changes in the "minutes_price_windows"
                for obs_price in observations[index:index + minutes_price_windows]:
                    if obs_price['price'] is None:
                        continue
                    change = (obs_price['price'] - initial_price) / initial_price
                    if not np.isnan(change):
                        price_changes[coin].append(change)

    # average per coin first, then across coins, so that every coin weighs the same
    total_changes = []
    coins = 0
    for coin, changes in price_changes.items():
        if not changes:
            # np.mean([]) would only produce NaN and a RuntimeWarning
            continue
        coins += 1
        coin_mean = np.mean(changes)
        if not np.isnan(coin_mean):
            total_changes.append(coin_mean)

    mean_total_changes = np.mean(total_changes) if total_changes else float('nan')

    print(f'Analysis for {buy_vol_field} and {vol_field}')
    print(f'There were {events} events')
    print(f'{coins}/{tot_n_coins} have been analyzed from {start_interval} to {end_interval}')
    print(f'The average of the changes in the time window is {mean_total_changes}')


# Events: 30-minute buy volume of at least 0.6 with 30-minute volume above 2,
# looking at the 60 minutes that follow.
analyze_events(
    data,
    buy_vol_field='buy_vol_30m',
    vol_field='vol_30m',
    minutes_price_windows=60,
    event_buy_volume=0.6,
    event_volume=2,
)



In [None]:
#['_id',
#'price', 'price_%_1d','price_%_6h','price_%_3h','price_%_1h',
#'vol_1m', 'buy_vol_1m', 'buy_trd_1m',
#'vol_5m','vol_5m_std','buy_vol_5m','buy_trd_5m'
#'vol_15m','vol_15m_std','buy_vol_15m','buy_trd_15m',
#'vol_30m','vol_30m_std','buy_vol_30m','buy_trd_30m',
#'vol_60m','vol_60m_std','buy_vol_60m','buy_trd_60m',
#'vol_24h','vol_24h_std','buy_vol_24h','buy_trd_24h',]

In [None]:
def search_focus_points(data, most_traded_coins,  days_start=4, days_finish=3, volume_limit=0.8, buy_volume_limit=0.55, referenceTime_volume='vol_5m',
                         referenceTime_Buyvolume='buy_vol_5m', filter_position=50):
    '''
    This function looks for all those points in time for each coin that have interesting events.

    data: dict {coin: [observations]}
    most_traded_coins: ranked list of coin names; only the first "filter_position" are considered
    days_start: how many days back from the end of each series the search window starts
    days_finish: how many days back from the end of each series the search window ends;
                 None (or 0) means the window runs to the end of the series
    volume_limit: minimum value (>=) of "referenceTime_volume" for an event
    buy_volume_limit: minimum value (>=) of "referenceTime_Buyvolume" for an event
    filter_position: number of top-ranked coins to keep

    The window is cut by number of observations (days * 24 * 60), which assumes one
    observation per minute -- TODO confirm against the data source.

    Returns a dict {coin: [events]}. Each event is a COPY of the matching observation with
    two extra keys: 'position' (rank of the coin in most_traded_coins) and
    'timeseries_position' (0-based index of the observation inside the search window).
    The observations in data are left untouched.
    '''
    most_traded_coins = most_traded_coins[:filter_position]
    # rank lookup; setdefault keeps the first occurrence, like list.index()
    rank_by_coin = {}
    for rank, name in enumerate(most_traded_coins):
        rank_by_coin.setdefault(name, rank)

    minutes_start = int(days_start*24*60)
    minutes_end = int(days_finish*24*60) if days_finish else 0

    def window_of(series):
        # both bounds are counted back from the end of the series, hence the negative end
        if minutes_end:
            return series[-minutes_start:-minutes_end]
        return series[-minutes_start:]

    events = {}
    windows = {}
    for coin, series in data.items():
        if coin not in rank_by_coin:
            continue
        window = window_of(series)
        for index, obs in enumerate(window):
            volume = obs.get(referenceTime_volume)
            buy_volume = obs.get(referenceTime_Buyvolume)
            # observations with a missing/zero volume or a missing buy volume cannot be events
            if not volume or buy_volume is None:
                continue
            if volume >= volume_limit and buy_volume >= buy_volume_limit:
                event = dict(obs)
                event['position'] = rank_by_coin[coin]
                event['timeseries_position'] = index
                events.setdefault(coin, []).append(event)
        if coin in events:
            windows[coin] = window

    n_coins = len(events)
    print(f'Total Coins: {n_coins}')
    for coin, coin_events in events.items():
        len_events_coin = len(coin_events)
        position = coin_events[0]['position']
        datetime_first_event = coin_events[0]['_id']
        print(f'{len_events_coin} events for {coin} in position {position}')
        print(f'First event at {datetime_first_event}')

    # minutes past the event at which the timestamps are compared (ascending)
    reference_time = [5, 10, 15, 20, 25, 30, 40, 50, 60, 90, 120, 240, 480, 720, 1440]

    # sanity check: the observation found "minutes" positions after the event should carry
    # the timestamp "event time + minutes"
    for coin, coin_events in events.items():
        timeseries = windows[coin]
        for event in coin_events:
            datetime_initial_price = datetime.fromisoformat(event['_id'])
            position_timeseries = event['timeseries_position']
            for minutes in reference_time:
                target_position = position_timeseries + minutes
                if target_position >= len(timeseries):
                    # the window ends before this horizon, and so will every later one
                    break
                estimated_iso_timestamp_end = (datetime_initial_price + timedelta(minutes=minutes)).isoformat()
                real_iso_timestamp_end = timeseries[target_position]['_id']
                print(f'Estimated ISO timestamp: {estimated_iso_timestamp_end}')
                print(f'Real ISO timestamp: {real_iso_timestamp_end}')

    # TODO: CHECK IF THE COIN IS CONSISTENT IN BENCHMARK

    return events


# NOTE(review): `most_traded_coins` is not assigned in any cell visible here -- confirm it is
# defined before this cell runs.
events = search_focus_points(
    data,
    most_traded_coins,
    days_start=2,
    days_finish=1,
    volume_limit=6,
    buy_volume_limit=0.55,
    referenceTime_volume='vol_5m',
    referenceTime_Buyvolume='buy_vol_5m',
    filter_position=50,
)



In [None]:
# Show the events collected for IOSTUSDT (raises KeyError if that coin has no event).
events['IOSTUSDT']