Assuming that we have sucessfully download the coin data and the historical pumps. 
We can now retrieve the appropriate data using timestamps for analysis


In [20]:
import os, sys
import pandas as pd 
import numpy as np
import requests 
from datetime import datetime, date, time, timedelta
import pytz
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from mpl_finance import candlestick_ohlc
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.ticker as mticker

In [21]:
sys.path.append(os.path.abspath(os.path.join('../..')))
from utils import toUnixTimestamp

In [22]:
def getPumpHour(x: datetime):
    '''Only extract the next perfect hour. If the minute falls into the later half of the hour
    ,increase the hour by 1. 
    '''
    t = datetime(x.year,x.month,x.day,x.hour)
    if x.minute >= 30:
        t = t + timedelta(hours=1)
    return t

In [23]:
def getPumpHourTimestamp_old(x: datetime, t:str):
    '''
        Prevent automatic convesrion to Local Timezone by datetime
        :param x: datetime variable of the desired frame
        :param t: string representation of UNIX timestamp of x
    '''
    t = int(t)
    if x.minute < 30:
        return t -(t%3600)
    else:
        return t -(t%3600) + 3600

In [24]:
def getPriorTimestamp(x:datetime, t:str, h:int):
    '''
        Get the timestamp - h hours. 
        :param x: datetime of the desired chronological point
        :param h: integer, how many hours to go back in time
        :param t: integer, time stamp of the exact hour (0 minutes and 0 seconds)
    '''
    t = int(t)
#     t = t - (t%3600)
    if x.minute < 30:
        t =  t -(t%3600)
    else:
        t = t -(t%3600) + 3600
    #Back off depending on how many hours 
    if h == 0:
        return t
    return t - h*3600

In [25]:
def getPriorDaystamp(x:datetime, d:int=1):
    '''
        Get the timestamp - d days at 0 am GMT/UTC of the x-d days. 
        :param d: integer, time stamp of the exact hour (0 minutes and 0 seconds)
    '''
    t = x - timedelta(days=d,hours=x.hour,seconds=x.second + 60*x.minute)
    t = int(datetime.timestamp(t))
    return t

In [27]:
p = pd.read_pickle('../all_pumps.pkl')
p = p.reset_index(drop=True)

In [28]:
p.shape

(60375, 7)

In [29]:
p[10:15]

Unnamed: 0,currency,exchange,pumptime_edited,coin_name,timestamp,event_date,event_time
10,NAV,Binance,2019-12-19 17:00:15+00:00,LINK,1576774815,2019-12-19,17:00:15
11,NAV,Binance,2019-12-19 17:00:15+00:00,DNT,1576774815,2019-12-19,17:00:15
12,NAV,Binance,2019-12-19 17:00:15+00:00,QKC,1576774815,2019-12-19,17:00:15
13,NAV,Binance,2019-12-19 17:00:15+00:00,THETA,1576774815,2019-12-19,17:00:15
14,NAV,Binance,2019-12-19 17:00:15+00:00,CDT,1576774815,2019-12-19,17:00:15


In [30]:
x = p.pumptime_edited[12]
getPriorDaystamp(x,2)

1576540800

## Define Merging Functions

In [40]:
def getOHLCVfor_X_Hours(currency:str, exchange: str, pump:pd.DataFrame, h_back: int = 7):
    pump_df = pump.copy()
    try:
        filename = '../../Coin_Data/Hourly/Hourly_'+currency+'_at_'+exchange+'.pkl'
        print("---Looking for ",filename)
        ohlcv = pd.read_pickle(filename)
        base_append = ['open','high','low','close','volumeto']
        back_list = []
        
        for h in range(h_back+1): 
            hour_var = 'pump_hour_'+str(h)
            back_list.append(hour_var)
            
            #Make a copy of the ohlcv with new variable names
            append_name = ['time'] + [name+'_'+str(h) for name in base_append]
            ohlcv_h = ohlcv[['time']+base_append]
            ohlcv_h.columns = append_name
            #print(ohlcv_h.columns.values)
            pump_df[hour_var] = pump_df[['pumptime_edited','timestamp']].apply(func=lambda x: getPriorTimestamp(x[0],x[1],h),axis=1)
            
            #Perform the merge to get data
            pump = pd.merge(left=pump_df,left_on=hour_var,right=ohlcv_h,right_on='time',how='left')
            pump = pump.drop('time',axis=1)
            pump_df = pump.reset_index(drop=True)
            
        return pump_df
    except FileNotFoundError:
        print('No HOURLY data found on coin!')
        return {'currency':currency,'exchange':exchange}

In [45]:
def getOHLCVfor_X_Days(currency:str, exchange: str, pump:pd.DataFrame, d_back: int = 2):
    pump_df = pump.copy()
    try:
        filename = '../../Coin_Data/Daily/Daily_'+currency+'_at_'+exchange+'.pkl'
        print("---Looking for ",filename)
        ohlcv = pd.read_pickle(filename)
        base_append = ['open','high','low','close','volumeto']
        back_list = []
        
        #We only want the info the days BEFORE the pump, not the day it happened
        for d in range(1,d_back+1): 
            day_var = 'pump_day_'+str(d)
            back_list.append(day_var)
            
            #Make a copy of the ohlcv with new variable names
            append_name = ['time'] + [name+'_day_'+str(d) for name in base_append]
            ohlcv_d = ohlcv[['time']+base_append]
            ohlcv_d.columns = append_name
            #print(ohlcv_h.columns.values)
            pump_df[day_var] = pump_df['pumptime_edited'].apply(func=lambda x: getPriorDaystamp(x,d))
            
            #Perform the merge to get data
            pump = pd.merge(left=pump_df,left_on=day_var,right=ohlcv_d,right_on='time',how='left')
            pump = pump.drop('time',axis=1)
            pump_df = pump.reset_index(drop=True)
            
        return pump_df
    except FileNotFoundError:
        print('No DAILY data found on coin!')
        return {'currency':currency,'exchange':exchange}

### Test Individual Coins

In [42]:
pump = p.loc[(p.currency == 'AST') & (p.exchange == 'Binance')]

In [44]:
merge_data = getOHLCVfor_X_Hours('AST','Binance',pump,6)
merge_data.head(2)

---Looking for  ../../Coin_Data/Hourly/Hourly_AST_at_Binance.pkl


Unnamed: 0,currency,exchange,pumptime_edited,coin_name,timestamp,event_date,event_time,pump_hour_0,open_0,high_0,...,high_5,low_5,close_5,volumeto_5,pump_hour_6,open_6,high_6,low_6,close_6,volumeto_6
0,AST,Binance,2019-10-04 18:02:03+00:00,VIB,1570212123,2019-10-04,18:02:03,1570212000,3e-06,3e-06,...,3e-06,3e-06,3e-06,1.171,1570190400,3e-06,3e-06,3e-06,3e-06,0.6295
1,AST,Binance,2019-10-04 18:02:03+00:00,STEEM,1570212123,2019-10-04,18:02:03,1570212000,3e-06,3e-06,...,3e-06,3e-06,3e-06,1.171,1570190400,3e-06,3e-06,3e-06,3e-06,0.6295


In [46]:
merge_data = getOHLCVfor_X_Days('AST','Binance',merge_data,7)

---Looking for  ../../Coin_Data/Daily/Daily_AST_at_Binance.pkl


In [16]:
coin_list = pd.read_pickle('../Coin_Data/coin_list.pkl')

In [17]:
coin_list.head(2)

Unnamed: 0,currency,exchange,Id
0,KMD,Binance,26132
1,WAN,Binance,240142


In [18]:
coin_list = coin_list[['currency','exchange']].drop_duplicates()
# #Only keep the rows where ID is available 
# ind = coin_list.Id[coin_list.Id.isna()].index
# x = coin_list.drop(index=ind)
# type(x.iloc[23].Id)

In [19]:
coin_error_hour = []
coin_error_day  = []
for index,row in coin_list.iterrows():
    currency = row['currency']
    exchange = row['exchange']
    temp_2 = None
    #Must select only the rows with the right currency 
    pump = p.loc[(p.currency == currency) & (p.exchange == exchange)]
    temp = getOHLCVfor_X_Hours(currency,exchange,pump,7)
    if isinstance(temp, pd.DataFrame) : 
        temp_2 = getOHLCVfor_X_Days(currency,exchange,temp,7)
        merge_data = pd.concat([merge_data,temp_2],axis=0,sort=False)
    else:
        #Append daily data only when the coin is available
        coin_error_hour.append(temp)
           

---Looking for  ../Coin_Data/Hourly_KMD_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_KMD_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_WAN_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_WAN_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_QSP_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_QSP_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_GVT_at_Yobit.pkl
---Looking for  ../Coin_Data/Daily_GVT_at_Yobit.pkl
---Looking for  ../Coin_Data/Hourly_POWR_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_POWR_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_OST_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_OST_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_SNT_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_SNT_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_BTS_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_BTS_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_VIB_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_VIB_at_Binance.pkl
---Looking for  ../Co

---Looking for  ../Coin_Data/Hourly_BRD_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_BRD_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_LEND_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_LEND_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_AMB_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_AMB_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_AST_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_AST_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_IOC_at_Bittrex.pkl
---Looking for  ../Coin_Data/Daily_IOC_at_Bittrex.pkl
---Looking for  ../Coin_Data/Hourly_ARDR_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_ARDR_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_STORJ_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_STORJ_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_TNB_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_TNB_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_PIVX_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_PIVX_at_Binance.pkl
---Lookin

---Looking for  ../Coin_Data/Hourly_WTC_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_WTC_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_WINGS_at_Binance.pkl
---Looking for  ../Coin_Data/Daily_WINGS_at_Binance.pkl
---Looking for  ../Coin_Data/Hourly_TNT_at_Yobit.pkl
---Looking for  ../Coin_Data/Daily_TNT_at_Yobit.pkl
---Looking for  ../Coin_Data/Hourly_ZIP_at_Yobit.pkl
---Looking for  ../Coin_Data/Daily_ZIP_at_Yobit.pkl
---Looking for  ../Coin_Data/Hourly_FTEC_at_Yobit.pkl
No HOURLY data found on coin!
---Looking for  ../Coin_Data/Hourly_VIA_at_Yobit.pkl
---Looking for  ../Coin_Data/Daily_VIA_at_Yobit.pkl
---Looking for  ../Coin_Data/Hourly_TDH_at_Yobit.pkl
No HOURLY data found on coin!
---Looking for  ../Coin_Data/Hourly_OK_at_Yobit.pkl
---Looking for  ../Coin_Data/Daily_OK_at_Yobit.pkl
---Looking for  ../Coin_Data/Hourly_BSV_at_Bittrex.pkl
---Looking for  ../Coin_Data/Daily_BSV_at_Bittrex.pkl
---Looking for  ../Coin_Data/Hourly_BAT_at_Bittrex.pkl
---Looking for  ../Coin_Data/

---Looking for  ../Coin_Data/Daily_KRL_at_Yobit.pkl


In [20]:
merge_data = merge_data.drop_duplicates()
merge_data = merge_data.reset_index(drop=True)
merge_data.shape

(437, 103)

In [21]:
merge_data[merge_data.open_0.isna()].shape

(10, 103)

In [22]:
len(coin_error_hour)

59

In [23]:
is_na_indices = merge_data[merge_data.isna().any(axis=1)].index

In [24]:
pump_data = merge_data.drop(index=is_na_indices)

In [25]:
pump_data.columns.values

array(['channelLink', 'channelTitle', 'currency', 'duration', 'exchange',
       'priceBeforePump', 'signalTime', 'pumptime_edited', 'timestamp',
       'pump_weekday', 'pump_date', 'pump_hour', 'pump_minute',
       'pump_hour_0', 'open_0', 'high_0', 'low_0', 'close_0',
       'volumeto_0', 'pump_hour_1', 'open_1', 'high_1', 'low_1',
       'close_1', 'volumeto_1', 'pump_hour_2', 'open_2', 'high_2',
       'low_2', 'close_2', 'volumeto_2', 'pump_hour_3', 'open_3',
       'high_3', 'low_3', 'close_3', 'volumeto_3', 'pump_hour_4',
       'open_4', 'high_4', 'low_4', 'close_4', 'volumeto_4',
       'pump_hour_5', 'open_5', 'high_5', 'low_5', 'close_5',
       'volumeto_5', 'pump_hour_6', 'open_6', 'high_6', 'low_6',
       'close_6', 'volumeto_6', 'pump_day_1', 'open_day_1', 'high_day_1',
       'low_day_1', 'close_day_1', 'volumeto_day_1', 'pump_day_2',
       'open_day_2', 'high_day_2', 'low_day_2', 'close_day_2',
       'volumeto_day_2', 'pump_day_3', 'open_day_3', 'high_day_3',
     

## The new data set should have all Available Value

In [26]:
pump_data.isna().any().any()

False

## MANUALLY VERIFY SOME RANDOM COINS AND COMPARE DATA

In [27]:
pump_data[pump_data.index == 151][['currency','exchange','signalTime','pumptime_edited','timestamp','pump_hour_0','pump_hour_1']]

Unnamed: 0,currency,exchange,signalTime,pumptime_edited,timestamp,pump_hour_0,pump_hour_1
151,REQ,Binance,2019-07-13T08:48:44Z,2019-07-13 08:48:44+00:00,1563007724,1563008400,1563004800


In [34]:
test = pump_data.iloc[210:215].sort_values('pumptime_edited')
test[['currency','exchange','signalTime','pumptime_edited','timestamp','pump_hour_0','pump_hour_1']]

Unnamed: 0,currency,exchange,signalTime,pumptime_edited,timestamp,pump_hour_0,pump_hour_1
218,LEND,Yobit,2019-03-08T19:00:11,2019-03-08 19:00:11+00:00,1552071611,1552071600,1552068000
217,LEND,Yobit,2019-03-17T19:00:12.645,2019-03-17 19:00:12+00:00,1552849212,1552849200,1552845600
216,LEND,Yobit,2019-04-09T18:00:01.944,2019-04-09 18:00:01+00:00,1554832801,1554832800,1554829200
215,LEND,Yobit,2019-06-15T17:00:11.522,2019-06-15 17:00:11+00:00,1560618011,1560618000,1560614400
214,LEND,Yobit,2019-06-18T17:00:00,2019-06-18 17:00:00+00:00,1560877200,1560877200,1560873600


## TEST DAILY DATA

In [73]:
x1 = pd.read_pickle('../Coin_Data/Daily_LEND_at_Yobit.pkl')

In [74]:
x1 = x1[x1.date_utc >= datetime.strptime('2019/03/10 00:00:00+0000',"%Y/%m/%d %H:%M:%S%z")]
x1 = x1[x1.date_utc <= datetime.strptime('2019/03/18 00:00:00+0000',"%Y/%m/%d %H:%M:%S%z")]
x1.sort_values('date_utc',ascending=False)

Unnamed: 0,close,high,low,open,time,volumefrom,volumeto,exchange,date,date_utc
34,2e-06,2e-06,2e-06,2e-06,1552867200,437.6,0.001033,Yobit,2019-03-17 17:00:00,2019-03-18 00:00:00+00:00
33,2e-06,2e-06,2e-06,2e-06,1552780800,0.0,0.0,Yobit,2019-03-16 17:00:00,2019-03-17 00:00:00+00:00
32,2e-06,2e-06,2e-06,2e-06,1552694400,0.0,0.0,Yobit,2019-03-15 17:00:00,2019-03-16 00:00:00+00:00
31,2e-06,2e-06,2e-06,2e-06,1552608000,0.0,0.0,Yobit,2019-03-14 17:00:00,2019-03-15 00:00:00+00:00
30,2e-06,3e-06,2e-06,3e-06,1552521600,175.63,0.000302,Yobit,2019-03-13 17:00:00,2019-03-14 00:00:00+00:00
29,3e-06,3e-06,3e-06,3e-06,1552435200,0.0,0.0,Yobit,2019-03-12 17:00:00,2019-03-13 00:00:00+00:00
28,3e-06,3e-06,3e-06,3e-06,1552348800,0.0,0.0,Yobit,2019-03-11 17:00:00,2019-03-12 00:00:00+00:00
27,3e-06,3e-06,2e-06,2e-06,1552262400,1643.86,0.004025,Yobit,2019-03-10 17:00:00,2019-03-11 00:00:00+00:00
26,2e-06,2e-06,2e-06,2e-06,1552176000,413.64,0.000889,Yobit,2019-03-09 16:00:00,2019-03-10 00:00:00+00:00


## If volume matches, then the rest should matches too

In [72]:
test.filter(regex='volumeto_day_*')[test.index == 217]

Unnamed: 0,volumeto_day_1,volumeto_day_2,volumeto_day_3,volumeto_day_4,volumeto_day_5,volumeto_day_6,volumeto_day_7
217,0.0,0.0,0.000302,0.0,0.0,0.004025,0.000889


###... Random Test Notice that in BitCoin everything is the same, but converting to Satoshi (1E-8 of a Bitcoin) makes a difference

In [44]:
x1[x1.index==50]['close'] *1E8

Series([], Name: close, dtype: float64)

In [45]:
x1[x1.index==51]['close'] *1E8

Series([], Name: close, dtype: float64)

# TEST HOURLY DATA

In [65]:
y1 = pd.read_pickle('../Coin_Data/Hourly_LEND_at_Yobit.pkl')

In [67]:
y1 = y1[y1.date_utc >= datetime.strptime('2019/03/08 11:00:00+0000',"%Y/%m/%d %H:%M:%S%z")]
y1 = y1[y1.date_utc <= datetime.strptime('2019/03/08 21:00:00+0000',"%Y/%m/%d %H:%M:%S%z")]
y1.sort_values('date_utc',ascending=False)

Unnamed: 0,close,high,low,open,time,volumefrom,volumeto,exchange,date,date_utc
1797,2e-06,2e-06,2e-06,2e-06,1552078800,0.0,0.0,Yobit,2019-03-08 13:00:00,2019-03-08 21:00:00+00:00
1796,2e-06,2e-06,2e-06,2e-06,1552075200,4271.65,0.009611,Yobit,2019-03-08 12:00:00,2019-03-08 20:00:00+00:00
1795,2e-06,2e-06,2e-06,2e-06,1552071600,0.0,0.0,Yobit,2019-03-08 11:00:00,2019-03-08 19:00:00+00:00
1794,2e-06,2e-06,2e-06,2e-06,1552068000,0.0,0.0,Yobit,2019-03-08 10:00:00,2019-03-08 18:00:00+00:00
1793,2e-06,2e-06,2e-06,2e-06,1552064400,0.0,0.0,Yobit,2019-03-08 09:00:00,2019-03-08 17:00:00+00:00
1792,2e-06,2e-06,2e-06,2e-06,1552060800,0.0,0.0,Yobit,2019-03-08 08:00:00,2019-03-08 16:00:00+00:00
1791,2e-06,2e-06,2e-06,2e-06,1552057200,0.0,0.0,Yobit,2019-03-08 07:00:00,2019-03-08 15:00:00+00:00
1790,2e-06,2e-06,2e-06,2e-06,1552053600,0.0,0.0,Yobit,2019-03-08 06:00:00,2019-03-08 14:00:00+00:00
1789,2e-06,2e-06,2e-06,2e-06,1552050000,0.0,0.0,Yobit,2019-03-08 05:00:00,2019-03-08 13:00:00+00:00
1788,2e-06,2e-06,2e-06,2e-06,1552046400,0.0,0.0,Yobit,2019-03-08 04:00:00,2019-03-08 12:00:00+00:00


In [69]:
test.filter(regex='volumeto_[0-9]')[test.index == 218]

Unnamed: 0,volumeto_0,volumeto_1,volumeto_2,volumeto_3,volumeto_4,volumeto_5,volumeto_6,volumeto_7
218,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


...Things Look Good...__PUMP_DATA__ is the file to use going forward...

In [75]:
pump_data.to_pickle('pump_data.pkl')