This module: 
1. Tests the pumped_coin to test the Download social functions. 
2. Move them to the uitls functions bank 
3. Download them

In [14]:
import os, sys
import pandas as pd 
import numpy as np
import requests 
from datetime import datetime, date, time, timedelta
import pytz
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from mpl_finance import candlestick_ohlc
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.ticker as mticker
%load_ext autoreload
%matplotlib inline
import sys, os

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
#To add the super-parent directory
sys.path.append(os.path.abspath(os.path.join('../..' )))
from utils import *

Get all the list of unique coins along with their ID for prepartion...

In [16]:
coinlist = pd.read_pickle('../../Coin_Data/coin_list.pkl')[['currency','Id']]
#We only select those with valid ID; 
coinlist = coinlist.drop_duplicates().dropna()
coinlist.head()

Unnamed: 0,currency,Id
0,KMD,26132
1,WAN,240142
2,QSP,397757
3,GVT,385952
4,POWR,339617


Recycle and modify the existing Hourly Function to get Daily Data. 
The getDailySocialfromList calls on the getHistoricalDailySocial. 

In [17]:
def toUnixTimestamp(date:str, is_dst:bool=False):
    '''
    Convert the day in format %Y%M%D into UNIX timestamp at UTC/GMT time.
    :param date: date in YYYYMMDD format
    :param is_dst: whether Daylight Saving Time is on. Default: False
    '''
    print("If in US, check if DST is on and modify is_dst parameter (default=False)!")
    try:
        dt = datetime.strptime(date,"%Y%m%d")
        delta = 7 if is_dst else 8
        val = dt - timedelta(hours=delta)
        timestamp = str(int(datetime.timestamp(val)))
    except ValueError:
        raise
    return timestamp


In [18]:
def getDailySocialfromList(API_KEY:str, coin_list:pd.DataFrame, toDate:str='',toDate_is_dst=False,endDate:str='',endDate_is_dst=False):
    '''
    Use CryptoCompare API to download the HOURLY Social data according to specified parameters
    :param API_KEY: string of the personal key acqured on the site 
    :param coin_list: Panda Dataframe that cointains the coinlist and coin_id
    :param toDate: string of date in format 'YYYYMMDD' of the begining period 
    :param endDate: string of date in format  'YYYMMDD' of the end period to download
    :param is_dst: Boolen value whether each date under Daylight Saving Time
    '''
    endTs = toUnixTimestamp(endDate,endDate_is_dst)
    toTs = toUnixTimestamp(toDate,toDate_is_dst)
    error_coin = []
    
    #Loop through the coin list and download the data
    for index, row in coin_list.iterrows():
        coin = row['currency']
        coin_id = row['Id']
        print("\n-------\nCoin: ",coin)
        print("Coin_id: ",coin_id)
        
        #Sleep for 5 seconds after ever 10 coins;
        if index % 10 == 0:
            sleeper.sleep(5)
        
        if coin_id is np.NaN:
            error_coin.append((coin,coin_id))
            print("\n---!!Cannot lookup ID for coin ",coin)
        else:    
            filename = '../../Social_Data/Daily/Daily_'+coin+'_'+coin_id+'.pkl'
            print('\n---','Now downloading {0} with ID {1}'.format(coin,coin_id))
            data = getHistoricalDailySocial(API_KEY,coin_id,toTs=toTs,endTs=endTs)
            
            if type(data) is str:
                error_coin.append((coin,coin_id))
                continue
            #Preprocess the data to append the extra information
            data = convertTimeStamp(data)
            data.sort_values('time',ascending=True, inplace=True)
            data['date_utc'] = convertUTC(data,'date')

            pd.to_pickle(data,path=filename)
            print('Coin donwloaded! Saved at ' + filename)
            
            sleeper.sleep(1)
    return error_coin

In [19]:
def getHistoricalDailySocial(API_KEY:str,  coin_id:str, toTs:str, endTs: str=''
                           , limit:str='2000',verbose=True):
    '''
    Request Daily Social Signal data on the specified exchange until we hit the toTS date.
    API_KEY is required to download the data from their site.
    :param coin_id: str, given ID of coin on Cryptocompare
    :param toTs: UNIX timestamp, date to reach back in the past
    :param endTs: UNIX timestamp, date to stop on the right of interval
    '''
    coin_id='&coinId='+coin_id
   
    #If no end period (endTs) is specified, then the API defaults to the current date
    stopdate = int(toTs)
   
    if endTs != '':
        toTs = '&toTs='+endTs
    else:
        toTS = ''
    toTs='&toTS='+toTs
    limit='&limit='+limit
    
    api_key = '&api_key='+API_KEY
    
    API_REQUEST = 'https://min-api.cryptocompare.com/data/social/coin/histo/day?'
    url = API_REQUEST+coin_id+toTs+limit+api_key
    print(url)
    try: 
        temp = requests.get(url).json()['Data']
        df_temp = pd.DataFrame(temp)
        df = pd.DataFrame(temp)
        #Get the minimum timestamp/date to retrieve further back
        min_date = df_temp.time.min()
    except:
        print('Check URL format!')
        return 'Check URL format!!!'
        
    cnt=1
    while min_date > stopdate : 
        toTs = '&toTs='+str(min_date)
        url = API_REQUEST+coin_id+toTs+limit+api_key
        if verbose: 
            print('Mindate before: ',datetime.fromtimestamp(min_date))
            print('New batch: ' + url)
        
        temp = requests.get(url).json()['Data']
        df_temp = pd.DataFrame(temp)
        min_date = df_temp.time.min()
        
        if verbose: 
            print('Mindate after: ',datetime.fromtimestamp(min_date))
        #Append the dataset into the existin one 
        df = pd.concat([df, df_temp],axis=0)
        cnt += 1
        if cnt > 4:
            break
    
    return df

In [65]:
def getHistoricalHourlySocial(API_KEY:str,  coin_id:str, toTs:str, endTs: str=''
                           , limit:str='2000',verbose=True):
    '''
    Request Hourly Social Signal data on the specified exchange until we hit the toTS date.
    API_KEY is required to download the data from their site.
    '''
    coin_id='&coinId='+coin_id
   
    #If no end period (endTs) is specified, then the API defaults to the current date
    stopdate = int(toTs)
   
    if endTs != '':
        toTs = '&toTs='+endTs
    else:
        toTS = ''
    toTs='&toTS='+toTs
    limit='&limit='+limit
    
    api_key = '&api_key='+API_KEY
    
    API_REQUEST = 'https://min-api.cryptocompare.com/data/social/coin/histo/hour?'
    url = API_REQUEST+coin_id+toTs+limit+api_key
    print(url)
    try: 
        temp = requests.get(url).json()['Data']
        df_temp = pd.DataFrame(temp)
        df = pd.DataFrame(temp)
        #Get the minimum timestamp/date to retrieve further back
        min_date = df_temp.time.min()
    except:
        print('Check URL format!')
        return 'Check URL format!!!'
        
    cnt=1
    while min_date > stopdate : 
        toTs = '&toTs='+str(min_date)
        url = API_REQUEST+coin_id+toTs+limit+api_key
        if verbose: 
            print('Mindate before: ',datetime.fromtimestamp(min_date))
            print('New batch: ' + url)
        
        temp = requests.get(url).json()['Data']
        df_temp = pd.DataFrame(temp)
        min_date = df_temp.time.min()
        
        if verbose: 
            print('Mindate after: ',datetime.fromtimestamp(min_date))
        #Append the dataset into the existin one 
        df = pd.concat([df, df_temp],axis=0,sort=False)
        print("Data appended with shape {}, where mindate is {} and stopdate is {}".format(df.shape, min_date, stopdate))
        cnt+= 1
        if cnt > 10:
            break
    
    return df


In [63]:
def getHourlySocialfromList(API_KEY:str, coin_list:pd.DataFrame, toDate:str='',toDate_is_dst=False,endDate:str='',endDate_is_dst=False):
    '''
    Use CryptoCompare API to download the HOURLY Social data according to specified parameters
    :param API_KEY: string of the personal key acqured on the site 
    :param coin_list: Panda Dataframe that cointains the coinlist and coin_id
    :param toDate: string of date in format 'YYYYMMDD' of the begining period 
    :param endDate: string of date in format  'YYYMMDD' of the end period to download
    :param is_dst: Boolen value whether each date under Daylight Saving Time
    '''
    endTs = toUnixTimestamp(endDate,endDate_is_dst)
    toTs = toUnixTimestamp(toDate,toDate_is_dst)
    error_coin = []
    
    #Loop through the coin list and download the data
    for index, row in coin_list.iterrows():
        coin = row['currency']
        coin_id = row['Id']
        print("\n-------\nCoin: ",coin)
        print("Coin_id: ",coin_id)
        
        #Sleep for 5 seconds after ever 10 coins;
        if index % 10 == 0:
            sleeper.sleep(5)
        
        if coin_id is np.NaN:
            error_coin.append((coin,coin_id))
            print("\n---!!Cannot lookup ID for coin ",coin)
        else:    
            filename = '../../Social_Data/Hourly/Hourly_'+coin+'_'+coin_id+'.pkl'
            print('\n---','Now downloading {0} with ID {1}'.format(coin,coin_id))
            data = getHistoricalHourlySocial(API_KEY,coin_id,toTs=toTs,endTs=endTs)
            print("Data succesfully downloaded with shape", data.shape)
            
            if type(data) is str:
                error_coin.append((coin,coin_id))
                continue
            #Preprocess the data to append the extra information
            data = convertTimeStamp(data)
            data.sort_values('time',ascending=True, inplace=True)
            data['date_utc'] = convertUTC(data,'date')

            pd.to_pickle(data,path=filename)
            print('Coin donwloaded! Saved at ' + filename)
            
            sleeper.sleep(1)
    return error_coin

## TEST  1: Get a single historical data
Get historical data for coin KMD

### DAILY for KMD

In [53]:
API_KEY = '01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842'
endDate = '20191001'
toDate = '20180501'
endTs = toUnixTimestamp(endDate,True)
toTs = toUnixTimestamp(toDate,True)
print("END",endTs,"TO", toTs)

If in US, check if DST is on and modify is_dst parameter (default=False)!
If in US, check if DST is on and modify is_dst parameter (default=False)!
END 1569888000 TO 1525132800


In [54]:
KMD = getHistoricalDailySocial(API_KEY, '26132', toTs, endTs )

https://min-api.cryptocompare.com/data/social/coin/histo/day?&coinId=26132&toTS=&toTs=1569888000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842


In [55]:
#Note that the dates are correct 
KMD.time.sort_values().iloc[[0,2000]]

0       1397088000
2000    1569888000
Name: time, dtype: int64

In [23]:
KMD.head()

Unnamed: 0,comments,posts,followers,points,overview_page_views,analysis_page_views,markets_page_views,charts_page_views,trades_page_views,forum_page_views,...,reddit_comments_per_day,code_repo_stars,code_repo_forks,code_repo_subscribers,code_repo_open_pull_issues,code_repo_closed_pull_issues,code_repo_open_issues,code_repo_closed_issues,code_repo_contributors,time
0,0,0,0,0,0,0,0,0,0,0,...,0.0,0,0,0,0,0,0,0,0,1397088000
1,0,0,0,0,0,0,0,0,0,0,...,0.0,0,0,0,0,0,0,0,0,1397174400
2,0,0,0,0,0,0,0,0,0,0,...,0.0,0,0,0,0,0,0,0,0,1397260800
3,0,0,0,0,0,0,0,0,0,0,...,0.0,0,0,0,0,0,0,0,0,1397347200
4,0,0,0,0,0,0,0,0,0,0,...,0.0,0,0,0,0,0,0,0,0,1397433600


In [58]:
KMD_cols = KMD.columns
KMD.loc[1995:2000,['time',KMD_cols[1]]]

Unnamed: 0,time,posts
1995,1569456000,111
1996,1569542400,111
1997,1569628800,111
1998,1569715200,111
1999,1569801600,111
2000,1569888000,111


### HOURLY for KMD

In [73]:
INFX_h = getHistoricalHourlySocial(API_KEY, '877310', toTs, endTs )

https://min-api.cryptocompare.com/data/social/coin/histo/hour?&coinId=877310&toTS=&toTs=1569888000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842
Check URL format!


## Test 2: Download full data for all coins

## Test DAILY Download

In [40]:
coinlist_2 = coinlist[:2]
getDailySocialfromList(API_KEY, coin_list=coinlist_2,toDate='20180501',toDate_is_dst=True,endDate='20191001'
                       ,endDate_is_dst=True)

If in US, check if DST is on and modify is_dst parameter (default=False)!
If in US, check if DST is on and modify is_dst parameter (default=False)!

-------
Coin:  KMD
Coin_id:  26132

--- Now downloading KMD with ID 26132
https://min-api.cryptocompare.com/data/social/coin/histo/day?&coinId=26132&toTS=&toTs=1569888000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842
Coin donwloaded! Saved at ../../Social_Data/Daily/Daily_KMD_26132.pkl

-------
Coin:  WAN
Coin_id:  240142

--- Now downloading WAN with ID 240142
https://min-api.cryptocompare.com/data/social/coin/histo/day?&coinId=240142&toTS=&toTs=1569888000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842
Coin donwloaded! Saved at ../../Social_Data/Daily/Daily_WAN_240142.pkl


[]

So this process works. Now test the substraction process for np.
So we don't have to worry about 

## TEST HOURLY DOWNLOAD

In [67]:
getHourlySocialfromList(API_KEY, coin_list=coinlist_2,toDate='20180515',toDate_is_dst=True,endDate='20191001'
                       ,endDate_is_dst=True)

If in US, check if DST is on and modify is_dst parameter (default=False)!
If in US, check if DST is on and modify is_dst parameter (default=False)!

-------
Coin:  KMD
Coin_id:  26132

--- Now downloading KMD with ID 26132
https://min-api.cryptocompare.com/data/social/coin/histo/hour?&coinId=26132&toTS=&toTs=1569888000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842
Mindate before:  2019-07-09 09:00:00
New batch: https://min-api.cryptocompare.com/data/social/coin/histo/hour?&coinId=26132&toTs=1562688000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842
Mindate after:  2019-04-17 01:00:00
Data appended with shape (4002, 34), where mindate is 1555488000 and stopdate is 1526342400
Mindate before:  2019-04-17 01:00:00
New batch: https://min-api.cryptocompare.com/data/social/coin/histo/hour?&coinId=26132&toTs=1555488000&limit=2000&api_key=01a9f40086b2d1939be540e04f5ea2a3a0993c1288cc23fe1cd81fbee60ed842
Mindate after:  201

[]

## Check one downloaded PD

In [3]:
os.getcwd()

'/Users/hn/Documents/USC Others/ISI Research/PumpCoin/CrytoCompare/All_Pump_Analysis/Before_Announcement_Analysis'

In [5]:
swm = pd.read_pickle("../../Social_Data/Daily/Daily_SWM_755563.pkl")

In [9]:
swm.time.min()

1397088000

In [11]:
swm.time.max()

1569888000

In [68]:
kmd = pd.read_pickle("../../Social_Data/Hourly/Hourly_KMD_26132.pkl")

In [69]:
kmd.date_utc.describe()

count                         14007
unique                        14000
top       2018-08-10 08:00:00+00:00
freq                              2
first     2018-02-24 16:00:00+00:00
last      2019-10-01 00:00:00+00:00
Name: date_utc, dtype: object

In [70]:
kmd.head()

Unnamed: 0,time,comments,posts,followers,points,overview_page_views,analysis_page_views,markets_page_views,charts_page_views,trades_page_views,...,code_repo_stars,code_repo_forks,code_repo_subscribers,code_repo_open_pull_issues,code_repo_closed_pull_issues,code_repo_open_issues,code_repo_closed_issues,code_repo_contributors,date,date_utc
0,1519488000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2018-02-24 08:00:00,2018-02-24 16:00:00+00:00
1,1519491600,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2018-02-24 09:00:00,2018-02-24 17:00:00+00:00
2,1519495200,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2018-02-24 10:00:00,2018-02-24 18:00:00+00:00
3,1519498800,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2018-02-24 11:00:00,2018-02-24 19:00:00+00:00
4,1519502400,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,2018-02-24 12:00:00,2018-02-24 20:00:00+00:00


In [72]:
len(np.unique(kmd.columns))

36