In [1]:
import time
import datetime
import requests
import pandas as pd
import os
import shutil

In [2]:
# Convert a Unix timestamp to a readable local date-time string
def convert_time(ts):
    """Return the local date-time of a Unix timestamp as a string.

    The timestamp is converted with ``datetime.datetime.fromtimestamp`` (so it
    is interpreted in the machine's local time zone) and rendered with
    ``str()``.
    """
    return str(datetime.datetime.fromtimestamp(ts))
#=(C2+8*3600)/86400+70*365+19

In [3]:
# Drop the leading rows whose 'close' value is 0
def drop_empty(dataframe):
    """Drop the leading rows whose ``close`` value is 0.

    Rows are removed from the top of the frame up to (but not including) the
    first row whose ``close`` is not 0; zero rows after that point are kept.

    Args:
        dataframe: pandas DataFrame with a ``close`` column.

    Returns:
        The frame without its leading zero-``close`` rows. The remaining rows
        keep their original index labels. An all-zero frame yields an empty
        frame with the same columns.
    """
    # Work positionally so the result does not depend on the index labels
    # being exactly 0..n-1, and so no per-row drop is needed.
    has_value = dataframe['close'].ne(0).values
    if has_value.any():
        first_kept = int(has_value.argmax())
    else:
        first_kept = len(dataframe)
    return dataframe.iloc[first_kept:]

In [4]:
# Waiting status display
def wait(sleepTime):
    """Print a per-second countdown from ``sleepTime`` down to 1, sleeping one second per step."""
    print("Waiting...restart after:")
    for remaining in range(sleepTime, 0, -1):
        print("    " + str(remaining) + " second(s)")
        time.sleep(1)

In [5]:
# Get price data via connecting API
def get_price_data(apiKey, url_data, exchange, fsym, tsym, limit, current_time, rounds):
    """Download price records page by page, walking backwards in time.

    Each round sends one GET request ending at ``toTs`` and then moves
    ``toTs`` to 60 seconds after the ``time`` of the first row received.
    Later pages are placed in front of earlier ones when the pages are joined.

    Args:
        apiKey: Value sent as ``api_key``.
        url_data: Endpoint URL to query.
        exchange: Value sent as ``e``.
        fsym: Value sent as ``fsym``.
        tsym: Value sent as ``tsym``.
        limit: Value sent as ``limit`` on every request.
        current_time: Unix timestamp used as ``toTs`` for the first request.
        rounds: Number of pages to request.

    Returns:
        A DataFrame of all pages with columns sorted by name and the index
        reset (the old per-page index is kept as an ``index`` column), after
        ``drop_empty`` has removed the leading zero-``close`` rows.

    Notes:
        Network/HTTP failures and unusable payloads (invalid JSON, missing or
        empty ``Data``) are retried indefinitely after a 5 second countdown.
        Any other exception propagates to the caller instead of being retried.
    """
    expected_columns = ('time', 'close', 'high', 'low', 'open', 'volumefrom', 'volumeto')
    print("Estimated finsh time: " + str(rounds * 3) + "(s)")
    pages = []
    st = time.perf_counter()
    for i in range(0, rounds):
        payload_data = {
            "api_key": apiKey,
            "e": exchange,
            "fsym": fsym,
            "tsym": tsym,
            "limit": limit,
            "toTs": current_time,
        }
        while True:
            try:
                print("Getting data processing in: " + str(i + 1) + '/' + str(rounds), end="\r")
                response = requests.get(url_data, params=payload_data, timeout=20)
                response.raise_for_status()
                result = response.json()
                df = pd.DataFrame(result['Data'])
                # NOTE(review): the next page ends 60 s after the first row of
                # this page, which presumably makes that row show up again in
                # the next page - confirm against the API and de-duplicate if so.
                current_time = df['time'].values[0] + 60
                break
            except (requests.RequestException, ValueError, KeyError, IndexError) as err:
                # Only transport problems and unusable payloads are retried;
                # programming errors must surface instead of looping forever.
                print("Connection refused by the server..")
                print("    (" + type(err).__name__ + ": " + str(err) + ")")
                wait(5)
                print("Continue...")
        pages.append(df)

    # Measure after the last request so the final round is included.
    duration = round(time.perf_counter() - st, 2)

    if pages:
        # Pages were fetched newest-first; reverse so the last-fetched page leads.
        price_data = pd.concat(pages[::-1], sort=True)
        # Guarantee the expected columns exist and keep the columns name-sorted.
        ordered_columns = sorted(set(price_data.columns).union(expected_columns))
        price_data = price_data.reindex(columns=ordered_columns)
    else:
        price_data = pd.DataFrame(columns=expected_columns)

    price_data_reindex = price_data.reset_index()
    price_data_dropnull = drop_empty(price_data_reindex)
    print("\n ")
    print("Actual finsh time: " + str(duration) + "(s)")
    print("\n ")
    print("Capturing completed!")
    return price_data_dropnull

In [26]:
# Get pair information via connecting API
def _plan_requests(data_total_volumn, page_size=2000):
    """Return ``(rounds, limit)`` for fetching ``data_total_volumn`` records in pages of at most ``page_size``."""
    if data_total_volumn >= page_size:
        # Ceiling division: a partially filled last page still needs its own round.
        return -(-data_total_volumn // page_size), page_size
    return 1, data_total_volumn


def get_histo_data(apiKey, exchange, fsym, tsym, last_file, current_time, all_files_loc):
    """Capture hourly history for one pair and save it as a CSV file.

    The exchange listing is queried first to validate ``exchange``/``fsym``/
    ``tsym`` and to read the pair's ``histo_minute_start_ts``. Then:

    * if ``last_file`` can be read, only records newer than its last ``time``
      value are downloaded and appended to the previous data;
    * otherwise everything from the pair's start timestamp is downloaded.

    The result is written to
    ``<all_files_loc>/<exchange>_hourly_<fsym>_<tsym>_<current_time>.csv``.

    Args:
        apiKey: API key passed to every request.
        exchange: Exchange name.
        fsym: Symbol used as the ``pairs`` key of the listing.
        tsym: Symbol used as the ``tsyms`` key of the listing.
        last_file: Path of the previously captured CSV.
        current_time: Unix timestamp at which the capture ends.
        all_files_loc: Directory in which the new CSV is written.

    Returns:
        None. The function also returns early (after printing a message) when
        the pair is not found in the listing or when there is nothing to update.
    """
    # Config data volumn parm
    payload_volumn = {
        "api_key": apiKey,
        "e": exchange
    }

    # Use /exchanges to get exchanges data
    url_vol = "https://min-api.cryptocompare.com/data/v4/all/exchanges"

    # Url to connect with API
    url_data = "https://min-api.cryptocompare.com/data/histohour"

    # Request the listing, retrying on network errors or unparsable JSON.
    # A bare except is avoided so Ctrl-C can still interrupt the retry loop.
    while True:
        try:
            result_volumn = requests.get(url_vol, params=payload_volumn, timeout=20).json()
            break
        except (requests.RequestException, ValueError):
            print("Connection refused by the server..")
            wait(5)
            print("Continue...")

    # Get start timestamp of the pair
    try:
        pair_start_at = result_volumn['Data']['exchanges'][exchange]['pairs'][fsym]['tsyms'][tsym]['histo_minute_start_ts']
    except (KeyError, TypeError):
        # If any of parameters is wrong, terminated process
        print("Parameter Error! process terminated...")
        return

    print("Start capturing data: "+ fsym + " to "+ tsym + " in " + exchange)
    # Standard file name of the capture being produced
    file_name = all_files_loc + "/" + exchange + "_hourly_" + fsym + "_" + tsym + "_" + str(current_time) + ".csv"

    # Only the read of the previous file decides which path is taken; a
    # FileNotFoundError raised later (e.g. while saving) must not be mistaken
    # for a missing history file.
    try:
        pre_data = pd.read_csv(last_file)
    except FileNotFoundError:
        pre_data = None

    if pre_data is not None:
        print(last_file + " Found. ")
        print("Continue capture data...")
        print("----------------------------------------------" )
        pair_start_at = pre_data.tail(1).time.values[0]
        print("Last data end at:  " + convert_time(pair_start_at))
        print("Start capture at:   " + convert_time(current_time))
        # Number of hourly records between the last stored one and now
        data_total_volumn = int(round((current_time - pair_start_at) / 3600))
        rounds, limit = _plan_requests(data_total_volumn)
        print("Need to be added:  " + str(data_total_volumn) + " record(s)")
        print("----------------------------------------------" )
        if data_total_volumn <= 0:
            print("No data to update")
            print("Terminated...")
            return
        print("loading...")
        data_update = get_price_data(apiKey, url_data, exchange, fsym, tsym, limit, current_time, rounds)
        # Keep only rows newer than the last stored one. This removes the
        # overlapping first row and anything over-fetched by whole pages.
        data_update = data_update[data_update['time'] > pair_start_at]
        # Append to previous data
        new_data = pd.concat([pre_data, data_update], sort=True)
        print(file_name)
        new_data.to_csv(file_name)
        print("Successfully saved as file: " + str(file_name))
        return

    # No previous file could be read: capture the full history
    if len(all_files_loc) == 0:
        print("History file Not Found.")
    else:
        print(last_file + " Not Found.")
    print("Create new file...")
    print("----------------------------------------------" )
    print("History data start at:  " + convert_time(pair_start_at))
    print("Start capture at:        " + convert_time(current_time))
    # Total volumn of the pair
    data_total_volumn = int(round((current_time - pair_start_at) / 3600))
    rounds, limit = _plan_requests(data_total_volumn)
    print("Total data volumn:      " + str(data_total_volumn))
    print("----------------------------------------------" )
    print("loading...")
    new_data = get_price_data(apiKey, url_data, exchange, fsym, tsym, limit, current_time, rounds)
    new_data.to_csv(file_name)
    print("Successfully saved as file: " + str(file_name))

In [7]:
# Load the objective exchange(s) info
# You must get the exchange data first
def load_exchange_address_book(file_name):
    """Read the exchange info CSV ``file_name`` and return it as a DataFrame."""
    address_book = pd.read_csv(file_name)
    return address_book

In [8]:
# Load file list
def load_exist_file(filePath):
    """Return the names of the entries in the directory ``filePath`` (as given by ``os.listdir``)."""
    entries = os.listdir(filePath)
    return entries

In [9]:
# Check whether a file name is present in the given file list
# Returns True if it is, False otherwise
def is_file_exits(file_name, all_files):
    """Return the result of the membership test ``file_name in all_files``."""
    found = file_name in all_files
    return found

In [11]:
# Get multiple pairs
def get_multi_pairs(apiKey, exchanges, all_files, current_time, last_end_time, all_files_loc, history_file):
    """Capture history for every exchange/pair row of ``exchanges``.

    For each row (read by integer label from the ``exchange``, ``fsym`` and
    ``tsym`` columns) the standard file name for ``current_time`` is built.
    Rows whose file is already listed in ``all_files`` are skipped; for the
    others ``get_histo_data`` is called with the previous capture's file,
    expected under ``history_file``.
    """
    for row in range(len(exchanges)):
        # Get exchange and pair info
        exchange_name = exchanges.loc[row, 'exchange']
        fsym = exchanges.loc[row, 'fsym']
        tsym = exchanges.loc[row, 'tsym']

        # Shared part of the standard data file names
        stem = exchange_name + "_hourly_" + fsym + "_" + tsym + "_"
        file_name = stem + str(current_time) + ".csv"
        last_file_name = stem + str(last_end_time) + ".csv"

        if is_file_exits(file_name, all_files):
            print(file_name + " is existed jump over")
            continue

        print(" ")
        last_file = history_file + '/' + last_file_name
        get_histo_data(apiKey, exchange_name, fsym, tsym, last_file, current_time, all_files_loc)
        print(" ")
        print(" ")

In [12]:
# Record last end time
def write_last_end_time(loction, ts):
    """Overwrite the file at ``loction`` with ``str(ts)``.

    Args:
        loction: Path of the file to write (truncated if it exists).
        ts: Value to record; it is written via ``str()`` without a newline.
    """
    # The context manager closes the handle even if the write fails.
    with open(loction, 'w') as f:
        f.write(str(ts))

In [13]:
# Read last end time
def read_lats_end_time(loction):
    """Return the first line of the file at ``loction`` converted to ``int``.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the first line is not an integer.
    """
    # The context manager closes the handle, which the one-liner never did.
    with open(loction) as f:
        return int(f.readline())

In [15]:
# # Main function1
# # Get one specific crypto pair history data

# #=====================(Param Sample)=========================
# # Use your api key   
# apiKey = "xxxxxxxxxxxxxx"

# # Exchange name:  

# exchange = "BitTrex"  

# # Pair 

# fsym = "ETH"        
# tsym = "USDT"   

# # Current unix time
# end = int(time.time())

# #==========================================================

# #========================(Optional)============================
# # Last file 
# # The data file captured last time, 
# # If you want to continue capture data, 
# # Please fill in the last_file with previous file name
# # Otherwise leave 'last_file' as an empty string (the default)

# last_file = ""
# #==========================================================
# Data storage directory: create it before storing the data
# all_files_loc = 'D:/Data'
# get_histo_data(apiKey, exchange, fsym, tsym, last_file, end, all_files_loc)

In [16]:
def get_current_ts():
    """Return the current Unix time rounded down to a multiple of 3600 seconds."""
    seconds_per_hour = 3600
    now = int(time.time())
    return (now // seconds_per_hour) * seconds_per_hour

In [17]:
def ts_to_time(ts):
    """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` in local time."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(ts))

In [18]:
def read_last_cap_time(loc, current_ts):
    """Return the previous capture timestamp and record ``current_ts`` at ``loc``.

    When ``loc`` exists, the last recorded value is read with
    ``read_last_cap_ts``, ``current_ts`` is appended with
    ``write_last_cap_time``, and the value read is returned exactly as that
    helper produced it (it is not converted here). When ``loc`` does not
    exist, the file is created with ``create_last_cap_time`` after a 2 second
    pause and ``current_ts`` itself is returned.
    """
    if os.path.exists(loc):
        print("Data history capture file: " + loc + '\n')
        previous_ts = read_last_cap_ts(loc)
        print("Last capture start at: " + str(ts_to_time(int(previous_ts))))
        print("Now capture start at: " + str(ts_to_time(current_ts)))
        print("----------------------------------------------------" + '\n')
        write_last_cap_time(loc, current_ts)
        return previous_ts

    # First run: there is no history yet, so start it with the current timestamp
    print("Data history capture time file not found, creating new at: " + loc)
    print("Creating new history file...")
    time.sleep(2)
    create_last_cap_time(loc, current_ts)
    print("Location: " + loc +" is created" + '\n')
    print("Now capture start at: " + str(ts_to_time(current_ts)) + '\n')
    return current_ts

In [19]:
def create_last_cap_time(loc, current_ts):
    """Create the capture-time file at ``loc`` containing ``current_ts``.

    The parent directory of ``loc`` is created first (including any missing
    intermediate directories; an existing directory is fine). The file is
    then written with ``str(current_ts)`` followed by a newline.

    Args:
        loc: Path of the file to create.
        current_ts: Timestamp to store as the first line.
    """
    # Derive the directory from the path itself; gluing the first two
    # components together without a separator names the wrong directory.
    parent_dir = os.path.dirname(loc)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(loc, 'w+') as f:
        f.write(str(current_ts) + '\n')

In [20]:
def write_last_cap_time(loc, current_ts):
    """Append ``str(current_ts)`` plus a newline to the file at ``loc``.

    Args:
        loc: Path of the capture-time file (created if missing).
        current_ts: Timestamp to append as a new line.
    """
    # The context manager closes the handle even if the write fails.
    with open(loc, 'a') as f:
        f.write(str(current_ts) + '\n')

In [21]:
def create_data_storage_file(all_files_loc):
    """Make sure the data storage directory ``all_files_loc`` exists.

    If the path does not exist it is created after a 2 second pause,
    including any missing parent directories; otherwise only its location
    is printed.
    """
    if not os.path.exists(all_files_loc):
        print("Data storage file not found, creating new at: " + all_files_loc)
        print("Creating new file dictionary...")
        time.sleep(2)
        # makedirs also creates missing parents, where mkdir would raise
        os.makedirs(all_files_loc, exist_ok=True)
        print("Location: " + all_files_loc +" is created  \n")
    else:
        print("Data store location: " + all_files_loc + '\n')

In [22]:
def create_histroy_data_file(loc):
    """Make sure the history data directory ``loc`` exists.

    If the path does not exist it is created after a 2 second pause,
    including any missing parent directories; otherwise only its location
    is printed.
    """
    if not os.path.exists(loc):
        print("Data history file not found, creating new at: " + loc)
        print("Creating new file dictionary...")
        time.sleep(2)
        # makedirs also creates missing parents, where mkdir would raise
        os.makedirs(loc, exist_ok=True)
        print("Location: " + loc +" is created  \n")
    else:
        print("History data store location: " + loc + '\n')

In [23]:
def read_last_cap_ts(loc):
    """Return the last line of the file at ``loc`` without its newline.

    Args:
        loc: Path of the capture-time file.

    Returns:
        The last line as a string (not converted to a number).

    Raises:
        FileNotFoundError: If ``loc`` does not exist.
        IndexError: If the file is empty.
    """
    # Read from the path that was passed in, not from a notebook-level global.
    with open(loc) as f:
        contents = f.readlines()
    return contents[-1].replace('\n', '')

In [24]:
def move_history_file(all_history_files, all_files_loc, histroy_files_loc):
    """Move each file named in ``all_history_files`` from ``all_files_loc`` to ``histroy_files_loc``.

    Source and destination paths are built by joining the directory and the
    file name with ``/``; both paths are printed after each move.
    """
    if len(all_history_files) == 0:
        return
    for file_name in all_history_files:
        origin_loc = all_files_loc + '/' + file_name
        his_loc = histroy_files_loc + '/' + file_name
        shutil.move(origin_loc, his_loc)
        print("Moving file: ", origin_loc, 'to: ', his_loc + '\n', sep='\n')