# Crypto Price Prediction

Poloniex API docs: https://docs.poloniex.com/#returnchartdata

## Imports

In [64]:
import datetime
import pandas as pd
import requests
import time

## Getting the Data

In [74]:
# Build the [start_dt, end_dt] query range as Unix timestamps in whole seconds.
# time.time() replaces strftime('%s'): '%s' is a platform-specific extension
# that is not among Python's documented format codes and is rejected on some
# platforms.
current_date = int(time.time())
end_dt = current_date
time_interval = 365*5*86400  # five 365-day years, expressed in seconds
start_dt = end_dt - time_interval

In [75]:
def get_currency_price_data(currency_pair, start_dt, end_dt, period=300, window=90*86400):
    '''
    Download Poloniex chart data for ``currency_pair`` between two timestamps.

    The range from ``start_dt`` to ``end_dt`` is split into consecutive windows
    of at most ``window`` seconds. One ``returnChartData`` request is issued
    per window and the per-window results are concatenated in request order.

    Parameters
    ----------
    currency_pair : str
        Value sent as the ``currencyPair`` query parameter, e.g. 'USDT_ETH'.
    start_dt, end_dt : int
        Range bounds as Unix timestamps in seconds. If ``start_dt >= end_dt``
        no request is made and an empty DataFrame is returned.
    period : int, optional
        Value sent as the ``period`` query parameter (default 300).
    window : int, optional
        Maximum span, in seconds, covered by one request (default 90 days).

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API. Each window keeps its own
        0-based index, so index labels repeat across windows.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an HTTP error status.
    ValueError
        If the API answers with an error payload instead of chart data.
    '''
    # Collect one frame per window and concatenate once at the end; growing a
    # DataFrame with concat inside the loop re-copies all earlier rows each time.
    frames = []

    for start in range(start_dt, end_dt, window):
        params = {
            'command': 'returnChartData',
            'currencyPair': currency_pair,
            'period': period,
            'start': start,
            # Clamp the final window so nothing past end_dt is requested.
            'end': min(start + window, end_dt),
        }
        # The timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url='https://poloniex.com/public', params=params, timeout=60)
        response.raise_for_status()
        payload = response.json()
        # A 200 status does not guarantee chart data: the API has been seen to
        # answer with an error message instead (e.g. response too large).
        # NOTE(review): assumes such errors arrive as a JSON object with an
        # 'error' key - confirm against the API docs.
        if isinstance(payload, dict) and 'error' in payload:
            raise ValueError(
                'Poloniex API error for {} (start={}, end={}): {}'.format(
                    currency_pair, params['start'], params['end'], payload['error']
                )
            )
        frames.append(pd.DataFrame(payload))
        # Possibly add a wait period here before calling the API again; so far
        # it has not been necessary.

    if not frames:
        # Empty range: no windows were requested.
        return pd.DataFrame()
    return pd.concat(frames)

In [79]:
# Fetch the USDT_ETH chart data for the whole [start_dt, end_dt] range
# (about five years; the resulting frame is tens of MB in memory).
eth_data = get_currency_price_data(
    currency_pair='USDT_ETH',
    start_dt=start_dt,
    end_dt=end_dt,
)

In [80]:
# Summarise the ETH frame: row count, per-column non-null counts and dtypes, memory usage.
eth_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 447459 entries, 0 to 7201
Data columns (total 8 columns):
close              447459 non-null float64
date               447459 non-null int64
high               447459 non-null float64
low                447459 non-null float64
open               447459 non-null float64
quoteVolume        447459 non-null float64
volume             447459 non-null float64
weightedAverage    447459 non-null float64
dtypes: float64(7), int64(1)
memory usage: 30.7 MB


In [81]:
# Fetch the USDT_BTC chart data for the whole [start_dt, end_dt] range
# (about five years; the resulting frame is tens of MB in memory).
btc_data = get_currency_price_data(
    currency_pair='USDT_BTC',
    start_dt=start_dt,
    end_dt=end_dt,
)

In [82]:
# Summarise the BTC frame: row count, per-column non-null counts and dtypes, memory usage.
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 496204 entries, 0 to 7201
Data columns (total 8 columns):
close              496204 non-null float64
date               496204 non-null int64
high               496204 non-null float64
low                496204 non-null float64
open               496204 non-null float64
quoteVolume        496204 non-null float64
volume             496204 non-null float64
weightedAverage    496204 non-null float64
dtypes: float64(7), int64(1)
memory usage: 34.1 MB


In [84]:
# Optional: uncomment to save the BTC data to disk (the 'data/' directory must already exist).
# btc_data.to_csv('data/btc_price.csv')

## Modeling