In [2]:
from datetime import datetime
import pandas as pd
import os
import numpy as np


def convert_dates(date):
    """
    Turn a Unix-epoch timestamp expressed in milliseconds into a datetime.

    :param date: milliseconds elapsed since the Unix epoch
    :returns: naive ``datetime`` produced by ``datetime.fromtimestamp``,
        which expresses the instant in the local timezone of the machine
        running the code
    """
    # fromtimestamp expects seconds, the data carries milliseconds
    epoch_seconds = date / 1000
    return datetime.fromtimestamp(epoch_seconds)


def read_trade_data(filepath):
    """
    Read csv containing crypto trade data at <filepath>, convert time fields
    and select relevant columns.

    The csv must provide the columns 'date' (Unix epoch in milliseconds),
    'exchange', 'symbol', 'price', 'amount' and 'sell', and must describe a
    single exchange and a single coin pair.

    :param filepath: path to csv file
    :returns: (exchange, symbol, preprocessed Pandas dataframe); the
        dataframe is indexed by trade datetime in ascending order and holds
        the 'price', 'amount' and 'sell' columns
    :raises AssertionError: if the file holds no trades, or more than one
        exchange or symbol
    """
    raw_trades = pd.read_csv(filepath)
    # convert dates from unix timestamps and order the trades chronologically;
    # mergesort is stable, so trades sharing a timestamp keep their file order
    trades = (
        raw_trades
        .assign(date=raw_trades['date'].apply(convert_dates))
        .set_index('date')
        .sort_index(ascending=True, kind='mergesort')
    )
    # an empty file would otherwise be reported as 'Multiple exchanges present'
    assert not trades.empty, 'No trades present'
    # check data relates to a single exchange and coin pair
    assert trades['exchange'].nunique() == 1, 'Multiple exchanges present'
    assert trades['symbol'].nunique() == 1, 'Multiple symbols present'
    # select relevant columns; the explicit copy detaches the result from
    # `trades` so callers can modify it without pandas copy warnings
    trade_features = trades[['price', 'amount', 'sell']].copy()
    exchange = trades['exchange'].iloc[0]
    symbol = trades['symbol'].iloc[0]
    return exchange, symbol, trade_features


In [3]:
# Sanity check: load the sample file and display the returned
# (exchange, symbol, dataframe) tuple.
read_trade_data('Bitfinex_BTCEUR_trades_2018_02_02.csv')


('bf', 'btceur',                            price    amount   sell
 date                                             
 2018-02-02 08:00:08  7349.600000  0.004568  False
 2018-02-02 08:00:09  7354.680450  0.010000   True
 2018-02-02 08:00:09  7354.680450  0.004568   True
 2018-02-02 08:00:10  7354.600000  0.102395  False
 2018-02-02 08:00:26  7343.407500  0.086580  False
 2018-02-02 08:00:26  7343.807250  0.204920  False
 2018-02-02 08:00:38  7356.000000  0.300000  False
 2018-02-02 08:00:38  7357.900000  0.088007  False
 2018-02-02 08:00:38  7355.800000  0.100000  False
 2018-02-02 08:00:39  7343.167650  0.088007  False
 2018-02-02 08:01:15  7352.281950  0.002000   True
 2018-02-02 08:01:55  7351.320200  0.300000   True
 2018-02-02 08:01:55  7351.320200  0.300000   True
 2018-02-02 08:01:55  7348.321701  0.300000   True
 2018-02-02 08:01:55  7339.326204  0.300000   True
 2018-02-02 08:01:55  7344.323702  0.300000   True
 2018-02-02 08:01:55  7339.825954  0.100000   True
 2018-02-02 08:

In [4]:
def write_processed(exchange, symbol, data, loc=None):
    """
    Persist processed features for one exchange / coin pair as a parquet
    file called '<exchange>_<symbol>_trades.parquet'.

    :param exchange: str exchange name, first part of the file name
    :param symbol: str symbol name, second part of the file name
    :param data: dataframe to save (its ``to_parquet`` method is used)
    :param loc: directory in which to save; the current working directory
        is used when omitted
    """
    # Fall back to the working directory only when no location was supplied.
    target_dir = loc if loc is not None else os.getcwd()
    destination = os.path.join(
        target_dir,
        f'{exchange}_{symbol}_trades.parquet',
    )
    data.to_parquet(destination)


if __name__ == "__main__":
    # Entry point: preprocess the sample trades file, then save the selected
    # features with write_processed using its default location.
    exchange, symbol, data = read_trade_data(
        'Bitfinex_BTCEUR_trades_2018_02_02.csv'
    )
    write_processed(exchange, symbol, data)

In [5]:
# Reuse the exchange and symbol returned by read_trade_data instead of
# hard-coded literals, so the file name always matches the data written.
write_processed(exchange, symbol, data)


In [7]:
# Load the csv again without any preprocessing to inspect its original
# columns (id, exchange, symbol, date, price, amount, sell).
df = pd.read_csv('Bitfinex_BTCEUR_trades_2018_02_02.csv')
print(df)

              id exchange  symbol           date        price    amount   sell
0      183601815       bf  btceur  1517529608000  7349.600000  0.004568  False
1      183601825       bf  btceur  1517529609000  7354.680450  0.010000   True
2      183601823       bf  btceur  1517529609000  7354.680450  0.004568   True
3      183601833       bf  btceur  1517529610000  7354.600000  0.102395  False
4      183601904       bf  btceur  1517529626000  7343.407500  0.086580  False
5      183601906       bf  btceur  1517529626000  7343.807250  0.204920  False
6      183602055       bf  btceur  1517529638000  7356.000000  0.300000  False
7      183602057       bf  btceur  1517529638000  7357.900000  0.088007  False
8      183602052       bf  btceur  1517529638000  7355.800000  0.100000  False
9      183602064       bf  btceur  1517529639000  7343.167650  0.088007  False
10     183602333       bf  btceur  1517529675000  7352.281950  0.002000   True
11     183602610       bf  btceur  1517529715000  73

In [None]:
# Preview the first five rows of the raw frame.
df.head(n=5)


In [None]:
def sma(data, smaPeriod):
    """
    Compute the simple moving average of <data> over <smaPeriod> points.

    Leading ``None`` entries are skipped. Every position that does not yet
    have a full window of ``smaPeriod`` values behind it is filled with
    ``None``, so the result always has the same length as ``data``.
    Only leading ``None`` values are handled; a ``None`` inside a window is
    passed to ``np.mean`` unchanged.

    :param data: sliceable sequence of values, optionally starting with
        ``None`` entries
    :param smaPeriod: number of consecutive values averaged per window
    :returns: numpy array of ``len(data)`` entries holding ``None`` for the
        warm-up positions and the window means afterwards
    :raises ValueError: if ``smaPeriod`` is smaller than 1
    """
    if smaPeriod < 1:
        raise ValueError('smaPeriod must be a positive integer')

    size = len(data)
    # Index of the first usable value; `size` when there is none, so that
    # empty or all-None input yields padding only instead of StopIteration.
    first_valid = next(
        (i for i, x in enumerate(data) if x is not None),
        size,
    )
    # Positions without a complete window, capped so that inputs shorter
    # than the warm-up never produce more entries than they contain.
    warmup = min(first_valid + smaPeriod - 1, size)
    padding = [None] * warmup
    averages = [
        np.mean(data[end - smaPeriod + 1: end + 1])
        for end in range(warmup, size)
    ]

    return np.array(padding + averages)