# Resample Bitstamp BTC minute data into daily and hourly bars

In [1]:
from itertools import cycle

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

In [2]:
# The raw Bitstamp dump is split into five numbered parts (part1 .. part5).
DATA_PATHS = [f'../../datasets/bitstamp_data.csv.part{part}' for part in range(1, 6)]

In [3]:
def load_btc_data(file_paths):
    """Load several CSV parts and stack them into a single DataFrame.

    Each file is read with its ``Timestamp`` column as the index. The index
    values are interpreted as Unix time in seconds and converted to datetimes.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of the CSV files to read, in the order they should be stacked.

    Returns
    -------
    pandas.DataFrame
        The rows of every file, concatenated in order and indexed by a
        ``Timestamp`` DatetimeIndex.

    Raises
    ------
    ValueError
        If ``file_paths`` is empty (raised by ``pd.concat``).
    """
    frames = []
    for filename in file_paths:
        df = pd.read_csv(filename, index_col='Timestamp')
        # Convert the epoch-second index in one vectorized call instead of
        # going through `date_parser`, which pandas deprecated in 2.0.
        df.index = pd.to_datetime(df.index, unit='s')
        frames.append(df)
    return pd.concat(frames, axis=0)

# Read every CSV part into one minute-indexed frame.
btc = load_btc_data(DATA_PATHS)
# Preview without 'Weighted_Price'. `drop` returns a new frame that is only
# displayed here, so `btc` itself still contains the column afterwards.
btc.drop(columns=['Weighted_Price'])

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-12-31 07:52:00,4.39,4.39,4.39,4.39,0.455581,2.000000
2011-12-31 07:53:00,,,,,,
2011-12-31 07:54:00,,,,,,
2011-12-31 07:55:00,,,,,,
2011-12-31 07:56:00,,,,,,
...,...,...,...,...,...,...
2020-12-30 23:56:00,28801.47,28829.42,28785.64,28829.42,0.965221,27804.572129
2020-12-30 23:57:00,28829.42,28863.90,28829.42,28857.06,2.368831,68332.350629
2020-12-30 23:58:00,28850.49,28900.52,28850.49,28882.82,2.466590,71232.784464
2020-12-30 23:59:00,28910.54,28911.52,28867.60,28881.30,7.332773,211870.912660


In [4]:
# Inspect the column dtypes and the (rows, columns) shape of the loaded frame.
btc.dtypes, btc.shape

(Open                 float64
 High                 float64
 Low                  float64
 Close                float64
 Volume_(BTC)         float64
 Volume_(Currency)    float64
 Weighted_Price       float64
 dtype: object, (4727777, 7))

In [5]:
# How each column of the source bars is combined into one daily bar.
logic = {'Open'  : 'first',
         'High'  : 'max',
         'Low'   : 'min',
         'Close' : 'last',
         'Volume_(BTC)': 'sum',
         'Volume_(Currency)': 'sum',}

# Each daily bin is relabelled one day earlier than its start.
# NOTE(review): this labels data from day D as D-1 -- confirm it is intended.
# `pd.Timedelta` replaces `pd.offsets.timedelta`, which is not public pandas API.
offset = pd.Timedelta(days=-1)
# `resample(..., loffset=...)` was deprecated in pandas 1.1 and removed in 2.0;
# shifting the resulting index explicitly yields the same labels.
btc_daily = btc.resample('D').agg(logic)
btc_daily.index = btc_daily.index + offset
btc_daily.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-12-30,4.39,4.58,4.39,4.58,95.317878,425.320338
2011-12-31,4.58,5.0,4.58,5.0,21.602,105.77916
2012-01-01,5.0,5.0,5.0,5.0,19.048,95.24
2012-01-02,5.32,5.32,5.14,5.29,88.037281,464.80521
2012-01-03,4.93,5.57,4.93,5.57,107.23326,568.076197


In [6]:
# How each column of the source bars is combined into one hourly bar.
logic = {'Open'  : 'first',
         'High'  : 'max',
         'Low'   : 'min',
         'Close' : 'last',
         'Volume_(BTC)': 'sum',
         'Volume_(Currency)': 'sum',}

# Each hourly bin is relabelled one hour earlier than its start.
# NOTE(review): this labels data from hour H as H-1 -- confirm it is intended.
# `pd.Timedelta` replaces `pd.offsets.timedelta`, which is not public pandas API.
offset = pd.Timedelta(hours=-1)
# `resample(..., loffset=...)` was deprecated in pandas 1.1 and removed in 2.0;
# shifting the resulting index explicitly yields the same labels.
# An offset object is used for the rule because the 'H' string alias is
# deprecated in recent pandas releases.
btc_hourly = btc.resample(pd.offsets.Hour()).agg(logic)
btc_hourly.index = btc_hourly.index + offset
btc_hourly.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency)
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-12-31 06:00:00,4.39,4.39,4.39,4.39,0.455581,2.0
2011-12-31 07:00:00,,,,,0.0,0.0
2011-12-31 08:00:00,,,,,0.0,0.0
2011-12-31 09:00:00,,,,,0.0,0.0
2011-12-31 10:00:00,,,,,0.0,0.0


In [7]:
# Save the daily-resampled dataset.
DAILY_DATA_PATH = '../../datasets/bitstamp_data_daily.csv'
# Write the index as integer Unix seconds. The previous `date_format='%s'`
# relied on a platform-specific strftime extension that converts through the
# machine's local timezone, so the output was neither portable nor stable.
daily_epoch_seconds = (btc_daily.index - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
btc_daily.set_index(daily_epoch_seconds).to_csv(DAILY_DATA_PATH, index_label='Timestamp')

In [8]:
# Save the hourly-resampled dataset.
HOURLY_DATA_PATH = '../../datasets/bitstamp_data_hourly.csv'
# Write the index as integer Unix seconds. The previous `date_format='%s'`
# relied on a platform-specific strftime extension that converts through the
# machine's local timezone, so the output was neither portable nor stable.
hourly_epoch_seconds = (btc_hourly.index - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
btc_hourly.set_index(hourly_epoch_seconds).to_csv(HOURLY_DATA_PATH, index_label='Timestamp')

In [9]:
# Compare the shapes of the original, daily and hourly frames.
btc.shape, btc_daily.shape, btc_hourly.shape

((4727777, 7), (3289, 6), (78906, 6))