# Data and model exploration

In this notebook you will find information extracted from the historical data of binance, and how different models perform trading.

In [10]:
from binance.spot import Spot
import pandas as pd
from datetime import datetime

def get_historical_data(symbol, interval, start_date, end_date):
    # Initialize the Spot client
    client = Spot()
    
    # Convert date strings to milliseconds
    start_date = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp() * 1000)
    end_date = int(datetime.strptime(end_date, "%Y-%m-%d").timestamp() * 1000)
    
    # Fetch the historical klines (candlestick data)
    klines = client.klines(symbol=symbol, interval=interval, startTime=start_date, endTime=end_date)
    
    # Convert to DataFrame
    df = pd.DataFrame(klines, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'])
    
    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    
    # Set timestamp as index
    df.set_index('timestamp', inplace=True)
    
    # Convert numeric columns to appropriate types
    numeric_columns = ['open', 'high', 'low', 'close', 'volume', 'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume']
    df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, axis=1)
    
    return df

# Example usage
symbol = 'BTCUSDT'
interval = '1h'  # 1 hour interval
start_date = '2022-01-01'
end_date = '2023-12-31'

df = get_historical_data(symbol, interval, start_date, end_date)

In [11]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-01-01 03:00:00,46811.77,46916.63,46760.12,46813.2,562.88971,1641009599999,26363260.0,19882,248.28212,11629380.0,0
2022-01-01 04:00:00,46813.21,46887.33,46591.23,46711.05,861.88389,1641013199999,40272040.0,23357,397.53563,18575400.0,0
2022-01-01 05:00:00,46711.05,47555.55,46673.94,47192.55,1400.73642,1641016799999,66188310.0,41431,776.78632,36695320.0,0
2022-01-01 06:00:00,47192.56,47324.42,46940.0,46979.62,613.97776,1641020399999,28941490.0,25196,299.44097,14116070.0,0
2022-01-01 07:00:00,46979.61,47255.85,46864.84,47194.73,646.20081,1641023999999,30399870.0,25166,290.68371,13676290.0,0


In [12]:
df.shape

(500, 11)

In [14]:
df.columns.tolist()

['open',
 'high',
 'low',
 'close',
 'volume',
 'close_time',
 'quote_asset_volume',
 'number_of_trades',
 'taker_buy_base_asset_volume',
 'taker_buy_quote_asset_volume',
 'ignore']

In [13]:
df['ignore'].value_counts()

ignore
0    500
Name: count, dtype: int64