In [18]:
import numpy as np
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor
import datetime as dt
import os
from functools import reduce
from fetch_symbols import get_symbols
import ccxt


class Data:
    def __init__(self, symbols, interval = '1h', start_time = dt.datetime(2020, 1, 1), end_time = dt.datetime(2020, 1, 2), get_data = True,
                 exchange = 'binance'):
        self.symbols = symbols
        self.interval = interval
        self.start_time = start_time
        self.end_time = end_time
        if exchange == 'binance':
            self.available_symbols = self.binance_symbols()
        elif exchange == 'kraken':
            self.available_symbols = self.kraken_symbols()
            
        if get_data:
            self.df = self.get_data()

    def binance_symbols(self):
        """Fetch available symbols from Binance API."""
        response = requests.get("https://api.binance.com/api/v3/exchangeInfo")
        exchange_info = response.json()
        valid_symbols = {s['symbol'] for s in exchange_info['symbols']}
        return [s for s in self.symbols if s in valid_symbols]
    
    def kraken_symbols(self):
        """Fetch available symbols from Kraken API."""
        exchange = ccxt.kraken()
        markets = exchange.load_markets()
        valid_symbols_ = {market['symbol'] for market in markets.values()}
        valid_symbols_ = [s.replace("/USD", "USD") for s in valid_symbols_ if s.endswith('USD')]
        valid_symbols_.sort()
        return [s for s in self.symbols if s in valid_symbols_]

    def fetch_symbol_data(self, symbol, date_list, url, limit):
        """Fetch kline data for a single symbol."""
        all_data = []
        for i in range(len(date_list) - 1):
            params = {
                'symbol': symbol,
                'interval': self.interval,
                'startTime': int(date_list[i].timestamp() * 1000),
                'endTime': int((date_list[i + 1] - dt.timedelta(seconds=1)).timestamp() * 1000),
                'limit': limit,
            }
            response = requests.get(url, params=params)
            data = response.json()
            if isinstance(data, list):
                all_data.extend(data)
        return symbol, all_data

    def get_binance_klines(self, limit=1000):
        """Fetch historical kline data for all symbols in parallel."""
        url = "https://api.binance.com/api/v3/klines"
        date_list = pd.date_range(start=self.start_time, end=self.end_time, freq='D').tolist()
        
        if not self.available_symbols[0].endswith('T'):
            self.available_symbols = [s + 'T' for s in self.available_symbols]

        print(self.available_symbols)
        # Use ThreadPoolExecutor for parallel fetching
        with ThreadPoolExecutor(max_workers=10) as executor:
            results = executor.map(
                lambda symbol: self.fetch_symbol_data(symbol, date_list, url, limit),
                self.available_symbols,
            )

        print(results)
        # Process and combine results
        data_frames = {}
        for symbol, data in results:
            if not data:
                continue
            df = pd.DataFrame(data)
            df = df.iloc[:, 0:6]
            df.columns = ['Open Time', 'open', 'high', 'low', 'close', 'volume']
            df.index = pd.to_datetime(df['Open Time'], unit='ms')
            df.drop('Open Time', axis=1, inplace=True)
            data_frames[symbol] = df

        if not data_frames:
            return None

        combined_df = pd.concat(data_frames, axis=1)
        combined_df = combined_df.swaplevel(axis=1).sort_index(axis=1)
        combined_df = combined_df.apply(pd.to_numeric, errors='coerce')

        return combined_df

    def prepare_data(self, df):
        """Prepare data for analysis."""
        _df = df.copy()
        for coin in df.columns.levels[1]:
            _df['returns', coin] = _df['close', coin].pct_change()
            _df['log_return', coin] = np.log(_df['returns', coin] + 1)
            _df["creturns", coin] = _df["log_return", coin].cumsum().apply(np.exp)
            _df['price', coin] = _df['close', coin]
            _df['volume_in_dollars', coin] = _df['close', coin] * _df['volume', coin]

        df = _df.stack(future_stack=True)
        df.sort_index(axis=1, inplace=True)
        df.index.names = ['date', 'coin']
        df.dropna(inplace=True)

        return df

    def upload_data(self, df, filename):
        """Save data to a CSV file."""
        df.to_csv(filename)

    def get_data(self):
        """Main function to fetch, prepare, and save data."""
        df = self.get_binance_klines()
        if df is not None:
            df = self.prepare_data(df)
            self.upload_data(df, 'data.csv')
        return df
    



class CSV_Data:
    def __init__(self, folder_path, symbols):
        self.folder_path = folder_path
        self.symbols = symbols
        self.df = self.process_folder(folder_path, symbols)
        self.df = self.prepare_data()
        self.upload_data_to_csv(self.df)
        
    
    def prepare_data(self):
        df = self.df.copy()
        for coin in df.columns.levels[1]:
            df['returns', coin] = df['close', coin].pct_change()
            df['log_return', coin] = np.log(df['returns', coin])
            df["creturns", coin] = df["log_return", coin].cumsum().apply(np.exp)
            df['price', coin] = df['close', coin]
            df['volume_in_dollars', coin] = df['close', coin] * df['volume', coin]

        df = df.stack(level=1, future_stack=True)
        df.sort_index(axis=1, inplace=True)
        df.index.names = ['date', 'coin']
        df.dropna(inplace=True)

        return df
    
    def get_data(self, file_path, symbols):
        df = pd.read_csv(file_path)
        df = df.drop(columns = df.columns[-1]).reset_index()
        df.drop(columns = df.columns[0], inplace = True)
        df.drop(index = 0, inplace = True)
        df.columns = ['date', 'coin', 'open', 'high', 'low', 'close', 'volume', 'volume_in_dollars']

        if not df['coin'].iloc[0] in symbols:
            return
        # Clean the date column by stripping whitespace
        df['date'] = df['date'].str.strip()
        # Parse the date column with mixed format
        df['date'] = pd.to_datetime(df['date'], format='mixed', errors='coerce')
        
        df.set_index([df.columns[0], df.columns[1]], inplace = True)
        df = df.unstack()
        return df
    
    def process_folder(self, folder_path, symbols):
        # Get all CSV files in the folder
        csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
        
        dfs = []
        
        for file in csv_files:
            file_path = os.path.join(folder_path, file)
            df = self.get_data(file_path, symbols)
            if df is not None:
                dfs.append(df)
        

        # Get the union of all indices (dates) to align the data
        all_dates = reduce(pd.Index.union, [df.index.get_level_values(0) for df in dfs])

        # Reindex all DataFrames to the same set of dates (adding NaNs where data is missing)
        dfs_aligned = [df.reindex(all_dates, level=0, fill_value=None) for df in dfs]

        # Concatenate all DataFrames
        concatenated_df = pd.concat(dfs_aligned, axis=1)
        concatenated_df = concatenated_df.sort_index(axis=1)
        concatenated_df = concatenated_df.apply(pd.to_numeric, errors='coerce', downcast='float') #Essential to perform calculations
        
        return concatenated_df

    def upload_data_to_csv(self, df):
        # Upload the data to CSV file
        df.to_csv('all_data.csv')
    

    



# Example usage
# symbols = ['BTCUSD', 'ETHUSD']
# symbols = get_symbols()
# # Append 'T' to each symbol so it names the USDT pair (e.g. 'BTCUSD' -> 'BTCUSDT')
# updated_symbols = [s + 'T' for s in symbols]
# interval = '1h'
# start_time = dt.datetime(2020, 1, 1)
# end_time = dt.datetime(2020, 1, 7)
# df = Data(updated_symbols, interval, start_time, end_time).df
# print(df)


#Use the below for uploading full data (uploaded to csv)
# symbols = get_symbols()
# binance_symbols = Data(symbols)
# folder_path = r'C:\Users\yassi\OneDrive\Documents\Trading\Algo Trading Projects\Algo Business\data\Binance Data (CSV)'
# df = CSV_Data(folder_path, symbols).df

In [19]:
# USD-quoted symbols to request; Data.get_binance_klines appends 'T' to them
# before querying the klines endpoint.
symbols = ['BTCUSD', 'ETHUSD']

In [20]:
# Filter the symbols against Kraken's market list, then fetch with the class
# defaults (interval '1h', 2020-01-01 to 2020-01-02, get_data=True).
data_instance = Data(symbols, exchange = 'kraken')

['BTCUSDT', 'ETHUSDT']
<generator object Executor.map.<locals>.result_iterator at 0x000001F59606B010>


In [21]:

# Query Kraken's public AssetPairs endpoint directly and display the decoded
# JSON payload for inspection.
response = requests.get("https://api.kraken.com/0/public/AssetPairs")
kraken_info = response.json()
kraken_info

{'error': [],
 'result': {'1INCHEUR': {'altname': '1INCHEUR',
   'wsname': '1INCH/EUR',
   'aclass_base': 'currency',
   'base': '1INCH',
   'aclass_quote': 'currency',
   'quote': 'ZEUR',
   'lot': 'unit',
   'cost_decimals': 5,
   'pair_decimals': 3,
   'lot_decimals': 8,
   'lot_multiplier': 1,
   'leverage_buy': [],
   'leverage_sell': [],
   'fees': [[0, 0.4],
    [10000, 0.35],
    [50000, 0.24],
    [100000, 0.22],
    [250000, 0.2],
    [500000, 0.18],
    [1000000, 0.16],
    [2500000, 0.14],
    [5000000, 0.12],
    [10000000, 0.1]],
   'fees_maker': [[0, 0.25],
    [10000, 0.2],
    [50000, 0.14],
    [100000, 0.12],
    [250000, 0.1],
    [500000, 0.08],
    [1000000, 0.06],
    [2500000, 0.04],
    [5000000, 0.02],
    [10000000, 0.0]],
   'fee_volume_currency': 'ZUSD',
   'margin_call': 80,
   'margin_stop': 40,
   'ordermin': '11',
   'costmin': '0.45',
   'tick_size': '0.001',
   'status': 'online'},
  '1INCHUSD': {'altname': '1INCHUSD',
   'wsname': '1INCH/USD',
   'ac

In [22]:
# Collect the keys of the 'result' mapping whose name ends in 'USD'.
valid_symbols = {pair for pair in kraken_info['result'] if pair.endswith('USD')}
valid_symbols

{'1INCHUSD',
 'AAVEUSD',
 'ACAUSD',
 'ACHUSD',
 'ADAUSD',
 'ADXUSD',
 'AEVOUSD',
 'AGLDUSD',
 'AIRUSD',
 'AKTUSD',
 'ALCXUSD',
 'ALGOUSD',
 'ALICEUSD',
 'ALPHAUSD',
 'ALTUSD',
 'ANKRUSD',
 'APEUSD',
 'API3USD',
 'APTUSD',
 'APUUSD',
 'ARBUSD',
 'ARKMUSD',
 'ARPAUSD',
 'ASTRUSD',
 'ATHUSD',
 'ATLASUSD',
 'ATOMUSD',
 'AUCTIONUSD',
 'AUDIOUSD',
 'AUDUSD',
 'AVAXUSD',
 'AXSUSD',
 'BADGERUSD',
 'BALUSD',
 'BANDUSD',
 'BATUSD',
 'BCHUSD',
 'BEAMUSD',
 'BICOUSD',
 'BIGTIMEUSD',
 'BITUSD',
 'BLURUSD',
 'BLZUSD',
 'BNCUSD',
 'BNTUSD',
 'BOBAUSD',
 'BODENUSD',
 'BONDUSD',
 'BONKUSD',
 'BRICKUSD',
 'BSXUSD',
 'BTTUSD',
 'C98USD',
 'CELRUSD',
 'CFGUSD',
 'CHRUSD',
 'CHZUSD',
 'CLOUDUSD',
 'COMPUSD',
 'COTIUSD',
 'CPOOLUSD',
 'CQTUSD',
 'CRVUSD',
 'CSMUSD',
 'CTSIUSD',
 'CVCUSD',
 'CVXUSD',
 'CXTUSD',
 'DAIUSD',
 'DASHUSD',
 'DBRUSD',
 'DENTUSD',
 'DOTUSD',
 'DRIFTUSD',
 'DYDXUSD',
 'DYMUSD',
 'EGLDUSD',
 'EIGENUSD',
 'ENAUSD',
 'ENJUSD',
 'ENSUSD',
 'EOSUSD',
 'ETHFIUSD',
 'ETHPYUSD',
 'ETHWUSD',


In [23]:
# Check whether the pair key 'XXBTZUSD' is among the USD pair keys collected above.
'XXBTZUSD' in valid_symbols

True

In [24]:
# NOTE(review): subtracting a timedelta from a date only applies its whole
# days, so hours=2001 moves the start back 83 days and start_time is a
# datetime.date rather than a datetime — confirm this is the intended window.
start_time = dt.datetime.now().date() - dt.timedelta(hours = 2001)
end_time = dt.datetime.now()
# Same Kraken-filtered fetch as before, but over the explicit window above.
data_instance= Data(symbols, exchange = 'kraken', interval = '1h', get_data=True, start_time=start_time, end_time=end_time)

['BTCUSDT', 'ETHUSDT']
<generator object Executor.map.<locals>.result_iterator at 0x000001F59AEA0310>


In [26]:
# Display the prepared (date, coin) table produced by Data.get_data.
data_instance.df

Unnamed: 0_level_0,Unnamed: 1_level_0,close,creturns,high,log_return,low,open,price,returns,volume,volume_in_dollars
date,coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-10-21 01:00:00,BTCUSDT,69210.89,1.000736,69480.00,0.000736,69106.00,69160.00,69210.89,0.000736,726.66974,5.029346e+07
2024-10-21 01:00:00,ETHUSDT,2745.54,0.999741,2761.13,-0.000259,2742.44,2746.24,2745.54,-0.000259,16255.89960,4.463122e+07
2024-10-21 02:00:00,BTCUSDT,68710.00,0.993493,69266.64,-0.007263,68709.87,69210.88,68710.00,-0.007237,1082.59587,7.438516e+07
2024-10-21 02:00:00,ETHUSDT,2731.19,0.994516,2748.54,-0.005240,2728.44,2745.55,2731.19,-0.005227,19853.18780,5.422283e+07
2024-10-21 03:00:00,BTCUSDT,68939.99,0.996819,68940.00,0.003342,68588.51,68709.99,68939.99,0.003347,704.09672,4.854042e+07
...,...,...,...,...,...,...,...,...,...,...,...
2025-01-11 21:00:00,ETHUSDT,3307.10,1.204224,3320.18,0.002289,3299.22,3299.54,3307.10,0.002291,12280.45780,4.061270e+07
2025-01-11 22:00:00,BTCUSDT,94593.80,1.367753,95005.00,-0.003661,94561.00,94940.75,94593.80,-0.003654,532.69589,5.038973e+07
2025-01-11 22:00:00,ETHUSDT,3288.67,1.197513,3308.50,-0.005588,3286.25,3307.10,3288.67,-0.005573,5714.07290,1.879170e+07
2025-01-11 23:00:00,BTCUSDT,94599.99,1.367843,94753.37,0.000065,94541.66,94593.80,94599.99,0.000065,231.83722,2.193180e+07
