In [1]:
import numpy as np
import pandas as pd
import requests
from concurrent.futures import ThreadPoolExecutor
import datetime as dt
import os
from functools import reduce
from fetch_symbols import get_symbols
import ccxt
import random


class Data:
    def __init__(self, symbols, interval = '1h', start_time = dt.datetime(2024, 7, 1), end_time = dt.datetime(2025, 1, 2), get_data = True,
                 exchange = 'binance'):
        self.symbols = symbols
        self.interval = interval
        self.start_time = start_time
        self.end_time = end_time
        if exchange == 'binance':
            if not self.symbols[0].endswith('T'):
                self.symbols = [s + 'T' for s in self.symbols]
            self.available_symbols = self.binance_symbols()
        elif exchange == 'kraken':
            self.available_symbols = self.kraken_symbols()
            
        if get_data:
            self.df = self.get_data()

    def binance_symbols(self):
        """Fetch available symbols from Binance API."""
        response = requests.get("https://api.binance.com/api/v3/exchangeInfo")
        exchange_info = response.json()
        valid_symbols = {s['symbol'] for s in exchange_info['symbols']}
        return [s for s in self.symbols if s in valid_symbols]
    
    def kraken_symbols(self):
        """Fetch available symbols from Kraken API."""
        exchange = ccxt.kraken()
        markets = exchange.load_markets()
        valid_symbols_ = {market['symbol'] for market in markets.values()}
        valid_symbols_ = [s.replace("/USD", "USD") for s in valid_symbols_ if s.endswith('USD')]
        valid_symbols_.sort()
        return [s for s in self.symbols if s in valid_symbols_]

    def fetch_symbol_binance_data(self, symbol, date_list, url, limit):
        """Fetch kline data for a single symbol."""
        all_data = []
        for i in range(len(date_list) - 1):
            params = {
                'symbol': symbol,
                'interval': self.interval,
                'startTime': int(date_list[i].timestamp() * 1000),
                'endTime': int((date_list[i + 1] - dt.timedelta(seconds=1)).timestamp() * 1000),
                'limit': limit,
            }
            response = requests.get(url, params=params)
            data = response.json()
            if isinstance(data, list) and data:
                all_data.extend(data)
        return symbol, all_data

    def get_binance_klines(self, limit=1000):
        """Fetch historical kline data for all symbols in parallel."""
        url = "https://api.binance.com/api/v3/klines"
        date_list = pd.date_range(start=self.start_time, end=self.end_time, freq='D').tolist()
        
        if not self.available_symbols[0].endswith('T'):
            self.available_symbols = [s + 'T' for s in self.available_symbols]

        
        # Use ThreadPoolExecutor for parallel fetching
        with ThreadPoolExecutor(max_workers=10) as executor:
            results = executor.map(
                lambda symbol: self.fetch_symbol_binance_data(symbol, date_list, url, limit),
                self.available_symbols,
            )

        
        # Process and combine results
        data_frames = {}
        for symbol, data in results:
            if not data:
                continue
            df = pd.DataFrame(data)
            df = df.iloc[:, 0:6]
            df.columns = ['Open Time', 'open', 'high', 'low', 'close', 'volume']
            df.index = pd.to_datetime(df['Open Time'], unit='ms')
            df.drop('Open Time', axis=1, inplace=True)
            data_frames[symbol] = df

        if not data_frames:
            return None

        combined_df = pd.concat(data_frames, axis=1)
        combined_df = combined_df.swaplevel(axis=1).sort_index(axis=1)
        combined_df = combined_df.apply(pd.to_numeric, errors='coerce')

        return combined_df

    def prepare_data(self, df):
        """Prepare data for analysis."""
        _df = df.copy()
        for coin in df.columns.levels[1]:
            _df['returns', coin] = _df['close', coin].pct_change(fill_method=None)
            _df['log_return', coin] = np.log(_df['returns', coin] + 1)
            _df["creturns", coin] = _df["log_return", coin].cumsum().apply(np.exp)
            _df['price', coin] = _df['close', coin]
            _df['volume_in_dollars', coin] = _df['close', coin] * _df['volume', coin]

        df = _df.stack(future_stack=True).copy()
        df.sort_index(axis=1, inplace=True)
        df.index.names = ['date', 'coin']
        df.dropna(inplace=True)

        return df

    def upload_data(self, df, filename):
        """Save data to a CSV file."""
        df.to_csv(filename)

    def get_data(self):
        """Main function to fetch, prepare, and save data."""
        df = self.get_binance_klines()
        if df is not None:
            df = self.prepare_data(df)
            self.upload_data(df, 'data.csv')
        return df
    



class CSV_Data:
    def __init__(self, folder_path, symbols):
        self.folder_path = folder_path
        self.symbols = symbols
        self.df = self.process_folder(folder_path, symbols)
        self.df = self.prepare_data()
        self.upload_data_to_csv(self.df)
        
    
    def prepare_data(self):
        df = self.df.copy()
        for coin in df.columns.levels[1]:
            df['returns', coin] = df['close', coin].pct_change()
            df['log_return', coin] = np.log(df['returns', coin])
            df["creturns", coin] = df["log_return", coin].cumsum().apply(np.exp)
            df['price', coin] = df['close', coin]
            df['volume_in_dollars', coin] = df['close', coin] * df['volume', coin]

        df = df.stack(level=1, future_stack=True)
        df.sort_index(axis=1, inplace=True)
        df.index.names = ['date', 'coin']
        df.dropna(inplace=True)

        return df
    
    def get_data(self, file_path, symbols):
        df = pd.read_csv(file_path)
        df = df.drop(columns = df.columns[-1]).reset_index()
        df.drop(columns = df.columns[0], inplace = True)
        df.drop(index = 0, inplace = True)
        df.columns = ['date', 'coin', 'open', 'high', 'low', 'close', 'volume', 'volume_in_dollars']

        if not df['coin'].iloc[0] in symbols:
            return
        # Clean the date column by stripping whitespace
        df['date'] = df['date'].str.strip()
        # Parse the date column with mixed format
        df['date'] = pd.to_datetime(df['date'], format='mixed', errors='coerce')
        
        df.set_index([df.columns[0], df.columns[1]], inplace = True)
        df = df.unstack()
        return df
    
    def process_folder(self, folder_path, symbols):
        # Get all CSV files in the folder
        csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
        
        dfs = []
        
        for file in csv_files:
            file_path = os.path.join(folder_path, file)
            df = self.get_data(file_path, symbols)
            if df is not None:
                dfs.append(df)
        

        # Get the union of all indices (dates) to align the data
        all_dates = reduce(pd.Index.union, [df.index.get_level_values(0) for df in dfs])

        # Reindex all DataFrames to the same set of dates (adding NaNs where data is missing)
        dfs_aligned = [df.reindex(all_dates, level=0, fill_value=None) for df in dfs]

        # Concatenate all DataFrames
        concatenated_df = pd.concat(dfs_aligned, axis=1)
        concatenated_df = concatenated_df.sort_index(axis=1)
        concatenated_df = concatenated_df.apply(pd.to_numeric, errors='coerce', downcast='float') #Essential to perform calculations
        
        return concatenated_df

    def upload_data_to_csv(self, df):
        # Upload the data to CSV file
        df.to_csv('all_data.csv')
    
    
def get_symbols_for_bot(n=50, seed=None):
    """Return a random selection of symbols for the bot to trade.

    Args:
        n: Number of distinct symbols to draw from the built-in list
            (defaults to 50, the previously hard-coded size).
        seed: Optional seed. When given, a private generator seeded with it is
            used, so the same seed always yields the same selection. When
            ``None`` the module-level ``random`` state is used.

    Returns:
        List of ``n`` distinct symbol strings in random order.

    Raises:
        ValueError: If ``n`` is negative or larger than the built-in list
            (raised by ``random.sample``).
    """
    symbols = ['MANAUSD','BONKUSD','BANDUSD','PHAUSD','POLUSD','STORJUSD','ETHUSD','SCUSD','OCEANUSD','TNSRUSD',
               'ATOMUSD','RLCUSD','GMTUSD','LTCUSD','ALICEUSD','DYMUSD','SEIUSD','QTUMUSD','MASKUSD','CTSIUSD',
               'TONUSD','OPUSD','ARKMUSD','FORTHUSD','CHRUSD','RUNEUSD','ZROUSD','HNTUSD','ENJUSD','SAGAUSD',
               'ZECUSD','ENSUSD','SUIUSD','SHIBUSD','ETHFIUSD','MULTIUSD','KP3RUSD','CELRUSD','REZUSD','NEIROUSD',
               'ZKUSD','APTUSD','LINKUSD','ICXUSD','APEUSD','EGLDUSD','API3USD','DASHUSD','MATICUSD','STRKUSD','ICPUSD',
               'SANDUSD','FLOWUSD','ALTUSD','MINAUSD','TURBOUSD','CVCUSD','FETUSD','JASMYUSD','RENDERUSD','OGNUSD',
               'NEARUSD','COTIUSD','STGUSD','IMXUSD','WIFUSD','DOTUSD','GRTUSD','SYNUSD','MEMEUSD','PEPEUSD','LSKUSD',
               'AVAXUSD','LDOUSD','BTCUSD','FXSUSD','TAOUSD','LUNAUSD','BCHUSD','LPTUSD','AUDIOUSD','MOVRUSD','ETCUSD',
               'ADAUSD','AGLDUSD','BLZUSD','STXUSD','GALUSD','XMRUSD','TVKUSD','LRCUSD','FTMUSD','KSMUSD','FILUSD',
               'BTTUSD','EIGENUSD','PONDUSD','RAREUSD','PNUTUSD','OMNIUSD','ALGOUSD','ANKRUSD','TRXUSD','DENTUSD',
               'XTZUSD','DOGEUSD','OXTUSD','SOLUSD','ZRXUSD','GLMRUSD','ARBUSD','TIAUSD','KEYUSD','FIDAUSD','RADUSD',
               'BLURUSD']
    # A dedicated generator keeps a seeded draw from disturbing (or being
    # disturbed by) the global random state.
    rng = random if seed is None else random.Random(seed)
    return rng.sample(symbols, n)

    



# Example usage
# symbols = ['BTCUSD', 'ETHUSD']
# symbols = get_symbols()
# # Append 'T' to each symbol in the list (e.g. 'BTCUSD' -> 'BTCUSDT')
# updated_symbols = [s + 'T' for s in symbols]
# interval = '1h'
# start_time = dt.datetime(2020, 1, 1)
# end_time = dt.datetime(2020, 1, 7)
# df = Data(updated_symbols, interval, start_time, end_time).df
# print(df)


# Use the snippet below to build the full dataset from local CSV files (CSV_Data writes the result to all_data.csv)
# symbols = get_symbols()
# binance_symbols = Data(symbols)
# folder_path = r'C:\Users\yassi\OneDrive\Documents\Trading\Algo Trading Projects\Algo Business\data\Binance Data (CSV)'
# df = CSV_Data(folder_path, symbols).df

In [31]:
# Candidate tickers in 'USD'-suffixed form.
# NOTE(review): this appears to be the same list that get_symbols_for_bot()
# hard-codes, and `symbols` is rebound by the cells that follow, so this
# assignment has no effect on the run further down.
symbols = ['MANAUSD','BONKUSD','BANDUSD','PHAUSD','POLUSD','STORJUSD','ETHUSD','SCUSD','OCEANUSD','TNSRUSD',
            'ATOMUSD','RLCUSD','GMTUSD','LTCUSD','ALICEUSD','DYMUSD','SEIUSD','QTUMUSD','MASKUSD','CTSIUSD',
            'TONUSD','OPUSD','ARKMUSD','FORTHUSD','CHRUSD','RUNEUSD','ZROUSD','HNTUSD','ENJUSD','SAGAUSD',
            'ZECUSD','ENSUSD','SUIUSD','SHIBUSD','ETHFIUSD','MULTIUSD','KP3RUSD','CELRUSD','REZUSD','NEIROUSD',
            'ZKUSD','APTUSD','LINKUSD','ICXUSD','APEUSD','EGLDUSD','API3USD','DASHUSD','MATICUSD','STRKUSD','ICPUSD',
            'SANDUSD','FLOWUSD','ALTUSD','MINAUSD','TURBOUSD','CVCUSD','FETUSD','JASMYUSD','RENDERUSD','OGNUSD',
            'NEARUSD','COTIUSD','STGUSD','IMXUSD','WIFUSD','DOTUSD','GRTUSD','SYNUSD','MEMEUSD','PEPEUSD','LSKUSD',
            'AVAXUSD','LDOUSD','BTCUSD','FXSUSD','TAOUSD','LUNAUSD','BCHUSD','LPTUSD','AUDIOUSD','MOVRUSD','ETCUSD',
            'ADAUSD','AGLDUSD','BLZUSD','STXUSD','GALUSD','XMRUSD','TVKUSD','LRCUSD','FTMUSD','KSMUSD','FILUSD',
            'BTTUSD','EIGENUSD','PONDUSD','RAREUSD','PNUTUSD','OMNIUSD','ALGOUSD','ANKRUSD','TRXUSD','DENTUSD',
            'XTZUSD','DOGEUSD','OXTUSD','SOLUSD','ZRXUSD','GLMRUSD','ARBUSD','TIAUSD','KEYUSD','FIDAUSD','RADUSD',
            'BLURUSD']

In [1]:
# Shorter ticker list in 'USD'-suffixed form. Compared with the list hard-coded
# in get_symbols_for_bot() it omits OCEANUSD, HNTUSD, MULTIUSD, KP3RUSD, BLZUSD,
# GALUSD, XMRUSD, TVKUSD, BTTUSD and KEYUSD.
# NOTE(review): the reason for dropping these is not recorded here — presumably
# they were unavailable on the target exchange; confirm.
symbols = ['MANAUSD','BONKUSD','BANDUSD','PHAUSD','POLUSD','STORJUSD','ETHUSD','SCUSD','TNSRUSD',
            'ATOMUSD','RLCUSD','GMTUSD','LTCUSD','ALICEUSD','DYMUSD','SEIUSD','QTUMUSD','MASKUSD','CTSIUSD',
            'TONUSD','OPUSD','ARKMUSD','FORTHUSD','CHRUSD','RUNEUSD','ZROUSD','ENJUSD','SAGAUSD',
            'ZECUSD','ENSUSD','SUIUSD','SHIBUSD','ETHFIUSD','CELRUSD','REZUSD','NEIROUSD',
            'ZKUSD','APTUSD','LINKUSD','ICXUSD','APEUSD','EGLDUSD','API3USD','DASHUSD','MATICUSD','STRKUSD','ICPUSD',
            'SANDUSD','FLOWUSD','ALTUSD','MINAUSD','TURBOUSD','CVCUSD','FETUSD','JASMYUSD','RENDERUSD','OGNUSD',
            'NEARUSD','COTIUSD','STGUSD','IMXUSD','WIFUSD','DOTUSD','GRTUSD','SYNUSD','MEMEUSD','PEPEUSD','LSKUSD',
            'AVAXUSD','LDOUSD','BTCUSD','FXSUSD','TAOUSD','LUNAUSD','BCHUSD','LPTUSD','AUDIOUSD','MOVRUSD','ETCUSD',
            'ADAUSD','AGLDUSD','STXUSD','LRCUSD','FTMUSD','KSMUSD','FILUSD',
            'EIGENUSD','PONDUSD','RAREUSD','PNUTUSD','OMNIUSD','ALGOUSD','ANKRUSD','TRXUSD','DENTUSD',
            'XTZUSD','DOGEUSD','OXTUSD','SOLUSD','ZRXUSD','GLMRUSD','ARBUSD','TIAUSD','FIDAUSD','RADUSD',
            'BLURUSD']

In [2]:
# Ten 'USDT'-suffixed pairs; this rebinding of `symbols` is the value passed to Data(...) in the next cell.
symbols = ['BTCUSDT', 'MASKUSDT', 'CELRUSDT', 'XMRUSDT', 'FILUSDT', 'FLOWUSDT', 'FORTHUSDT', 'FTMUSDT', 'IMXUSDT', 'LUNAUSDT']

In [3]:
# Builds the dataset with the Data defaults (interval '1h', 2024-07-01 to 2025-01-02, exchange 'binance').
# With get_data=True the constructor performs the HTTP downloads and, if data comes back, writes data.csv.
data_instance = Data(symbols, get_data = True)

In [4]:
# Display the prepared frame: one row per (date, coin) with the price, volume and return columns.
data_instance.df

Unnamed: 0_level_0,Unnamed: 1_level_0,close,creturns,high,log_return,low,open,price,returns,volume,volume_in_dollars
date,coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-07-01 01:00:00,BTCUSDT,63640.82000,1.011263,63730.00000,0.011200,62901.71000,62931.98000,63640.82000,0.011263,2.570402e+03,1.635825e+08
2024-07-01 01:00:00,CELRUSDT,0.01624,1.016907,0.01626,0.016765,0.01592,0.01597,0.01624,0.016907,2.293067e+06,3.723941e+04
2024-07-01 01:00:00,FILUSDT,4.55200,1.016979,4.55600,0.016837,4.46800,4.47600,4.55200,0.016979,3.236500e+05,1.473255e+06
2024-07-01 01:00:00,FLOWUSDT,0.64200,1.017433,0.64500,0.017282,0.62900,0.63100,0.64200,0.017433,2.679289e+05,1.720103e+05
2024-07-01 01:00:00,FORTHUSDT,3.09300,1.006508,3.10000,0.006487,3.06800,3.07300,3.09300,0.006508,1.067105e+04,3.300556e+04
...,...,...,...,...,...,...,...,...,...,...,...
2025-01-01 23:00:00,FORTHUSDT,5.29200,1.722096,5.33000,0.000000,5.28100,5.29600,5.29200,0.000000,5.710300e+03,3.021891e+04
2025-01-01 23:00:00,FTMUSDT,0.79970,1.341779,0.82070,-0.022626,0.79680,0.81800,0.79970,-0.022372,5.813293e+06,4.648890e+06
2025-01-01 23:00:00,IMXUSDT,1.35200,0.859559,1.35700,-0.003691,1.34700,1.35700,1.35200,-0.003685,1.117492e+05,1.510849e+05
2025-01-01 23:00:00,LUNAUSDT,0.43050,1.004667,0.43290,-0.003478,0.42940,0.43210,0.43050,-0.003472,3.080697e+05,1.326240e+05
