#### For a given ticker, load history of data


In [1]:
import ccxt
import pandas as pd
import os

# GLOBAL VARIABLES
# Initialize the specific exchange
# EXCHANGE_NAME is the attribute name looked up on the ccxt module below.
EXCHANGE_NAME = "kraken"
# Client instance used by fetch_ohlcv_data_per_symbol for all OHLCV requests.
EXCHANGE = getattr(ccxt, EXCHANGE_NAME)()  # i.e. ccxt.kraken()
# NOTE(review): `markets` is not referenced by any of the visible cells;
# presumably load_markets() is called to warm up the client — confirm.
markets = EXCHANGE.load_markets()

# Folder with one CSV per ticker, used by the save/load helpers.
# NOTE(review): absolute, machine-specific Windows path.
TICKER_DATA_PATH = r"C:\Users\Damja\CODING_LOCAL\trading\ticker_specific_data"

def fetch_ohlcv_data_per_symbol(symbol, timeframe='1h', **kwargs):
    '''
    Fetch OHLCV candles for one symbol from EXCHANGE and return them as a DataFrame.
    arguments:
        symbol: str (as given by exchange, i.e. 'BTC/USD')
        timeframe: str (candle size forwarded to EXCHANGE.fetch_ohlcv)
        kwargs: other arguments forwarded to EXCHANGE.fetch_ohlcv (i.e. since, limit)
    returns:
        pd.DataFrame with columns timestamp, open, high, low, close, volume, usd_volume,
        or None when anything goes wrong (the error is printed, not raised)
    '''
    column_names = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
    try:
        candles = EXCHANGE.fetch_ohlcv(symbol, timeframe=timeframe, **kwargs)
        frame = pd.DataFrame(candles, columns=column_names)
        # The raw timestamps are interpreted as epoch milliseconds.
        frame['timestamp'] = pd.to_datetime(frame['timestamp'], unit='ms')
        # Traded volume valued at the closing price of each candle.
        frame['usd_volume'] = frame['close'] * frame['volume']
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print(f"Error fetching data for {symbol}: {e}")
        return None
    return frame


def update_ohlcv_data_per_symbol(symbol, timeframe='1h', timestamp=None, **kwargs):
    '''
    Merge the stored history of a symbol with candles freshly fetched since `timestamp`.
    arguments:
        symbol: str (as given by exchange, i.e. 'BTC/USD', converted to 'BTCUSD' for data folder)
        timeframe: str (candle size forwarded to fetch_ohlcv_data_per_symbol)
        timestamp: datetime (start of the fetch, converted to epoch milliseconds for `since`)
        kwargs: other arguments for fetch_ohlcv_data_per_symbol
                (a `since` entry is overridden by `timestamp`)
    returns:
        pd.DataFrame holding the stored rows followed by the fetched rows, with
        fully identical rows removed (first occurrence kept) and a fresh 0..n-1 index
    raises:
        AssertionError: if timestamp is None
    '''
    # Explicit raise instead of `assert` so the check survives `python -O`;
    # the exception type seen by callers stays the same.
    if timestamp is None:
        raise AssertionError("Timestamp must be provided")

    data_folder_symbol = symbol.replace('/', '')
    df = load_ohlcv_data_per_symbol(data_folder_symbol)
    # If the loader hands back a frame whose 'timestamp' column is text (which
    # is what pd.read_csv produces by default), convert it so stored rows can
    # compare equal to freshly fetched rows during de-duplication.
    if df is not None and 'timestamp' in df.columns:
        df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    # Honour the caller's timeframe and extra arguments; `timestamp` always
    # decides where the fetch starts.
    fetch_kwargs = {**kwargs, 'since': int(timestamp.timestamp() * 1000)}
    df_since_timestamp = fetch_ohlcv_data_per_symbol(symbol=symbol, timeframe=timeframe, **fetch_kwargs)

    # NOTE(review): rows are de-duplicated on ALL columns, so a stored candle
    # whose values changed since it was saved is kept next to its newer
    # version — confirm whether de-duplicating on 'timestamp' is wanted.
    df_new = pd.concat([df, df_since_timestamp]).drop_duplicates(keep='first').reset_index(drop=True)
    return df_new
        

def save_ohlcv_data_per_symbol(df, symbol):
    '''
    Write the OHLCV frame of one symbol to '<TICKER_DATA_PATH>/<symbol>.csv'.
    arguments:
        df: pd.DataFrame (written without its index)
        symbol: str (used verbatim as the file stem)
    '''
    destination = f"{TICKER_DATA_PATH}/{symbol}.csv"
    df.to_csv(destination, index=False)


def load_ohlcv_data_per_symbol(symbol):
    '''
    Load the stored OHLCV history of one symbol from '<TICKER_DATA_PATH>/<symbol>.csv'.
    arguments:
        symbol: str (file stem, i.e. 'BTCUSD')
    returns:
        pd.DataFrame with the CSV contents as parsed by pd.read_csv with default
        settings (no date parsing, so date-like columns come back as text)
    '''
    # Return the parsed frame so callers actually receive the stored history.
    return pd.read_csv(TICKER_DATA_PATH + f"/{symbol}.csv")

    

### Create a one-year history for the pairs of interest

In [105]:
# Try the fetch helper on a single pair.
pair = 'BTC/USD'
interval = '30m'
# Requested start of the history, converted to epoch milliseconds for `since`.
start_date = pd.to_datetime('2024-01-01')
start_date_in_ms = int(start_date.timestamp() * 1000)

# NOTE(review): the recorded output of this cell starts at 2025-01-19 although
# `since` is 2024-01-01 — presumably the exchange only serves its most recent
# candles; confirm before relying on this call for a one-year history.
df = fetch_ohlcv_data_per_symbol(symbol=pair, timeframe=interval, since=start_date_in_ms, limit=20)

# Bare expression so the notebook displays the frame.
df

Unnamed: 0,timestamp,open,high,low,close,volume,usd_volume
0,2025-01-19 09:30:00,104174.1,105200.6,104140.0,105066.0,67.375883,7078915.0
1,2025-01-19 10:00:00,105066.1,105084.1,104272.2,104311.5,25.431066,2652753.0
2,2025-01-19 10:30:00,104322.3,104963.5,104281.8,104614.5,52.371845,5478854.0
3,2025-01-19 11:00:00,104614.6,105127.8,104614.6,104761.0,32.090647,3361848.0
4,2025-01-19 11:30:00,104761.0,104801.9,104021.8,104141.4,46.707514,4864186.0
5,2025-01-19 12:00:00,104141.3,104650.0,104128.4,104245.4,19.403075,2022681.0
6,2025-01-19 12:30:00,104245.4,104817.1,104153.3,104817.0,103.347815,10832610.0
7,2025-01-19 13:00:00,104817.1,105125.0,104740.0,104825.7,97.659818,10237260.0
8,2025-01-19 13:30:00,104825.6,104996.7,104700.2,104798.5,9.926373,1040269.0
9,2025-01-19 14:00:00,104798.5,105448.6,104603.0,105258.0,49.938482,5256425.0


#### After downloading the data from the Kraken website, we make a few small adjustments and add some columns
This needs to be done only once, unless the downloaded data lags more than 720 data points behind the present (the time span this covers depends on the timeframe)

In [104]:
# Convert the OHLCVT files downloaded from Kraken into the format used locally:
# named columns, an added 'usd_volume' column and a datetime index called 'Date'.
NUM_PAIRS_TO_LOAD = 50
TICKER_DATA_PATH_DOWNLOADED = r"C:\Users\Damja\CODING_LOCAL\trading\ticker_specific_data\data_downloaded"
TICKER_DATA_PATH_OUTPUT = r"C:\Users\Damja\CODING_LOCAL\trading\ticker_specific_data\data_updated"

# The first column of pairs.csv holds the exchange symbols (i.e. 'BTC/USD');
# only the first NUM_PAIRS_TO_LOAD of them are processed.
pairs = pd.read_csv("pairs.csv")
pairs = pairs.iloc[:NUM_PAIRS_TO_LOAD, 0].values
# Suffix of the downloaded file names (i.e. 'BTCUSD_60.csv').
TIMEFRAME = '60'

# Names for the data columns; the downloaded files are read without a header row.
columns = ['open', 'high', 'low', 'close', 'volume']
# Keep at most 5 years of rows (365 days * 24 candles per day * 5 years).
# NOTE(review): 24 candles per day assumes hourly data, i.e. TIMEFRAME == '60'.
MAX_ROWS = 365 * 24 * 5

# load each pair
for symbol in pairs:
    data_folder_symbol = symbol.replace("/", "") + "_" + TIMEFRAME
    # os.path.join instead of a hard-coded '\\' so the cell is not Windows-only.
    downloaded_file = os.path.join(TICKER_DATA_PATH_DOWNLOADED, f"{data_folder_symbol}.csv")

    # skip pairs for which nothing was downloaded
    if not os.path.exists(downloaded_file):
        print(f"file for {symbol} does not exist")
        continue

    print(f"Changing file for symbol {symbol}")
    try:
        # first column (epoch seconds) becomes the index
        df = pd.read_csv(downloaded_file, header=None, index_col=0)
    except pd.errors.EmptyDataError:
        print(f"file for {symbol} is empty")
        continue

    # the data downloaded from kraken is the OHLCVT data: besides the index
    # there must be exactly 6 columns, the last one being 'Trades'
    if len(df.columns) != 6:
        raise ValueError(
            f"The dataframe should have 6 columns, file for {symbol} has {len(df.columns)}"
        )

    # drop the 'Trades' column and keep only the most recent MAX_ROWS rows;
    # .copy() so the column assignment below works on an independent frame
    df = df.iloc[-MAX_ROWS:, :5].copy()

    # name the columns and add the volume valued at the closing price
    df.columns = columns
    df['usd_volume'] = df['close'] * df['volume']

    # convert the epoch-second index to datetimes
    df.index = pd.to_datetime(df.index, unit='s')
    df.index.name = 'Date'

    df.to_csv(os.path.join(TICKER_DATA_PATH_OUTPUT, f"{data_folder_symbol}.csv"))

file for BTC/USD does not exist
Changing file for symbol PEPE/USD
Changing file for symbol XRP/USD
Changing file for symbol WIF/USD
Changing file for symbol USDT/USD
file for DOGE/USD does not exist
Changing file for symbol SOL/USD
Changing file for symbol EUR/USD
Changing file for symbol TURBO/USD
Changing file for symbol ETH/USD
Changing file for symbol SUI/USD
Changing file for symbol XLM/USD
Changing file for symbol WOO/USD
Changing file for symbol NEAR/USD
Changing file for symbol FTM/USD
Changing file for symbol SHIB/USD
Changing file for symbol USDC/USD
Changing file for symbol LINK/USD
Changing file for symbol BONK/USD
Changing file for symbol POPCAT/USD
Changing file for symbol GALA/USD
Changing file for symbol W/USD
Changing file for symbol TAO/USD
Changing file for symbol ALGO/USD
Changing file for symbol FET/USD
Changing file for symbol GBP/USD
Changing file for symbol TRX/USD
Changing file for symbol SUSHI/USD
file for KAS/USD does not exist
Changing file for symbol FLOKI/

Unnamed: 0_level_0,open,high,low,close,volume,usd_volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-29 16:00:00,13.010,13.010,13.000,13.000,468.276010,6087.588131
2022-03-29 17:00:00,22.999,31.999,13.869,25.969,37.811371,981.923499
2022-03-29 18:00:00,13.861,13.861,13.000,13.000,1356.651270,17636.466510
2022-03-29 19:00:00,15.422,15.422,12.998,13.400,621.512079,8328.261859
2022-03-29 20:00:00,13.400,13.400,13.332,13.399,835.888319,11200.067589
...,...,...,...,...,...,...
2024-09-30 13:00:00,0.426,0.426,0.426,0.426,21.052632,8.968421
2024-09-30 14:00:00,0.426,0.432,0.422,0.432,908.525242,392.482905
2024-09-30 18:00:00,0.425,0.432,0.424,0.432,210.009281,90.724009
2024-09-30 19:00:00,0.427,0.427,0.423,0.423,159.850781,67.616880
