In [1]:
import pandas as pd
import glob
import os

# Percorso ai file CSV delle strategie
csv_files = glob.glob("./DATA/*.csv")

# Funzione per leggere e pulire ogni file
frames = []
for file in csv_files:
    # Estraggo il nome strategia dal nome file
    strategy = os.path.basename(file).replace('.csv', '')
    # Leggo il file gestendo possibili codifiche e separatori
    try:
        df = pd.read_csv(file, sep='\t', encoding='utf-16', engine='python')
    except Exception:
        df = pd.read_csv(file, sep='\t', encoding='utf-8', engine='python')
    # Rimuovo eventuali colonne con nomi strani
    df.columns = [c.strip('<> \r\n\t').upper() for c in df.columns]
    # Prendo solo DATE e BALANCE
    df = df[[col for col in df.columns if 'DATE' in col or 'BALANCE' in col]]
    # Rinomino le colonne
    df = df.rename(columns={df.columns[0]: 'DATE', df.columns[1]: f'BALANCE_{strategy}'})
    # Pulizia e conversione timestamp
    df['DATE'] = df['DATE'].astype(str).str.strip()
    frames.append(df)

# Merge progressivo su DATE
from functools import reduce
merged = reduce(lambda left, right: pd.merge(left, right, on='DATE', how='outer'), frames)

# Ordina per data
merged = merged.sort_values('DATE').reset_index(drop=True)

# Mostra le prime righe
merged.head()

Unnamed: 0,DATE,"BALANCE_usdcad_1m_1440,1","BALANCE_usdcad_15m_1440,0.1","BALANCE_usdcad_5m_120,2","BALANCE_usdcad_1m_120,2","BALANCE_usdcad_1m_7200,0.5","BALANCE_usdcad_5m_1440,0.5"
0,2024.01.01 00:00,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
1,2024.01.02 00:26,10018.1,,,,10021.6,
2,2024.01.02 00:31,,,,,,10024.4
3,2024.01.02 03:04,10018.1,,,,,
4,2024.01.02 03:05,10018.1,,,,10021.6,


In [2]:
# Applica forward fill per gestire i mismatch temporali
merged_ffill = merged.ffill()
merged_ffill.head()

Unnamed: 0,DATE,"BALANCE_usdcad_1m_1440,1","BALANCE_usdcad_15m_1440,0.1","BALANCE_usdcad_5m_120,2","BALANCE_usdcad_1m_120,2","BALANCE_usdcad_1m_7200,0.5","BALANCE_usdcad_5m_1440,0.5"
0,2024.01.01 00:00,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
1,2024.01.02 00:26,10018.1,10000.0,10000.0,10000.0,10021.6,10000.0
2,2024.01.02 00:31,10018.1,10000.0,10000.0,10000.0,10021.6,10024.4
3,2024.01.02 03:04,10018.1,10000.0,10000.0,10000.0,10021.6,10024.4
4,2024.01.02 03:05,10018.1,10000.0,10000.0,10000.0,10021.6,10024.4
