In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from pandas.tseries.holiday import USFederalHolidayCalendar, AbstractHolidayCalendar, Holiday
from pandas.tseries.offsets import CustomBusinessDay
from numba import jit
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
warnings.filterwarnings('ignore')


class FrenchHolidayCalendar(AbstractHolidayCalendar):
    """Fixed-date French public holidays expressed as pandas holiday rules.

    Every rule is a plain (month, day) date that recurs each year; no
    movable holiday and no observance shift is defined here.
    """

    rules = [
        Holiday(label, month=month, day=day)
        for label, month, day in (
            ('New Years Day', 1, 1),
            ('Labor Day', 5, 1),
            ('Victory in Europe Day', 5, 8),
            ('Bastille Day', 7, 14),
            ('Assumption Day', 8, 15),
            ('All Saints Day', 11, 1),
            ('Armistice Day', 11, 11),
            ('Christmas', 12, 25),
        )
    ]


def create_trading_calendar(start: str, end: str) -> pd.DatetimeIndex:
    """
    Build the trading calendar between ``start`` and ``end``.

    The calendar is a business-day range from which the US federal holidays
    and the holidays of ``FrenchHolidayCalendar`` are removed.
    """
    closed_days = USFederalHolidayCalendar().holidays(start, end)
    closed_days = closed_days.union(FrenchHolidayCalendar().holidays(start, end))
    trading_day = CustomBusinessDay(holidays=closed_days)
    return pd.date_range(start, end, freq=trading_day)


def load_data(data_dir: str, fill_limit: int = 5) -> tuple[dict, pd.DatetimeIndex]:
    """
    Load every ``*.csv`` file of ``data_dir`` and align all tickers on one trading calendar.

    Each file is read with its ``Date`` column parsed as datetimes, indexed by
    date and sorted. The overall first and last dates are tracked while reading,
    the trading calendar is built between them, and every frame is re-indexed on
    that calendar. Gaps in ``Close``, ``Volume`` and ``Momentum360`` (when the
    column exists) are forward-filled over at most ``fill_limit`` rows.

    Args:
        data_dir: Directory containing one CSV file per ticker.
        fill_limit: Maximum number of consecutive missing rows to forward-fill.

    Returns:
        ``(data, trading_days)`` where ``data`` maps the file stem to its
        re-indexed DataFrame and ``trading_days`` is the shared calendar.

    Raises:
        ValueError: If no CSV file with at least one valid date is found.
    """
    data = {}
    start_date = end_date = None
    files = list(Path(data_dir).glob('*.csv'))

    for f in tqdm(files, desc="Chargement des données"):
        df = pd.read_csv(f, parse_dates=['Date'])

        # Track only the running extremes instead of collecting every timestamp
        # in a set; Series.min()/max() skip NaT, which builtin min()/max() do not.
        first, last = df['Date'].min(), df['Date'].max()
        if pd.notna(first):
            start_date = first if start_date is None else min(start_date, first)
            end_date = last if end_date is None else max(end_date, last)

        data[f.stem] = df.set_index('Date').sort_index()

    if start_date is None:
        raise ValueError(f"No CSV file with valid dates found in {data_dir!r}")

    trading_days = create_trading_calendar(start_date, end_date)

    # Re-index on the shared calendar, then forward-fill short gaps only
    for ticker, df in data.items():
        df = df.reindex(trading_days)
        for col in ['Close', 'Volume', 'Momentum360']:
            if col in df.columns:
                df[col] = df[col].ffill(limit=fill_limit)
        data[ticker] = df

    return data, trading_days

@jit(nopython=True)
def calculate_position_changes(prev_holdings, top_10_mask, close_prices, cash, position_size, fee_rate):
    """
    Compute one day of position changes: sell what left the selection, buy what entered it.

    The three array arguments are indexed in parallel, one entry per ticker.

    Args:
        prev_holdings: Number of shares held per ticker before today's trades.
        top_10_mask: Boolean flag per ticker, True when the ticker is selected today.
        close_prices: Execution price per ticker.
        cash: Cash available before today's trades.
        position_size: Cash committed to each new position, fee included.
        fee_rate: Proportional fee charged on the traded amount.

    Returns:
        ``(new_holdings, cash, total_fees)``: the updated share counts, the
        remaining cash and the fees paid today.
    """
    new_holdings = prev_holdings.copy()
    total_fees = 0.0

    # Sell every held position that is no longer selected; the fee is taken
    # out of the proceeds.
    for i in range(len(prev_holdings)):
        if prev_holdings[i] > 0 and not top_10_mask[i]:
            sale_value = prev_holdings[i] * close_prices[i]
            cash += sale_value * (1 - fee_rate)
            total_fees += sale_value * fee_rate
            new_holdings[i] = 0.0

    # Buy selected tickers that were not held yet, in ticker order, while cash lasts.
    for i in range(len(prev_holdings)):
        if not top_10_mask[i] or prev_holdings[i] != 0:
            continue
        price = close_prices[i]
        if not price > 0:
            # Zero, negative or NaN price: nothing can be bought (and dividing
            # by it would fail), so skip the ticker.
            continue
        if cash >= position_size:
            fee = position_size * fee_rate
            cash -= position_size
            total_fees += fee
            # The fee is paid out of the committed amount, so only the
            # remainder is converted into shares. Previously the fee was
            # reported but never charged anywhere.
            new_holdings[i] = (position_size - fee) / price

    return new_holdings, cash, total_fees


def optimized_backtest(data: dict, trading_days: pd.DatetimeIndex,
                       initial_cash: float = 100000, position_size: float = 1000, fee_rate: float = 0.005):
    """
    Run the backtest one trading day at a time.

    For every day after the first one:
      1. tickers with a usable close, volume and momentum are kept;
      2. among them, those whose volume is at least the day's median volume
         are ranked by descending momentum, and ranks <= 10 form the selection;
      3. ``calculate_position_changes`` sells held tickers that are not
         selected and buys selected tickers that are not held;
      4. holdings, cash, cumulative fees and portfolio value are recorded.

    NOTE(review): a ticker whose close is NaN is removed from the selection and
    its price is passed as 0, so a held position in such a ticker is sold for
    a value of 0. Confirm this write-off is the intended treatment.

    Args:
        data: Mapping ticker -> DataFrame with ``Close``, ``Volume`` and
            ``Momentum360`` columns.
        trading_days: Calendar to iterate over.
        initial_cash: Starting cash.
        position_size: Cash committed to each new position.
        fee_rate: Proportional transaction fee.

    Returns:
        ``(portfolio, holdings)``. ``portfolio`` has the columns ``cash``,
        ``fees`` (cumulative) and ``portfolio_value``; ``holdings`` has one
        column of share counts per ticker. Both are indexed by ``trading_days``.
    """
    # Wide frames (one column per ticker), aligned on the requested calendar
    close = pd.DataFrame({ticker: df['Close'] for ticker, df in data.items()}).reindex(trading_days)
    volume = pd.DataFrame({ticker: df['Volume'] for ticker, df in data.items()}).reindex(trading_days)
    momentum = pd.DataFrame({ticker: df['Momentum360'] for ticker, df in data.items()}).reindex(trading_days)

    # Force float scalars: with an int ``initial_cash`` (the default) the 'cash'
    # and 'portfolio_value' columns would be created as int64, and every float
    # written to them later would be an incompatible-dtype assignment.
    initial_cash = float(initial_cash)
    position_size = float(position_size)
    fee_rate = float(fee_rate)

    holdings = pd.DataFrame(0.0, index=trading_days, columns=close.columns)
    portfolio = pd.DataFrame({
        'cash': initial_cash,
        'fees': 0.0,
        'portfolio_value': initial_cash
    }, index=trading_days)

    for i in tqdm(range(1, len(trading_days)), desc="Calcul du backtest"):
        date = trading_days[i]
        prev_date = trading_days[i - 1]

        # Today's rows, looked up once
        day_close = close.loc[date]
        day_volume = volume.loc[date]
        day_momentum = momentum.loc[date]

        # Tickers without a price today (end of data or gap beyond the fill limit)
        na_mask = day_close.isna()

        # Tickers usable for the selection: price, volume and momentum all present
        valid_data = day_momentum.notna() & day_volume.notna() & ~na_mask
        if valid_data.any():
            vol_threshold = np.nanpercentile(day_volume[valid_data], 50)
        else:
            vol_threshold = 0
        volume_filter = (day_volume >= vol_threshold) & valid_data

        # Momentum ranking (rank 1 = highest momentum) among liquid tickers
        mom_rank = day_momentum[volume_filter].rank(ascending=False)
        top_10 = pd.Series(False, index=close.columns)
        top_10[mom_rank[mom_rank <= 10].index] = True

        # Tickers without a price must never stay selected, which forces their exit
        top_10[na_mask] = False

        # Missing prices are passed as 0; such tickers are never selected
        prices = day_close.fillna(0)

        new_holdings, new_cash, fees = calculate_position_changes(
            holdings.loc[prev_date].values,
            top_10.values,
            prices.values,
            float(portfolio.loc[prev_date, 'cash']),
            position_size,
            fee_rate
        )

        # Record today's state; 'fees' accumulates across days
        holdings.loc[date] = new_holdings
        holdings_value = (holdings.loc[date] * prices).sum()
        portfolio.loc[date, 'cash'] = new_cash
        portfolio.loc[date, 'fees'] = portfolio.loc[prev_date, 'fees'] + fees
        portfolio.loc[date, 'portfolio_value'] = float(new_cash + holdings_value)

    return portfolio, holdings


def create_detailed_report(portfolio: pd.DataFrame, holdings: pd.DataFrame, 
                           data: dict, trading_days: pd.DatetimeIndex) -> pd.DataFrame:
    """
    Build a day-by-day report of the backtest.

    Args:
        portfolio: Frame with ``cash``, ``fees`` and ``portfolio_value``
            columns, where ``fees`` is the running total of fees paid so far
            (as produced by ``optimized_backtest``).
        holdings: Number of shares held per ticker and per day.
        data: Mapping ticker -> DataFrame with a ``Close`` column.
        trading_days: Index of the report.

    Returns:
        A DataFrame indexed by ``trading_days`` with, in order: ``Cash``,
        ``Total Holdings Value``, ``Portfolio Value``, ``Daily Fees``,
        ``Cumulative Fees``, ``Tickers Held``, ``Tickers Bought``,
        ``Tickers Sold``, then one ``"<ticker> Value"`` column (sorted by name)
        for every ticker that was held at least once.
    """
    close = pd.DataFrame({ticker: df['Close'] for ticker, df in data.items()})
    close = close.reindex(trading_days)

    report = pd.DataFrame(index=trading_days)
    report['Cash'] = portfolio['cash']
    report['Portfolio Value'] = portfolio['portfolio_value']

    # portfolio['fees'] is already cumulative: report it as such and derive the
    # daily amount by differencing (the first day keeps its own value).
    cumulative_fees = portfolio['fees']
    report['Cumulative Fees'] = cumulative_fees
    report['Daily Fees'] = cumulative_fees.diff().fillna(cumulative_fees)

    # Market value per ticker; a missing price counts as 0
    ticker_values = holdings * close.fillna(0)
    report['Total Holdings Value'] = ticker_values.sum(axis=1)

    report['Tickers Held'] = holdings.apply(
        lambda row: ', '.join(sorted(row.index[row > 0])), axis=1
    )
    # A positive change in share count is a buy, a negative one a sell
    holdings_diff = holdings.diff().fillna(0)
    report['Tickers Bought'] = holdings_diff.apply(
        lambda row: ', '.join(sorted(row.index[row > 0])), axis=1
    )
    report['Tickers Sold'] = holdings_diff.apply(
        lambda row: ', '.join(sorted(row.index[row < 0])), axis=1
    )

    base_cols = [
        'Cash', 'Total Holdings Value', 'Portfolio Value', 
        'Daily Fees', 'Cumulative Fees', 'Tickers Held', 
        'Tickers Bought', 'Tickers Sold'
    ]

    # Per-ticker value columns, only for tickers held at least once. They are
    # assembled in one frame and concatenated once, rather than inserted column
    # by column, which fragments the report when many tickers are involved.
    held_values = {
        f'{ticker} Value': ticker_values[ticker]
        for ticker in holdings.columns
        if (holdings[ticker] > 0).any()
    }
    value_frame = pd.DataFrame(
        {name: held_values[name] for name in sorted(held_values)},
        index=report.index,
    )

    return pd.concat([report[base_cols], value_frame], axis=1)



def get_available_dates(data_dir):
    """
    Return the sorted distinct values of the ``Date`` column found across
    all ``.csv`` files of ``data_dir``.
    """
    csv_names = [name for name in os.listdir(data_dir) if name.endswith(".csv")]

    seen_dates = set()
    for csv_name in tqdm(csv_names, desc="Extraction des dates disponibles"):
        # Only the Date column is read; it is converted to datetimes after loading
        date_column = pd.read_csv(os.path.join(data_dir, csv_name), usecols=["Date"])["Date"]
        seen_dates |= set(pd.to_datetime(date_column).unique())

    return sorted(seen_dates)


# ========== EXEMPLE D'UTILISATION ==========

if __name__ == "__main__":
    # Directory holding the CSV files passed to load_data
    data_dir = "datasets_technicals/1d"
    # Load all tickers once; `data` and `trading_days` are reused by the code below
    data, trading_days = load_data(data_dir)
    # First run over the full calendar returned by load_data, with default cash, position size and fee
    portfolio, holdings = optimized_backtest(data, trading_days)

Chargement des données:  65%|██████▍   | 661/1019 [00:20<00:10, 32.90it/s]

In [None]:

    # NOTE(review): this cell is indented as if it continued the
    # `if __name__ == "__main__":` block of the previous cell — confirm.
    # Backtest parameters
    start_date = "2023-01-01"
    end_date = "2024-12-31"
    # Rebinds `trading_days` to the calendar of this window only
    trading_days = create_trading_calendar(start_date, end_date)
    
    # 2) Run the backtest (rebinds `portfolio` and `holdings`)
    portfolio, holdings = optimized_backtest(
        data, trading_days, initial_cash=100000, position_size=1000, fee_rate=0.005
    )

    # 3) Plot the portfolio value over time
    plt.figure(figsize=(12, 6))
    sns.set_style("whitegrid")
    plt.plot(portfolio.index, portfolio['portfolio_value'], label="Portfolio Value")
    plt.title('Évolution de la valeur du portefeuille')
    plt.xlabel('Date')
    plt.ylabel('Valeur (€)')
    plt.xticks(rotation=45)
    plt.legend()
    plt.tight_layout()
    plt.show()

    # 4) Detailed report, written to 'toto.csv' in the working directory
    detailed_report = create_detailed_report(portfolio, holdings, data, trading_days)
    # NOTE(review): the result of tail(30) is neither assigned nor printed here
    detailed_report.tail(30)
    detailed_report.to_csv('toto.csv')


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from pathlib import Path
import time
from matplotlib.colors import to_rgb


# Wall-clock start, used for the total run-time message printed at the end
start_time1 = time.time()

# Fix the seed so the random start dates are reproducible.
# np.random.seed() without an argument reseeds from fresh entropy, so an
# explicit value is required for repeatable draws.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Global parameters
START_DATE = "2011-01-01"   # first day of the calendar start dates are drawn from
END_DATE = "2025-01-01"     # last day of that calendar
SIMULATION_YEARS = 2        # length of each simulation, in years
N_SIMULATIONS = 300         # number of random start dates
INITIAL_CASH = 10000        # starting cash of each simulation
POSITION_SIZE = 1000        # cash committed to each new position
FEE_RATE = 0.0035           # proportional transaction fee




# Base colour per start year. Built once at import time instead of on every call.
_YEAR_BASE_COLORS = {
    1980: '#2F4F4F',   # Grey-green
    1981: '#A52A2A',   # Brown
    1982: '#FF6347',   # Tomato
    1983: '#40E0D0',   # Turquoise
    1984: '#8A2BE2',   # Blue-violet
    1985: '#5F9EA0',   # Grey-blue
    1986: '#D2691E',   # Chocolate
    1987: '#CD5C5C',   # Rosy
    1988: '#DAA520',   # Gold
    1989: '#B8860B',   # Dark yellow
    1990: '#20B2AA',   # Sea green
    1991: '#00008B',   # Navy blue
    1992: '#008B8B',   # Dark blue-green
    1993: '#A9A9A9',   # Dark grey
    1994: '#006400',   # Dark green
    1995: '#FF4500',   # Reddish orange
    1996: '#2E8B57',   # Green
    1997: '#7FFF00',   # Light green
    1998: '#D2691E',   # Chocolate
    1999: '#9ACD32',   # Yellow-green
    2000: '#8B0000',   # Dark red
    2001: '#B0C4DE',   # Light blue
    2002: '#A9A9A9',   # Grey
    2003: '#8B008B',   # Dark magenta
    2004: '#FF8C00',   # Dark orange
    2005: '#D3D3D3',   # Light grey
    2006: '#C71585',   # Dark violet
    2007: '#FF1493',   # Bright pink
    2008: '#ADFF2F',   # Yellowish green
    2009: '#F0E68C',   # Pale yellow
    2010: '#B22222',   # Brick red
    2011: '#FFD700',   # Gold
    2012: '#1f77b4',   # Blue
    2013: '#2ca02c',   # Green
    2014: '#ff7f0e',   # Orange
    2015: '#d62728',   # Red
    2016: '#9467bd',   # Violet
    2017: '#8c564b',   # Brown
    2018: '#e377c2',   # Pink
    2019: '#7f7f7f',   # Grey
    2020: '#bcbd22',   # Yellow-green
    2021: '#17becf',   # Cyan
    2022: '#1a55FF',   # Royal blue
    2023: '#FF1a1a',   # Bright red
    2024: '#B0E0E6',   # Pale blue
    2025: '#4682B4',   # Steel blue
    2026: '#D2691E',   # Chocolate
    2027: '#FF6347',   # Tomato
    2028: '#A52A2A',   # Brown
    2029: '#8B4513',   # Saddle brown
    2030: '#2F4F4F',   # Grey-green
}

# Neutral grey used for any year missing from the palette
_FALLBACK_BASE_COLOR = '#7f7f7f'


def get_color_scheme(date):
    """
    Return the plot colour associated with a date.

    The year picks the base colour and the quarter scales its brightness
    (factor 0.6, 0.7, 0.8 or 0.9 for Q1 to Q4), so runs starting in the same
    year share a hue while later quarters appear lighter.

    Args:
        date: Object exposing ``year`` and ``month`` attributes.

    Returns:
        NumPy array holding the scaled RGB components, clipped to [0, 1].
        Years outside the palette fall back to a neutral grey instead of
        raising ``KeyError``.
    """
    quarter = (date.month - 1) // 3

    # Brightness depends on the quarter: 0.6, 0.7, 0.8, 0.9
    brightness_factor = 0.6 + (quarter * 0.1)

    base_color = np.array(to_rgb(_YEAR_BASE_COLORS.get(date.year, _FALLBACK_BASE_COLOR)))
    return np.clip(base_color * brightness_factor, 0, 1)



# Trading calendar covering the whole window start dates are drawn from
trading_days = create_trading_calendar(START_DATE, END_DATE)

# Length of one simulation in calendar rows (SIMULATION_YEARS * 252), computed
# once and as an int so it is valid both as a sample bound and as an index offset.
horizon_days = int(SIMULATION_YEARS * 252)

# Draw N_SIMULATIONS distinct random start positions, leaving a full horizon after each
start_indices = np.random.choice(len(trading_days) - horizon_days, N_SIMULATIONS, replace=False)
start_dates = [trading_days[i] for i in start_indices]

# Portfolio value series of every simulation, keyed by "Sim <n>"
simulations = {}

# Run the simulations
for i, start_date in enumerate(tqdm(start_dates, desc="Simulations")):
    # Last day of this simulation
    end_date = trading_days[start_indices[i] + horizon_days]

    # Calendar rows from start_date to end_date inclusive. The calendar is a
    # sorted date range, so a positional slice selects exactly that window.
    sim_trading_days = trading_days[start_indices[i]:start_indices[i] + horizon_days + 1]

    # Run the backtest on this window
    portfolio, _ = optimized_backtest(data, sim_trading_days, INITIAL_CASH, POSITION_SIZE, FEE_RATE)

    # Keep the total portfolio value only
    simulations[f"Sim {i+1}"] = portfolio['portfolio_value']

    
    
    
# Same series re-indexed by position (0, 1, 2, ...) so that simulations
# starting on different dates share a common "days elapsed" axis
aligned_simulations = {
    label: pd.Series(values.values, index=np.arange(len(values)))
    for label, values in simulations.items()
}
    
    
# First chart: every simulation plotted against its real calendar dates
plt.figure(figsize=(15, 12))
sns.set_style("whitegrid")

for label, values in simulations.items():
    # The colour is derived from the first date of the run (see get_color_scheme)
    start_date = values.index[0]
    color = get_color_scheme(start_date)
    year_quarter = f"{start_date.year}-Q{(start_date.month-1)//3 + 1}"
    # Low alpha keeps the overlapping curves readable
    plt.plot(values.index, values, label=f"{label} ({year_quarter})", 
             color=color, alpha=0.2)

plt.title(f"Évolution du portefeuille sur {N_SIMULATIONS} simulations ({SIMULATION_YEARS} ans chacune)")
plt.xlabel("Date")
plt.ylabel("Total Portfolio Value (€)")
# Legend left disabled, so the per-curve labels set above are not displayed
#plt.legend(loc='upper left', fontsize=8, ncol=2)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

#####@#####@#####@#####@#####@#####@#####@#####@#####@

# Second chart: same simulations, plotted against the number of days elapsed
plt.figure(figsize=(15, 12))
sns.set_style("whitegrid")

for label, values in aligned_simulations.items():
    # The aligned series no longer carries dates; read the start date from the original series
    start_date = simulations[label].index[0]
    color = get_color_scheme(start_date)
    year_quarter = f"{start_date.year}-Q{(start_date.month-1)//3 + 1}"
    plt.plot(values.index, values, label=f"{label} ({year_quarter})", 
             color=color, alpha=0.1)

plt.title(f"Évolution du portefeuille alignée sur {N_SIMULATIONS} simulations ({SIMULATION_YEARS} ans chacune)")
plt.xlabel("Jours écoulés")
plt.ylabel("Total Portfolio Value (€)")
# Legend left disabled, so the per-curve labels set above are not displayed
#plt.legend(loc='upper left', fontsize=8, ncol=2)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()



#####@#####@#####@#####@#####@#####@#####@#####@#####@


import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Third chart: distribution of the aligned simulations, day by day
plt.figure(figsize=(15, 12))
sns.set_style("whitegrid")

# One column per simulation, one row per elapsed day
df_aligned = pd.DataFrame(aligned_simulations)

# Row-wise statistics across simulations
mean_values = df_aligned.mean(axis=1)
min_values = df_aligned.min(axis=1)
max_values = df_aligned.max(axis=1)
p5_values, p10_values, p25_values, p50_values, p75_values, p90_values, p95_values = (
    df_aligned.quantile(level, axis=1)
    for level in (0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95)
)

# Opaque bands drawn from the widest to the narrowest so each one stays visible
for lower, upper, band_color, band_label in (
    (min_values, max_values, 'DarkSlateBlue', "Min/Max"),
    (p5_values, p95_values, 'Salmon', "5-95%"),
    (p10_values, p90_values, 'Pink', "10-90%"),
    (p25_values, p75_values, 'PeachPuff', "25-75%"),
):
    plt.fill_between(lower.index, lower, upper,
                     color=band_color, alpha=1, label=band_label)

# Median, then mean, drawn on top of the bands
plt.plot(p50_values.index, p50_values.values,
         label="Médiane",
         color='DarkSlateGray', linewidth=2, alpha=0.8)
plt.plot(mean_values.index, mean_values.values,
         label="Moyenne",
         color='black', linewidth=3, alpha=0.8)

plt.title(f"Évolution du portefeuille alignée sur {N_SIMULATIONS} simulations ({SIMULATION_YEARS} ans chacune)")
plt.xlabel("Jours écoulés")
plt.ylabel("Total Portfolio Value (€)")
plt.xticks(rotation=45)
plt.legend(bbox_to_anchor=(1.05, 1), loc='center left')
plt.tight_layout()
plt.show()




# Total run time since start_time1 was recorded
print(f"Temps total d'exécution: {time.time() - start_time1:.2f} secondes")