In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bt
import datetime as dt
import yfinance as yf
from sklearn.preprocessing import StandardScaler

from tensorflow.python.keras.saving.save import load_model

# Import des données

In [2]:
daily_data = pd.DataFrame()
data_SP500 = pd.read_parquet('/Users/forget/Library/Mobile Documents/com~apple~CloudDocs/Project Stock Market Deep Learning/Data/data_SP500.parquet')
data_NDX = pd.read_parquet('/Users/forget/Library/Mobile Documents/com~apple~CloudDocs/Project Stock Market Deep Learning/Data/data_NASDAQ.parquet')
data_MP = pd.read_parquet('/Users/forget/Library/Mobile Documents/com~apple~CloudDocs/Project Stock Market Deep Learning/Data/data_MP.parquet')
data_Crypto = pd.read_parquet('/Users/forget/Library/Mobile Documents/com~apple~CloudDocs/Project Stock Market Deep Learning/Data/data_Crypto.parquet')
daily_data = pd.concat([data_SP500, data_NDX, data_MP, data_Crypto], ignore_index=True)
daily_data = daily_data.drop_duplicates(subset=['Ticker', 'Date'])

daily_data = daily_data.drop(columns=['Adj Close'])
# trier la data pour préparer la fusion
daily_data = daily_data.sort_values(by=['Date'], ascending=[True])

#Date au format Date
daily_data['Date'] = pd.to_datetime(daily_data['Date'])

# Définir la colonne 'Date' comme index
daily_data.set_index('Date', inplace=True)
daily_data

Price,Close,High,Low,Open,Volume,Ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1976-07-01,0.409758,0.414986,0.409758,0.414986,1.274248e+06,BMY
1976-07-01,0.385818,0.391612,0.385818,0.386977,3.443590e+05,ETN
1976-07-01,0.484358,0.512849,0.484358,0.484358,1.697500e+04,PNR
1976-07-01,0.375415,0.378107,0.374070,0.375415,3.724800e+06,XOM
1976-07-01,2.685385,2.718860,2.685385,2.711421,6.024000e+05,CAT
...,...,...,...,...,...,...
2025-10-01,61.599998,62.889999,61.570000,62.459999,9.756500e+04,CL=F
2025-10-01,47.575001,47.825001,46.814999,46.834999,5.440100e+04,SI=F
2025-10-01,3896.100098,3922.699951,3880.300049,3887.699951,1.647970e+05,GC=F
2025-10-01,4.847000,4.888000,4.816000,4.879000,1.650100e+04,HG=F


In [3]:
weekly_data = daily_data.groupby("Ticker").resample('W').agg({
    'Open': 'first',  # Premier prix d'ouverture du mois
    'High': 'max',  # Plus haut du mois
    'Low': 'min',  # Plus bas du mois
    'Close': 'last',  # Dernier prix de clôture du mois
    'Volume': 'sum',  # Somme du volume sur le mois
}).reset_index()

# Features

In [4]:
# Calculer le rendement pour chaque Ticker avec un décalage d'un mois
weekly_data['Return'] = (weekly_data['Close'] / weekly_data['Open']) - 1

# Paramètres
liste_ma = [9, 20, 25, 50, 100]

# Calcul des moyennes mobiles pour chaque fenêtre
for window in liste_ma:
    # Calcul de la moyenne mobile pour chaque 'Ticker'
    ma_column = f'ma_{window}'
    weekly_data[ma_column] = weekly_data.groupby('Ticker')['Close'].transform(
        lambda s: s.rolling(window, min_periods=1).mean())

liste_ma_1 = [9, 25, 50, 100]
liste_ma_2 = [9, 25, 50, 100]

for window in liste_ma_1:
    for window_2 in liste_ma_2:

        # Calcul de la moyenne mobile pour chaque 'Ticker'
        ma_column_1 = f'ma_{window}'
        ma_column_2 = f'ma_{window_2}'
        distance_ma_column = f'distance_ma_{window_2}/{window}'

        if ma_column_1 != ma_column_2:
            # Calcul du pourcentage d'écart entre le prix et la moyenne mobile
            weekly_data[distance_ma_column] = weekly_data[ma_column_2] / weekly_data[ma_column_1]

list_window = [4, 8, 12, 26, 52]  # Liste de valeurs de top_n pour la sélection des meilleurs

# Calcul des performances pour chaque fenêtre
for window in list_window:
    # Nom dynamique de la colonne pour la performance
    roc_column = f'roc_{window}'

    # Calcul de la performance pour chaque 'Ticker' sur la fenêtre spécifiée
    weekly_data[roc_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)

liste_ma = [9, 25]
for window in liste_ma:
    # Calcul de la moyenne mobile pour chaque 'Ticker'
    distance_price_column = f'distance_price_{window}'
    ma_column = f'ma_{window}'

    # Calcul du pourcentage d'écart entre le prix et la moyenne mobile
    weekly_data[distance_price_column] = weekly_data['Close'] / weekly_data[ma_column]

weekly_data['Year'] = weekly_data['Date'].dt.year

# Close d’ancrage (première observation de l'année par Ticker)
anchor_close = weekly_data.groupby(['Ticker', 'Year'])['Close'].transform('first')

# Perf YTD (depuis le début d’année)
weekly_data['perf_ytd'] = weekly_data['Close'] / anchor_close - 1

list_window = [4, 8, 12, 26, 52]  # Liste de valeurs de top_n pour la sélection des meilleurs

# Calcul des performances pour chaque fenêtre
for window in list_window:
    # Nom dynamique de la colonne pour la performance
    MAV_column = f'Momentum_Ajusted_Vol_{window}'
    Perf_column = f'Perf_{window}'
    Vol_column = f'Vol_{window}'

    # Calcul de la performance pour chaque 'Ticker' sur la fenêtre spécifiée
    weekly_data[Perf_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
    weekly_data[Vol_column] = weekly_data.groupby('Ticker')['Return'].rolling(window=window).std().reset_index(level=0,
                                                                                                               drop=True)
    weekly_data[MAV_column] = weekly_data[Perf_column] / weekly_data[Vol_column]

period = 25

# Variation quotidienne
weekly_data['delta'] = weekly_data.groupby('Ticker')['Close'].diff()

# Gains et pertes
weekly_data['gains'] = weekly_data['delta'].clip(lower=0)
weekly_data['losses'] = -weekly_data['delta'].clip(upper=0)

# Moyenne mobile simple sur 'period' jours
weekly_data['avg_gain'] = (
    weekly_data.groupby('Ticker')['gains']
    .transform(lambda x: x.rolling(window=period, min_periods=period).mean())
)
weekly_data['avg_losses'] = (
    weekly_data.groupby('Ticker')['losses']
    .transform(lambda x: x.rolling(window=period, min_periods=period).mean())
)

# RS et RSI
weekly_data['rs'] = weekly_data['avg_gain'] / weekly_data['avg_losses']
weekly_data['rsi'] = 100 - (100 / (1 + weekly_data['rs']))

weekly_data = weekly_data.dropna()

  weekly_data[roc_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[roc_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[roc_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[roc_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[roc_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[Perf_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[Perf_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[Perf_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[Perf_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)
  weekly_data[Perf_column] = weekly_data.groupby('Ticker')['Close'].pct_change(window)


In [5]:
start_date = "2023-01-01"
daily_data = daily_data.reset_index()

daily_data = daily_data[daily_data['Date'] >= start_date]
weekly_data = weekly_data[weekly_data['Date'] >= start_date]

weekly_data = weekly_data.replace([np.inf, -np.inf], 0)
#weekly_data = weekly_data.drop(columns=['Close', 'Open', 'High', 'Low', 'Volume'])
# Remplacer les valeurs NaN par 0
weekly_data = weekly_data.dropna()

weekly_data

Price,Ticker,Date,Open,High,Low,Close,Volume,Return,ma_9,ma_20,...,Perf_52,Vol_52,Momentum_Ajusted_Vol_52,delta,gains,losses,avg_gain,avg_losses,rs,rsi
1206,A,2023-01-01,146.293955,148.911432,144.401934,146.924484,3218000.0,0.004310,146.456612,135.914009,...,-0.055227,0.038785,-1.423941,0.630508,0.630508,-0.000000,2.812483,1.731191,1.624595,61.898878
1207,A,2023-01-08,149.192433,151.823620,140.405427,144.980560,6821300.0,-0.028231,147.743044,136.429072,...,0.024055,0.036933,0.651316,-1.943924,0.000000,1.943924,2.812483,1.664893,1.689287,62.815425
1208,A,2023-01-15,146.963795,155.711495,144.519139,154.062103,5808000.0,0.048300,148.706380,137.777431,...,0.091737,0.037442,2.450088,9.081543,9.081543,-0.000000,2.932653,1.664893,1.761466,63.787345
1209,A,2023-01-22,153.826435,155.279515,149.369176,153.080322,5114000.0,-0.004850,149.791550,139.167714,...,0.141342,0.037063,3.813527,-0.981781,0.000000,0.981781,2.577996,1.704165,1.512762,60.203157
1210,A,2023-01-29,153.158879,156.683493,150.046603,152.854507,4255100.0,-0.001987,149.678507,140.075985,...,0.143400,0.037076,3.867760,-0.225815,0.000000,0.225815,2.577996,1.694016,1.521825,60.346181
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
811140,ZTS,2025-09-07,154.979996,155.380005,151.350006,153.320007,7327400.0,-0.010711,152.260398,156.793997,...,-0.179912,0.028696,-6.269657,-3.079987,0.000000,3.079987,1.600110,1.874591,0.853578,46.050290
811141,ZTS,2025-09-14,152.500000,152.970001,146.490005,148.199997,14305100.0,-0.028197,151.566667,156.556070,...,-0.215014,0.028891,-7.442213,-5.120010,0.000000,5.120010,1.529783,2.079391,0.735688,42.385958
811142,ZTS,2025-09-21,148.080002,149.750000,145.149994,145.880005,15159000.0,-0.014857,151.264445,156.004305,...,-0.233520,0.028920,-8.074670,-2.319992,0.000000,2.319992,1.526604,2.172191,0.702795,41.273007
811143,ZTS,2025-09-28,146.550003,146.710007,139.339996,143.500000,15582400.0,-0.020812,150.306668,155.242344,...,-0.254423,0.028938,-8.791912,-2.380005,0.000000,2.380005,1.526604,1.815234,0.840996,45.681571


# Model

In [6]:
from tensorflow.keras.models import load_model

In [7]:
scaler = StandardScaler()
best_model = load_model("bot/best_model.h5")

X = weekly_data[[
    'distance_ma_50/9',
    'distance_ma_100/9',
    'distance_ma_9/25',
    'distance_ma_50/25',
    'distance_ma_100/25',
    'distance_ma_25/50',
    'distance_ma_100/50',
    'roc_8',
    'roc_52',
    'Vol_52',
    'Momentum_Ajusted_Vol_52',
    'perf_ytd']]

X_scale_test = scaler.fit_transform(X)
weekly_data['Proba'] = best_model.predict(X_scale_test).ravel()




[1m2365/2365[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 370us/step


In [8]:
weekly_data['Prediction'] = (weekly_data['Proba'] > 0.8).astype(int)
weekly_data['Prediction'].value_counts()

Prediction
0    74086
1     1567
Name: count, dtype: int64

# Backtest

### Preparation

In [9]:
# Pivot pour obtenir les prix hebdomadaires par ticker
price_data = weekly_data.pivot(index='Date', columns='Ticker', values='Close').sort_index()

# Pivot pour obtenir les signaux hebdomadaires par ticker
signals = weekly_data.pivot(index='Date', columns='Ticker', values='Prediction').fillna(0)

### Backtest

In [24]:
import bt
import pandas as pd

# Exemple : ton DataFrame de signaux binaires par date et ticker
# signals = pd.DataFrame(...)

class BinarySignalAlgo(bt.Algo):
    def __call__(self, target):
        if target.now not in signals.index:
            return False

        signal_today = signals.loc[target.now]
        selected = signal_today[signal_today == 1.0]

        if not selected.empty:
            weights = pd.Series(1 / len(selected), index=selected.index)
        else:
            weights = pd.Series(0, index=signal_today.index)

        print(f"[{target.now.date()}] Weights:\n{weights}\n")
        target.temp['weights'] = weights
        return True

# === ALGO POUR VIDER LE PORTEFEUILLE À CHAQUE PÉRIODE ===

class ClearPositions(bt.Algo):
    def __call__(self, target):
        tickers = target.universe
        weights = pd.Series(0, index=tickers)
        target.temp['weights'] = weights
        return True

# === STRATÉGIE AVEC LIQUIDATION À CHAQUE REBALANCEMENT ===

strategy = bt.Strategy(
    'BinarySignalStrategy',
    [
        bt.algos.RunWeekly(),
        bt.algos.SelectAll(),
        ClearPositions(),           # on vend tout chaque semaine
        BinarySignalAlgo(),         # on génère les nouveaux poids
        bt.algos.WeighSpecified(),  # on applique ces poids
        bt.algos.Rebalance()        # on exécute les ordres
    ]
)

# === BACKTEST ===

portfolio = bt.Backtest(strategy, price_data, initial_capital=1000)
result = bt.run(portfolio)

# === AFFICHAGE ===

# Résumé
result.display()

# Courbe de performance
result.plot()

# Transactions
transactions = result.get_transactions('BinarySignalStrategy')
print("\n🧾 Transactions effectuées :")
print(transactions.dropna(how='all').head())

  0%|          | 0/1 [00:00<?, ?it/s]


ValueError: Index data must be 1-dimensional