In [64]:
import yfinance as yf
import matplotlib as plt
import indicators
from indicators import *
import params
from params import *
import os
import importlib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

importlib.reload(params)
importlib.reload(indicators)

<module 'indicators' from '/Users/romaindesmeulemester/ProjetMachineLearning/ProjetMachineLearning/indicators.py'>

In [66]:
def extract_data(ticker, name, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date, interval="1d")
    data = data.dropna()
    data = data[data["Volume"] > 0]
    data["Variation"] = (data["Close"].diff() / data["Close"].shift()) * 100
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)
    data["Date"] = data.index
    data = data.dropna()
    data.columns.name = None
    data.insert(0, 'Indice', name)

    return data

In [65]:
def add_indicators(data, params_ma, params_ema, params_rsi, params_macd, params_std, params_bollinger):
    for x in params_ma:
        col_name = "MA_" + str(x)
        data[col_name] = MA(data["Close"], x)
    for x in params_ema:
        col_name = "EMA_" + str(x)
        data[col_name] = EMA(data["Close"], x)
    for x in params_rsi:
        col_name = "RSI_" + str(x)
        data[col_name] = RSI(data["Close"], x)
    for x in params_std:
        col_name = "STD_" + str(x)
        data[col_name] = STD(data["Close"], x)
    data["MACD"], data["Signal"] = MACD(data["Close"], params_macd[0], params_macd[1], params_macd[2])
    data["BollingerBandUpper"], data["BollingerBandLower"] = BollingerBands(data["Close"], params_bollinger[0], params_bollinger[1])

    return data

In [68]:
if isimport:
    base_list = []
    for name, ticker in indices.items():
        data = extract_data(ticker, name, start_date, end_date)
        data = add_indicators(data, params_ma, params_ema, params_rsi, params_macd, params_std, params_bollinger)
        data = data.dropna()
        base_list.append(data)

    base_clean = pd.concat(base_list)
    base_clean = base_clean.reset_index(drop=True)

    base_dir = os.path.join(os.getcwd(), "base")
    file_path = osgit.path.join(base_dir, "base_clean.csv")
    base_clean.to_csv(file_path)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


R√©gression lin√©aire

In [42]:
BaseTotal = BaseTotal.reset_index(drop=True)

# S√©paration des features et de la cible
X = BaseTotal.drop(columns=["V_t", "Indice", "Date"])
Y = BaseTotal["V_t"]

# Division en train et test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Cr√©ation et entra√Ænement du mod√®le
model = LinearRegression()
model.fit(X_train, Y_train)

# Affichage des coefficients
coefficients_lin√©aire = pd.DataFrame(np.append(model.intercept_, model.coef_),
                                    index=["Intercept"] + list(X_train.columns),
                                    columns=["Coefficients (Sklearn)"])

# Pr√©dictions avec Scikit-Learn
Y_pred = model.predict(X_test)

# Affichage des premi√®res valeurs r√©elles vs pr√©dites
resultats = pd.DataFrame({"Valeur r√©elle": Y_test.values, "Pr√©diction": Y_pred})
display(resultats.head(10))

mse = mean_squared_error(Y_test, Y_pred)
mae = mean_absolute_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

print(f"MSE : {mse:.4f}")
print(f"MAE : {mae:.4f}")
print(f"R¬≤ : {r2:.4f}")

Unnamed: 0,Valeur r√©elle,Pr√©diction
0,0.256694,0.201182
1,0.272819,-0.254252
2,-1.194333,-0.404818
3,1.393708,-0.156965
4,0.40889,0.067969
5,1.767805,2.741704
6,1.010118,1.991248
7,0.611322,0.218438
8,-0.04719,0.008137
9,5.458929,0.937261


MSE : 0.9071
MAE : 0.6095
R¬≤ : 0.4504


In [43]:
# Ajout de la colonne de biais (intercept)
X_train_b = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# Calcul des coefficients via OLS
theta_best = np.linalg.inv(X_train_b.T @ X_train_b) @ X_train_b.T @ Y_train

# Affichage des coefficients
coefficients_OLS = pd.DataFrame(theta_best, index=["Intercept"] + list(X_train.columns), columns=["Coefficients"])

# Pr√©diction sur l'ensemble de test
Y_pred_OLS = X_test_b @ theta_best

# Affichage des premi√®res pr√©dictions vs vraies valeurs
resultats_OLS = pd.DataFrame({"Valeur r√©elle": Y_test.values, "Pr√©diction OLS": Y_pred_OLS.flatten()})

display(resultats_OLS.head(10))

# Calcul des m√©triques d'√©valuation
mse_OLS = mean_squared_error(Y_test, Y_pred_OLS)
mae_OLS = mean_absolute_error(Y_test, Y_pred_OLS)
r2_OLS = r2_score(Y_test, Y_pred_OLS)

# Affichage des r√©sultats
print(f"üìä Performance du mod√®le OLS :")
print(f"MSE (Erreur quadratique moyenne) : {mse_OLS:.4f}")
print(f"MAE (Erreur absolue moyenne) : {mae_OLS:.4f}")
print(f"R¬≤ Score : {r2_OLS:.4f}")

Unnamed: 0,Valeur r√©elle,Pr√©diction OLS
0,0.256694,0.201174
1,0.272819,-0.254344
2,-1.194333,-0.404947
3,1.393708,-0.15705
4,0.40889,0.067881
5,1.767805,2.741773
6,1.010118,1.99127
7,0.611322,0.218396
8,-0.04719,0.00812
9,5.458929,0.937186


üìä Performance du mod√®le OLS :
MSE (Erreur quadratique moyenne) : 0.9071
MAE (Erreur absolue moyenne) : 0.6095
R¬≤ Score : 0.4504


In [147]:
# üîπ 1. Pr√©paration des donn√©es
BaseTotal = BaseTotal.reset_index(drop=True)

# S√©paration des features et de la cible
X = BaseTotal.drop(columns=["V_t", "Indice", "Date"])  # Features
Y = BaseTotal["V_t"]  # Cible

# Division en train et test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Standardisation des features pour √©viter l'explosion des valeurs
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  
X_test_scaled = scaler.transform(X_test)

# Ajout de la colonne de biais (intercept)
X_train_b = np.c_[np.ones((X_train_scaled.shape[0], 1)), X_train_scaled]
X_test_b = np.c_[np.ones((X_test_scaled.shape[0], 1)), X_test_scaled]

# üîπ 2. Initialisation des poids (param√®tres)
n_features = X_train_b.shape[1]  # Nombre de variables + biais
theta = np.random.randn(n_features, 1) * 0.01  # Petites valeurs pour √©viter la divergence

# Conversion de Y_train en matrice NumPy
Y_train_np = Y_train.values.reshape(-1, 1)

# üîπ 3. Descente de gradient
alpha = 0.01  # Taux d'apprentissage
n_iterations = 5000  # Nombre d'it√©rations
m = X_train_b.shape[0]  # Nombre d'exemples
losses = []  # Liste pour stocker l'√©volution de la perte

for iteration in range(n_iterations):
    # Calcul des pr√©dictions
    Y_pred_train = X_train_b @ theta  
    
    # Calcul des erreurs
    errors = Y_pred_train - Y_train_np  
    
    # Calcul des gradients
    gradients = (2/m) * X_train_b.T @ errors  
    
    # Mise √† jour des poids
    theta -= alpha * gradients  
    
    # Calcul de la perte (MSE)
    mse = np.mean(errors**2)
    losses.append(mse)
    
    # Arr√™t pr√©coce si la perte devient stable
    if iteration > 1 and abs(losses[-1] - losses[-2]) < 1e-10:
        print(f"‚úÖ Convergence atteinte √† l'it√©ration {iteration}")
        break

# üîπ 4. Pr√©diction avec le mod√®le entra√Æn√©
Y_pred_GD = X_test_b @ theta  

# üîπ 5. √âvaluation du mod√®le
mse_GD = mean_squared_error(Y_test, Y_pred_GD)
mae_GD = mean_absolute_error(Y_test, Y_pred_GD)
r2_GD = r2_score(Y_test, Y_pred_GD)

print(f"\nüìä Performance du mod√®le (Descente de Gradient) :")
print(f"MSE : {mse_GD:.4f}")
print(f"MAE : {mae_GD:.4f}")
print(f"R¬≤ : {r2_GD:.4f}")

# üîπ Affichage des coefficients trouv√©s
coefficients_GD = pd.DataFrame(theta, index=["Intercept"] + list(X_train.columns), columns=["Coefficients (GD)"])
display(coefficients_GD)

resultats_GD = pd.DataFrame({"Valeur r√©elle": Y_test.values, "Pr√©diction GD": Y_pred_GD.flatten()})
display(resultats_GD.head(10))


üìä Performance du mod√®le (Descente de Gradient) :
MSE : 0.9189
MAE : 0.6364
R¬≤ : 0.0462


Unnamed: 0,Coefficients (GD)
Intercept,0.03454
Close,0.456194
High,0.094865
Low,0.126505
Open,-0.238394
Volume,-0.024382
MA5,-0.117899
MA15,-0.064718
EMA5,-0.030761
EMA15,-0.08455


Unnamed: 0,Valeur r√©elle,Pr√©diction GD
0,0.511033,-0.0543
1,-0.1019,0.081085
2,-2.9537,-0.248613
3,0.209032,0.414975
4,1.171289,0.134744
5,-0.93278,-0.373194
6,-0.34814,-0.236436
7,0.516013,0.038935
8,0.438335,-0.259218
9,-0.008946,-0.067578
