Imports

In [1]:
import utils.scrape_no_js as bourso
import utils.scrape_js as euronext
import utils.inputs as inputs
import utils.helper_functions as hf

from datetime import datetime, date, timedelta
import pandas as pd
import numpy as np
import yfinance as yf
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

# Print frames and arrays in full: remove pandas' row/column display caps and
# numpy's summarisation threshold so nothing is elided in the cell outputs.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
np.set_printoptions(threshold=np.inf)

Config

In [2]:
# Price field of the downloaded OHLCV data that is used as the buy reference series.
buy_time = 'Open'
# Multiplier applied to the theoretical opening price to derive the sell price.
loss_limit = 0.997

# Threshold that buy_prediction must exceed for a ticker to be kept in the filtered summary.
cumulated_probs_target = 0.85

# Only referenced by the commented-out evaluation cells; presumably a per-trade fee rate — TODO confirm.
fee = 0.002

# Number of calendar days of history to download.
download_days = 500 #should be > 151 = 90 (input days) * 7/5 (days per week) * 1.2 (margin in case of vacations)

# Amount targeted per position; divided by the opening price to size the order.
target_buy_amount = 2100

# Base URLs that are prefixed to the per-ticker identifiers read from the tickers sheet.
base_url_euronext = "https://live.euronext.com/en/product/equities/"
base_url_bourso = "https://www.boursorama.com/cours/"
base_url_fortuneo = "https://bourse.fortuneo.fr/actions/"

# Artifacts loaded further down (Keras classifier and fitted scaler).
# NOTE(review): presumably produced by a separate training step — confirm they match each other.
model_path = './outputs/classifier_model.keras'
scaler_path = './outputs/scaler.pkl'

In [3]:
# Ticker reference sheet; its 'yahoo', 'bourso', 'euronext' and 'fortuneo'
# columns are used further down to translate between data providers.
tickers_file_path = './db/tickers_euronext_regulated_euro_500k€.xlsx'
df_tickers = pd.read_excel(tickers_file_path)

# Row count of the sheet, reused later as the upper bound of the ticker slices.
num_tickers = df_tickers.shape[0]
print(f'number of tickers: {num_tickers}')

number of tickers: 854


Download past data

In [4]:
def get_past_date(days_past):
    """Return the date `days_past` days before today, formatted 'YYYY-MM-DD'.

    Args:
        days_past: number of days to go back from today's local date
            (0 returns today's date).

    Returns:
        The resulting date as a 'YYYY-MM-DD' string.
    """
    return (date.today() - timedelta(days=days_past)).strftime('%Y-%m-%d')

def days_difference(date1, date2):
    """Return the absolute number of days between two dates.

    Args:
        date1: a date object or a 'YYYY-MM-DD' string.
        date2: a date object or a 'YYYY-MM-DD' string.

    Returns:
        The non-negative day count between the two inputs; the argument
        order does not change the sign of the result.
    """
    # pd.to_datetime accepts both date objects and date strings.
    delta = pd.to_datetime(date2) - pd.to_datetime(date1)
    return abs(delta.days)

In [5]:
# Download window, both bounds as 'YYYY-MM-DD' strings:
# from `download_days` days ago up to today's date.
today = get_past_date(0)
start_date = get_past_date(download_days)

print(f'start download date: {start_date}', f'today: {today}', sep='\n')

start download date: 2023-03-14
today: 2024-07-26


In [6]:
# Yahoo symbols to download. The slice starts at row 1, so the first row of the
# tickers sheet is not requested — NOTE(review): confirm this skip is intentional.
yahoo_tickers = df_tickers['yahoo'].iloc[1:num_tickers + 1].tolist()

# Fetch the history of every symbol in one call, between start_date and today.
download_past_data = yf.download(yahoo_tickers, start=start_date, end=today)
df_download_past = pd.DataFrame(download_past_data)

# Fill gaps with the project helper (its exact filling strategy is defined in utils.helper_functions).
df_download_past_filled = hf.fillnavalues(df_download_past)
# df_download = df_download.dropna(axis='columns', how='all') #remove columns with only nan values

print(df_download_past_filled.iloc[-3:].to_markdown())


[*********************100%%**********************]  853 of 853 completed

10 Failed downloads:
['VACBT.PA', 'AZRNW.AS', 'VACBS.PA', 'SOLDS.PA', 'ARVBS.PA', 'NACDS.PA']: Exception("%ticker%: Period 'max' is invalid, must be one of ['1d', '5d']")
['BNJW.AS', 'NAIW.AS', 'WTFIN.MI']: Exception('%ticker%: No timezone found, symbol may be delisted')
['ECT.AS']: Exception('%ticker%: Duplicate key 1502866800')


| Date                |   ('Adj Close', 'A2A.MI') |   ('Adj Close', 'AAA.PA') |   ('Adj Close', 'AALB.AS') |   ('Adj Close', 'AB.PA') |   ('Adj Close', 'ABCA.PA') |   ('Adj Close', 'ABEO.PA') |   ('Adj Close', 'ABI.BR') |   ('Adj Close', 'ABN.AS') |   ('Adj Close', 'ABNX.PA') |   ('Adj Close', 'ABO.BR') |   ('Adj Close', 'ABT.MI') |   ('Adj Close', 'ABVX.PA') |   ('Adj Close', 'AC.PA') |   ('Adj Close', 'AC5.MI') |   ('Adj Close', 'ACA.PA') |   ('Adj Close', 'ACAN.PA') |   ('Adj Close', 'ACCB.BR') |   ('Adj Close', 'ACE.MI') |   ('Adj Close', 'ACKB.BR') |   ('Adj Close', 'AD.AS') |   ('Adj Close', 'ADB.MI') |   ('Adj Close', 'ADOC.PA') |   ('Adj Close', 'ADP.PA') |   ('Adj Close', 'ADYEN.AS') |   ('Adj Close', 'AED.BR') |   ('Adj Close', 'AEDES.MI') |   ('Adj Close', 'AEF.MI') |   ('Adj Close', 'AELIS.PA') |   ('Adj Close', 'AF.PA') |   ('Adj Close', 'AFME.PA') |   ('Adj Close', 'AGFB.BR') |   ('Adj Close', 'AGN.AS') |   ('Adj Close', 'AGS.BR') |   ('Adj Close', 'AI.PA') |   ('Adj Clos

In [7]:
# Most recent date present in the downloaded history.
last_index = df_download_past_filled.index[-1]
last_download_past_date = last_index.date()
print(f'last download date: {last_download_past_date}')

# Gap, in days, between today and the last downloaded date.
download_missing_days = days_difference(today, last_download_past_date)
print(f'download missing days: {download_missing_days}')

# Optional guard, currently disabled:
# if download_missing_days > 1:
#     raise ValueError("Too many download missing days")


last download date: 2024-07-25
download missing days: 1


Download today's data

In [8]:
# Boursorama codes for the same rows of the tickers sheet as the Yahoo download
# (the slice also starts at row 1).
bourso_tickers = df_tickers['bourso'].iloc[1:num_tickers + 1].tolist()

# Scrape the theoretical opening price of each code; the result holds one row
# per ticker with columns 'ticker' and 'bourso_theor_open_price'.
df_download_today = bourso.get_theoretical_opening_prices(bourso_tickers)

print(df_download_today)

         ticker  bourso_theor_open_price
0        1rAAGN                      NaN
1       1rAAJAX                      NaN
2      1rAAMUND                   1.2700
3        1rAALX                      NaN
4       1rAAZRN                      NaN
5      1rAAZRNW                      NaN
6       1rABSGR                      NaN
7        1rABNJ                      NaN
8       1rABNJW                      NaN
9       1rABESI                      NaN
10       1rABAI                      NaN
11      1rABAIW                      NaN
12      1rABGHL                      NaN
13      1rABRNL                      NaN
14     1rACABKA                      NaN
15     1rACMCOM                      NaN
16      1rACRBN                      NaN
17      1rACTAC                      NaN
18       1rACVC                      NaN
19       1rADGB                      NaN
20     1rAEAS2P                   0.5780
21      1rAEBUS                      NaN
22      1rAEHCS                   9.1500
23      1rAENVI 

Transform past data

In [9]:
def get_single_level_df(df, ohlcv):
    """Extract one top-level column group of `df` as a single-level frame.

    Args:
        df: frame whose columns have a top level such as 'Open' or 'Close'.
        ohlcv: label of the top-level column group to select.

    Returns:
        The selected columns after `hf.remove_top_column_name` has been
        applied to them.
    """
    return hf.remove_top_column_name(df[[ohlcv]])

def get_ohlcv_dfs(df):
    """Split `df` into one single-level frame per OHLCV field.

    Args:
        df: frame accepted by `get_single_level_df`, containing the
            'Open', 'High', 'Low', 'Close' and 'Volume' column groups.

    Returns:
        A dict with keys 'df_open', 'df_high', 'df_low', 'df_close' and
        'df_volume' (in that order), each mapped to the matching frame.
    """
    fields = ('Open', 'High', 'Low', 'Close', 'Volume')
    return {f'df_{field.lower()}': get_single_level_df(df, field) for field in fields}

In [10]:
# One frame per OHLCV field, keyed 'df_open' ... 'df_volume'.
list_dfs_ohlcv = get_ohlcv_dfs(df=df_download_past_filled)

# Historical series of the price field chosen in the config (buy_time).
df_transformed_past = get_single_level_df(df=df_download_past_filled, ohlcv=buy_time)

print(df_transformed_past.iloc[-3:].to_markdown())
print(df_transformed_past.shape)

| Date                |   A2A.MI |   AAA.PA |   AALB.AS |   AB.PA |   ABCA.PA |   ABEO.PA |   ABI.BR |   ABN.AS |   ABNX.PA |   ABO.BR |   ABT.MI |   ABVX.PA |   AC.PA |   AC5.MI |   ACA.PA |   ACAN.PA |   ACCB.BR |   ACE.MI |   ACKB.BR |   AD.AS |   ADB.MI |   ADOC.PA |   ADP.PA |   ADYEN.AS |   AED.BR |   AEDES.MI |   AEF.MI |   AELIS.PA |   AF.PA |   AFME.PA |   AGFB.BR |   AGN.AS |   AGS.BR |   AI.PA |   AIR.PA |   AJAX.AS |   AKE.PA |   AKOM.PA |   AKW.PA |   AKZA.AS |   ALFEN.AS |   ALK.MI |   ALLFG.AS |   ALO.PA |   ALTA.PA |   ALTR.LS |   ALW.MI |   ALX.AS |   AM.PA |   AMG.AS |   AMP.MI |   AMPLI.PA |   AMUN.PA |   AMUND.AS |   ANIM.MI |   ANTIN.PA |   APAM.AS |   ARAMI.PA |   ARCAD.AS |   AREIT.PA |   ARG.PA |   ARGX.BR |   ARIS.MI |   ARN.MI |   ARTE.PA |   ARTO.PA |   ARVBS.PA |   ARVEN.PA |   ASC.MI |   ASCE.BR |   ASM.AS |   ASML.AS |   ASRNL.AS |   ASY.PA |   ATE.PA |   ATEB.BR |   ATEME.PA |   ATLD.PA |   ATO.PA |   AUB.PA |   AUGR.PA |   AURE.PA |   AUTME.MI |   AV.MI 

Transform today's data

In [11]:
# Boursorama code -> Yahoo symbol, taken from the tickers sheet.
mapping_dict = dict(zip(df_tickers['bourso'], df_tickers['yahoo']))

# Translate the 'ticker' column in place. Values that are already Yahoo symbols
# are left untouched so that re-running this cell is harmless (a plain
# .map(mapping_dict) would turn every already-translated ticker into NaN).
# Codes with no entry in the sheet still become NaN, as before.
yahoo_symbols = set(mapping_dict.values())
df_download_today['ticker'] = df_download_today['ticker'].map(
    lambda code: code if code in yahoo_symbols else mapping_dict.get(code, np.nan)
)

print(df_download_today)


       ticker  bourso_theor_open_price
0      AGN.AS                      NaN
1     AJAX.AS                      NaN
2    AMUND.AS                   1.2700
3      ALX.AS                      NaN
4     AZRN.AS                      NaN
5    AZRNW.AS                      NaN
6     BSGR.AS                      NaN
7      BNJ.AS                      NaN
8     BNJW.AS                      NaN
9     BESI.AS                      NaN
10     BAI.AS                      NaN
11    BAIW.AS                      NaN
12    BGHL.AS                      NaN
13    BRNL.AS                      NaN
14   CABKA.AS                      NaN
15   CMCOM.AS                      NaN
16    CRBN.AS                      NaN
17    CTAC.AS                      NaN
18     CVC.AS                      NaN
19     DGB.AS                      NaN
20   EAS2P.AS                   0.5780
21    EBUS.AS                      NaN
22    EHCS.AS                   9.1500
23    ENVI.AS                      NaN
24     ECT.AS            

In [12]:
# Row label for today, formatted like a midnight timestamp.
# NOTE(review): this is a string, whereas the historical index holds dates —
# confirm the downstream helpers accept the mix.
today_date = datetime.now().strftime("%Y-%m-%d 00:00:00")

# Pivot the scraped prices into a single row whose columns are the tickers.
df_transformed_today = df_download_today.set_index('ticker').T
df_transformed_today.index = [today_date]

# Sort the columns and name the index 'Date'.
df_transformed_today = (
    df_transformed_today
    .reindex(sorted(df_transformed_today.columns), axis='columns')
    .rename_axis('Date')
)

print(df_transformed_today.to_markdown())
print(df_transformed_today.shape)

| Date                |   A2A.MI |   AAA.PA |   AALB.AS |   AB.PA |   ABCA.PA |   ABEO.PA |   ABI.BR |   ABN.AS |   ABNX.PA |   ABO.BR |   ABT.MI |   ABVX.PA |   AC.PA |   AC5.MI |   ACA.PA |   ACAN.PA |   ACCB.BR |   ACE.MI |   ACKB.BR |   AD.AS |   ADB.MI |   ADOC.PA |   ADP.PA |   ADYEN.AS |   AED.BR |   AEDES.MI |   AEF.MI |   AELIS.PA |   AF.PA |   AFME.PA |   AGFB.BR |   AGN.AS |   AGS.BR |   AI.PA |   AIR.PA |   AJAX.AS |   AKE.PA |   AKOM.PA |   AKW.PA |   AKZA.AS |   ALFEN.AS |   ALK.MI |   ALLFG.AS |   ALO.PA |   ALTA.PA |   ALTR.LS |   ALW.MI |   ALX.AS |   AM.PA |   AMG.AS |   AMP.MI |   AMPLI.PA |   AMUN.PA |   AMUND.AS |   ANIM.MI |   ANTIN.PA |   APAM.AS |   ARAMI.PA |   ARCAD.AS |   AREIT.PA |   ARG.PA |   ARGX.BR |   ARIS.MI |   ARN.MI |   ARTE.PA |   ARTO.PA |   ARVBS.PA |   ARVEN.PA |   ASC.MI |   ASCE.BR |   ASM.AS |   ASML.AS |   ASRNL.AS |   ASY.PA |   ATE.PA |   ATEB.BR |   ATEME.PA |   ATLD.PA |   ATO.PA |   AUB.PA |   AUGR.PA |   AURE.PA |   AUTME.MI |   AV.MI 

Transform: Assemble past and today's data

In [13]:
# Stack today's single row underneath the historical rows.
df_transformed = pd.concat((df_transformed_past, df_transformed_today), axis='index')

print(df_transformed.iloc[-3:].to_markdown())

| Date                |   A2A.MI |   AAA.PA |   AALB.AS |   AB.PA |   ABCA.PA |   ABEO.PA |   ABI.BR |   ABN.AS |   ABNX.PA |   ABO.BR |   ABT.MI |   ABVX.PA |   AC.PA |   AC5.MI |   ACA.PA |   ACAN.PA |   ACCB.BR |   ACE.MI |   ACKB.BR |   AD.AS |   ADB.MI |   ADOC.PA |   ADP.PA |   ADYEN.AS |   AED.BR |   AEDES.MI |   AEF.MI |   AELIS.PA |   AF.PA |   AFME.PA |   AGFB.BR |   AGN.AS |   AGS.BR |   AI.PA |   AIR.PA |   AJAX.AS |   AKE.PA |   AKOM.PA |   AKW.PA |   AKZA.AS |   ALFEN.AS |   ALK.MI |   ALLFG.AS |   ALO.PA |   ALTA.PA |   ALTR.LS |   ALW.MI |   ALX.AS |   AM.PA |   AMG.AS |   AMP.MI |   AMPLI.PA |   AMUN.PA |   AMUND.AS |   ANIM.MI |   ANTIN.PA |   APAM.AS |   ARAMI.PA |   ARCAD.AS |   AREIT.PA |   ARG.PA |   ARGX.BR |   ARIS.MI |   ARN.MI |   ARTE.PA |   ARTO.PA |   ARVBS.PA |   ARVEN.PA |   ASC.MI |   ASCE.BR |   ASM.AS |   ASML.AS |   ASRNL.AS |   ASY.PA |   ATE.PA |   ATEB.BR |   ATEME.PA |   ATLD.PA |   ATO.PA |   AUB.PA |   AUGR.PA |   AURE.PA |   AUTME.MI |   AV.MI 

Transform: Fill missing values

In [14]:
# Fill the gaps of the assembled frame with the same project helper that was
# applied to the raw download.
df_transformed_filled = hf.fillnavalues(df_transformed)

print(df_transformed_filled.iloc[-3:].to_markdown())
print(df_transformed_filled.shape)

| Date                |   A2A.MI |   AAA.PA |   AALB.AS |   AB.PA |   ABCA.PA |   ABEO.PA |   ABI.BR |   ABN.AS |   ABNX.PA |   ABO.BR |   ABT.MI |   ABVX.PA |   AC.PA |   AC5.MI |   ACA.PA |   ACAN.PA |   ACCB.BR |   ACE.MI |   ACKB.BR |   AD.AS |   ADB.MI |   ADOC.PA |   ADP.PA |   ADYEN.AS |   AED.BR |   AEDES.MI |   AEF.MI |   AELIS.PA |   AF.PA |   AFME.PA |   AGFB.BR |   AGN.AS |   AGS.BR |   AI.PA |   AIR.PA |   AJAX.AS |   AKE.PA |   AKOM.PA |   AKW.PA |   AKZA.AS |   ALFEN.AS |   ALK.MI |   ALLFG.AS |   ALO.PA |   ALTA.PA |   ALTR.LS |   ALW.MI |   ALX.AS |   AM.PA |   AMG.AS |   AMP.MI |   AMPLI.PA |   AMUN.PA |   AMUND.AS |   ANIM.MI |   ANTIN.PA |   APAM.AS |   ARAMI.PA |   ARCAD.AS |   AREIT.PA |   ARG.PA |   ARGX.BR |   ARIS.MI |   ARN.MI |   ARTE.PA |   ARTO.PA |   ARVBS.PA |   ARVEN.PA |   ASC.MI |   ASCE.BR |   ASM.AS |   ASML.AS |   ASRNL.AS |   ASY.PA |   ATE.PA |   ATEB.BR |   ATEME.PA |   ATLD.PA |   ATO.PA |   AUB.PA |   AUGR.PA |   AURE.PA |   AUTME.MI |   AV.MI 

Transform: Add today's row (NaN placeholders) to the high, low, close and volume frames

In [15]:
# Give the high/low/close/volume frames a row labelled today_date, filled with NaN.
# 'df_open' is not extended here — NOTE(review): presumably because today's open
# comes from the scraped theoretical opening price; confirm that
# inputs.get_inputs does not need a matching row in df_open.
for key in ['df_high', 'df_low', 'df_close', 'df_volume']:
    # Assigning through .loc adds the row when the label is absent (today_date is a string label).
    list_dfs_ohlcv[key].loc[today_date] = np.nan

# Spot-check one of the extended frames.
print(list_dfs_ohlcv['df_high'].tail(3).to_markdown())
print(list_dfs_ohlcv['df_high'].shape)

| Date                |   A2A.MI |   AAA.PA |   AALB.AS |   AB.PA |   ABCA.PA |   ABEO.PA |   ABI.BR |   ABN.AS |   ABNX.PA |   ABO.BR |   ABT.MI |   ABVX.PA |   AC.PA |   AC5.MI |   ACA.PA |   ACAN.PA |   ACCB.BR |   ACE.MI |   ACKB.BR |   AD.AS |   ADB.MI |   ADOC.PA |   ADP.PA |   ADYEN.AS |   AED.BR |   AEDES.MI |   AEF.MI |   AELIS.PA |   AF.PA |   AFME.PA |   AGFB.BR |   AGN.AS |   AGS.BR |   AI.PA |   AIR.PA |   AJAX.AS |   AKE.PA |   AKOM.PA |   AKW.PA |   AKZA.AS |   ALFEN.AS |   ALK.MI |   ALLFG.AS |   ALO.PA |   ALTA.PA |   ALTR.LS |   ALW.MI |   ALX.AS |   AM.PA |   AMG.AS |   AMP.MI |   AMPLI.PA |   AMUN.PA |   AMUND.AS |   ANIM.MI |   ANTIN.PA |   APAM.AS |   ARAMI.PA |   ARCAD.AS |   AREIT.PA |   ARG.PA |   ARGX.BR |   ARIS.MI |   ARN.MI |   ARTE.PA |   ARTO.PA |   ARVBS.PA |   ARVEN.PA |   ASC.MI |   ASCE.BR |   ASM.AS |   ASML.AS |   ASRNL.AS |   ASY.PA |   ATE.PA |   ATEB.BR |   ATEME.PA |   ATLD.PA |   ATO.PA |   AUB.PA |   AUGR.PA |   AURE.PA |   AUTME.MI |   AV.MI 

Transform: Create inputs

In [16]:
# Build the model features from the filled price series and the OHLCV frames.
df_inputs = inputs.get_inputs(df_transformed_filled, list_dfs_ohlcv)

# Keep only the rows of the last label found in the top index level.
last_day_index = df_inputs.index.get_level_values(0)[-1]
df_inputs_last_day = df_inputs.loc[last_day_index]

# Plain numpy matrix handed to the scaler and the model.
X_inputs_last_day = df_inputs_last_day.values

print(df_inputs_last_day.iloc[-3:].to_markdown())

|         |   input_var_past_90d_future_0d |   input_var_past_60d_future_0d |   input_var_past_30d_future_0d |   input_var_past_10d_future_0d |   input_var_past_5d_future_0d |   input_var_past_2d_future_0d |   input_var_past_1d_future_0d |   input_var_past_close_1d |   input_var_past_high_1d |   input_var_past_low_1d |   input_volume_var_90-1d |   input_volume_var_60-1d |   input_volume_var_30-1d |   input_volume_var_10-1d |   input_volume_var_2-1d |   input_volume_var_3-1d |   input_min_var_past_90d |   input_min_var_past_30d |   input_min_var_past_10d |   input_min_var_past_5d |   input_min_var_past_2d |   input_max_var_past_90d |   input_max_var_past_30d |   input_max_var_past_10d |   input_max_var_past_5d |   input_max_var_past_2d |   input_days_since_min_30d |   input_days_since_min_10d |   input_days_since_max_30d |   input_days_since_max_10d |   input_volatility_30d |   input_volatility_10d |   input_volatility_2d |   input_volume_volatility_90-1d |   input_volume_volatility_30-

In [17]:
# Load the scaler object saved at scaler_path (a .pkl file).
# NOTE(review): presumably unpickled by hf.load_object — only load files from a trusted source.
scaler = hf.load_object(scaler_path)

# Apply the loaded scaler to the last-day feature matrix before prediction.
X_inputs_last_day_scaled = scaler.transform(X_inputs_last_day)

Load model

In [18]:
# Load the Keras classifier stored at model_path.
model = tf.keras.models.load_model(model_path)

Predict

In [19]:
# Model output for every row of the last-day feature matrix (two values per row).
prediction = model.predict(X_inputs_last_day_scaled)

# Keep the "buy" score only, indexed by Yahoo ticker and sorted from highest to lowest.
# NOTE(review): the labels assume the model's first output column is the "buy"
# class — confirm against the training code.
# The chain builds new objects at each step: no inplace drop, and rename_axis
# avoids setting .name on the Index object shared with df_inputs_last_day.
df_prediction = (
    pd.DataFrame(
        prediction,
        columns=['buy_prediction', 'do_not_buy_prediction'],
        index=df_inputs_last_day.index,
    )
    .drop(columns=['do_not_buy_prediction'])
    .rename_axis('yahoo_ticker')
    .sort_values(by='buy_prediction', ascending=False)
)

print(df_prediction.to_markdown())


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
| yahoo_ticker   |   buy_prediction |
|:---------------|-----------------:|
| AF.PA          |       0.937324   |
| RCO.PA         |       0.931934   |
| OSE.PA         |       0.921176   |
| NOKIA.PA       |       0.915209   |
| AKZA.AS        |       0.906225   |
| ZEST.MI        |       0.891162   |
| CYAD.BR        |       0.890604   |
| EEMS.MI        |       0.886896   |
| MERY.PA        |       0.871413   |
| IEG.MI         |       0.870548   |
| ADOC.PA        |       0.866305   |
| ERG.MI         |       0.860764   |
| CLARI.PA       |       0.858545   |
| CATG.PA        |       0.855601   |
| ACAN.PA        |       0.846995   |
| ABNX.PA        |       0.813015   |
| ABT.MI         |       0.812117   |
| OVH.PA         |       0.803872   |
| BSD.PA         |       0.799949   |
| ETL.PA         |       0.798389   |
| CPR.MI         |       0.792274   |
| PHIL.MI        |       0.79046    |
| ERF.PA       

In [20]:
# Work on a copy so the columns added below do not leak into df_prediction.
df_summary = df_prediction.copy()

# Theoretical opening price scraped from Boursorama, looked up by Yahoo ticker.
df_summary['bourso_theor_open_price'] = df_summary.index.map(
    df_download_today.set_index('ticker')['bourso_theor_open_price']
)

# Number of shares needed to reach target_buy_amount, rounded up.
# Missing or non-positive prices give a volume of 0: a zero price would
# otherwise produce inf, which cannot be cast to int.
usable_price = df_summary['bourso_theor_open_price'].where(df_summary['bourso_theor_open_price'] > 0)
df_summary['buy_volume'] = np.ceil((target_buy_amount / usable_price).fillna(0)).astype(int)

# Sell price derived from the opening price and the configured loss limit.
df_summary['sell_price'] = df_summary['bourso_theor_open_price'] * loss_limit

print(df_summary.head(30).to_markdown())

| yahoo_ticker   |   buy_prediction |   bourso_theor_open_price |   buy_volume |   sell_price |
|:---------------|-----------------:|--------------------------:|-------------:|-------------:|
| AF.PA          |         0.937324 |                   nan     |            0 |   nan        |
| RCO.PA         |         0.931934 |                   nan     |            0 |   nan        |
| OSE.PA         |         0.921176 |                   nan     |            0 |   nan        |
| NOKIA.PA       |         0.915209 |                   nan     |            0 |   nan        |
| AKZA.AS        |         0.906225 |                   nan     |            0 |   nan        |
| ZEST.MI        |         0.891162 |                     0.181 |        11603 |     0.180457 |
| CYAD.BR        |         0.890604 |                   nan     |            0 |   nan        |
| EEMS.MI        |         0.886896 |                     0.203 |        10345 |     0.202391 |
| MERY.PA        |         0.871413 |   

In [21]:
# Per-provider identifiers from the tickers sheet, looked up by Yahoo ticker.
tickers_by_yahoo = df_tickers.set_index('yahoo')
provider_base_urls = (
    ('bourso', base_url_bourso),
    ('euronext', base_url_euronext),
    ('fortuneo', base_url_fortuneo),
)

# First attach the raw identifiers ...
for site, _ in provider_base_urls:
    df_summary[site] = df_summary.index.map(tickers_by_yahoo[site])

# ... then turn each of them into a full URL.
for site, base_url in provider_base_urls:
    df_summary[site] = base_url + df_summary[site]

print(df_summary.head(30).to_markdown())

| yahoo_ticker   |   buy_prediction |   bourso_theor_open_price |   buy_volume |   sell_price | bourso                                      | euronext                                                        | fortuneo                                                                          |
|:---------------|-----------------:|--------------------------:|-------------:|-------------:|:--------------------------------------------|:----------------------------------------------------------------|:----------------------------------------------------------------------------------|
| AF.PA          |         0.937324 |                   nan     |            0 |   nan        | https://www.boursorama.com/cours/1rPAF      | https://live.euronext.com/en/product/equities/FR001400J770-XPAR | https://bourse.fortuneo.fr/actions/cours-air-france-klm-AF-FR001400J770-23        |
| RCO.PA         |         0.931934 |                   nan     |            0 |   nan        | https://www.boursorama.com/c

In [22]:
# Keep the tickers whose buy score exceeds the configured threshold.
# .copy() makes the result an independent frame: a later cell adds a column to
# it, which would otherwise be a write into a slice of df_summary.
df_summary_filtered = df_summary[df_summary['buy_prediction'] > cumulated_probs_target].copy()
print(df_summary_filtered.to_markdown())

| yahoo_ticker   |   buy_prediction |   bourso_theor_open_price |   buy_volume |   sell_price | bourso                                     | euronext                                                        | fortuneo                                                                       |
|:---------------|-----------------:|--------------------------:|-------------:|-------------:|:-------------------------------------------|:----------------------------------------------------------------|:-------------------------------------------------------------------------------|
| AF.PA          |         0.937324 |                   nan     |            0 |   nan        | https://www.boursorama.com/cours/1rPAF     | https://live.euronext.com/en/product/equities/FR001400J770-XPAR | https://bourse.fortuneo.fr/actions/cours-air-france-klm-AF-FR001400J770-23     |
| RCO.PA         |         0.931934 |                   nan     |            0 |   nan        | https://www.boursorama.com/cours/1rPRCO 

Theoretical opening price confirmation using Euronext

In [27]:
# Yahoo tickers that passed the filter, in summary order.
yahoo_tickers_list = df_summary_filtered.index.tolist()

# Yahoo symbol -> Euronext identifier; tickers without an entry map to None.
mapping_dict = dict(zip(df_tickers['yahoo'], df_tickers['euronext']))
euronext_tickers_list = list(map(mapping_dict.get, yahoo_tickers_list))
print(euronext_tickers_list)

# Scrape the theoretical opening prices from Euronext for those identifiers.
df_euronext = euronext.get_theoretical_opening_prices(euronext_tickers_list)
print(df_euronext)

['FR001400J770-XPAR', 'FR0000130395-XPAR', 'FR0012127173-XPAR', 'FI0009000681-XPAR', 'NL0013267909-XAMS', 'IT0005013013-MTAA', 'BE0974260896-XBRU', 'IT0005577868-MTAA', 'FR0010241638-XPAR', 'IT0003411417-MTAA', 'FR0011184241-XPAR', 'IT0001157020-MTAA', 'FR0000130692-XPAR', 'FR0010193052-XPAR']


In [None]:
# Euronext identifier -> Yahoo symbol (reverse direction of the previous cell).
mapping_dict = dict(zip(df_tickers['euronext'], df_tickers['yahoo']))

# Add the Yahoo symbol of each scraped row and use it as the index.
df_euronext = (
    df_euronext
    .assign(yahoo_ticker=df_euronext['ticker'].map(mapping_dict))
    .set_index('yahoo_ticker')
)

print(df_euronext)

In [None]:
# Attach the Euronext theoretical opening price, looked up by Yahoo ticker.
# .assign() returns a new frame instead of writing into df_summary_filtered,
# which was obtained by boolean-mask selection (avoids SettingWithCopyWarning).
df_summary_filtered = df_summary_filtered.assign(
    euronext_theor_open_price=df_summary_filtered.index.map(df_euronext['euronext_theor_open_price'])
)

# Move the new column to position 2, i.e. right after the first two columns.
cols = [col for col in df_summary_filtered.columns if col != 'euronext_theor_open_price']
cols.insert(2, 'euronext_theor_open_price')
df_summary_filtered = df_summary_filtered[cols]

print(df_summary_filtered.to_markdown())

Evaluate

In [None]:
# df_open = list_dfs_ohlcv['df_open']
# df_close = list_dfs_ohlcv['df_close']

# df_open_last_day = df_open.iloc[-1]
# df_close_last_day = df_close.iloc[-1]

# df_real = pd.DataFrame()
# df_real['variation'] = df_close_last_day / df_open_last_day

In [None]:
# df_low = list_dfs_ohlcv['df_low']
# df_low_last_day = df_low.loc[last_day_index]
# df_real['lowest_variation'] = df_low_last_day / df_open_last_day

# df_real['profit'] = df_real['variation']
# df_real.loc[df_real['lowest_variation'] < loss_limit, 'profit'] = loss_limit
# df_real['profit'] *= hf.get_fee_coef(fee)

In [None]:
# df_evaluation = pd.concat([df_prediction, df_real], axis='columns')
# print(df_evaluation)

In [None]:
# df_filtered_evaluation = df_evaluation[df_evaluation['buy_prediction'] > cumulated_probs_target]

# count = df_filtered_evaluation['profit'].count()
# average = df_filtered_evaluation['profit'].mean()
# median = df_filtered_evaluation['profit'].median()

# print(f'count: {count}')
# print(f'average profit: {average}')
# print(f'median profit: {median}')

In [None]:
# import matplotlib.pyplot as plt

# plt.figure(figsize=(12, 7))
# plt.scatter(x=df_filtered_evaluation['buy_prediction'], y=df_filtered_evaluation['variation'], color='blue', alpha=0.3, s=5)
# plt.title('variation vs buy prediction')
# plt.xlabel('buy prediction')
# plt.ylabel('real variation')
# plt.grid(True)
# plt.show()