In [None]:
import pandas as pd
from bs4 import BeautifulSoup
from seleniumbase import Driver
from io import StringIO
import time
from datetime import datetime
from unidecode import unidecode
import database as db
import pymongo
from discord_webhook import DiscordWebhook
import config

In [None]:
# Settlement-price pages on Euronext Live (French locale), keyed by futures ticker.
urls = dict(
    EBM='https://live.euronext.com/fr/product/commodities-futures/EBM-DPAR/settlement-prices',
    EMA='https://live.euronext.com/fr/product/commodities-futures/EMA-DPAR/settlement-prices',
    ECO='https://live.euronext.com/fr/product/commodities-futures/ECO-DPAR/settlement-prices',
)

# French month abbreviations (accent-stripped, upper-cased) mapped to the English
# abbreviation used in expiration codes. Months absent from this map are kept
# unchanged by maturity_to_expiration.
months = dict(FEV='FEB', MAI='MAY', JUIN='JUN', AOU='AUG')

# Run log accumulated by the functions below and posted to Discord by the last cell.
RESPONSE = '####### EURONEXT FUTURES DATA #######\n'

In [None]:
def insert_db(df):
    """Insert the scraped rows into the ``futures`` collection and log the outcome.

    The outcome is appended to the module-level ``RESPONSE`` log, which is also
    returned so the caller can forward it.

    Behaviour:
    - empty ``df``: nothing is inserted and the database is not contacted;
    - the first row's ``Date`` equals the ``Date`` of the most recent stored
      document: nothing is inserted (duplicate run for the same day);
    - otherwise (new date, or the collection is still empty): all rows are
      inserted with ``insert_many``.

    Args:
        df: DataFrame of rows to store; must have a ``Date`` column when non-empty.

    Returns:
        The accumulated ``RESPONSE`` log string.
    """
    # RESPONSE lives at module level; without this declaration the `+=` below
    # makes it a local name and every call raises UnboundLocalError.
    global RESPONSE

    # Check emptiness first so df['Date'] is never read on an empty frame.
    if df.empty:
        RESPONSE += 'NO DATA TO IMPORT TODAY, EMPTY DATAFRAME'
        return RESPONSE

    collection = db.get_database()["futures"]
    last_doc = collection.find_one(sort=[('Date', pymongo.DESCENDING)])

    # last_doc is None on an empty collection: that first run must insert too,
    # otherwise the collection can never be seeded.
    is_duplicate = last_doc is not None and df['Date'].iloc[0] == last_doc['Date']
    if is_duplicate:
        RESPONSE += 'Euronext Futures : Document non inséré, doublon date avec le dernier document en base.'
    else:
        RESPONSE += str(collection.insert_many(df.to_dict('records')))
    return RESPONSE

In [59]:
def maturity_to_expiration(series, months_map=months):
    """Turn maturity labels such as 'déc 2024' into expiration codes such as 'DEC24'.

    Each label is transliterated with ``unidecode``, upper-cased, stripped and
    split on whitespace. The first token is the month and the last two
    characters of the second token are the year. The month is translated
    through ``months_map``; months missing from the map are kept as-is.

    Args:
        series: pandas Series of maturity strings ("<month> <year>").
        months_map: dict from normalised month abbreviation to the abbreviation
            used in the expiration code.

    Returns:
        pandas Series of month + two-digit-year strings, aligned with ``series``.
    """
    tokens = series.apply(unidecode).str.upper().str.strip().str.split()
    month_code = tokens.str[0]
    year_suffix = tokens.str[1].str[-2:]
    # fillna restores the original abbreviation where the map has no entry.
    return month_code.map(months_map).fillna(month_code) + year_suffix
    

In [4]:
def scrapper(url):
    """Load ``url`` in a headless browser and return its data table as a DataFrame.

    Args:
        url: page to load; the page is expected to render an element matching
            the ``.table`` CSS selector.

    Returns:
        DataFrame parsed from the first ``<table class="table">`` on the page,
        without its last row (presumably a footer/summary row; confirm against
        the page). An empty DataFrame is returned when no such table is found.

    Raises:
        Whatever the driver raises when the page cannot be loaded or the
        ``.table`` element does not appear.
    """
    driver = Driver(uc=True, incognito=True, headless=True)
    try:
        driver.get(url)
        driver.wait_for_element(".table")
        html = driver.page_source
    finally:
        # Always release the browser, including when get/wait raises;
        # otherwise every failed attempt leaks a browser process.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table', class_='table')
    if table is None:
        # Without this guard read_html would parse the string "None" and raise;
        # an empty frame lets the caller report "nothing scraped".
        return pd.DataFrame()
    return pd.read_html(StringIO(str(table)))[0].iloc[:-1]

In [None]:
def clean_scrapped(urls, max_retries=5, retry_delay=300):
    """Scrape every ticker page and combine the complete results into one DataFrame.

    A scrape is considered incomplete when the 'Compens.' column holds a single
    distinct value or contains at least one NaN. In that case the page is
    scraped again after ``retry_delay`` seconds, up to ``max_retries`` attempts.
    An empty scrape is logged and the ticker is abandoned immediately.
    Problems are appended to the module-level ``RESPONSE`` log.

    Args:
        urls: dict mapping ticker -> page URL.
        max_retries: maximum number of scrape attempts per ticker.
        retry_delay: seconds to wait after an incomplete scrape.

    Returns:
        Concatenation of the per-ticker frames, each with added 'Ticker' and
        'Date' (YYYY-MM-DD) columns; the index is not reset. An empty DataFrame
        is returned when no ticker produced complete data.
    """
    # RESPONSE lives at module level; without this declaration the `+=` below
    # makes it a local name and the error paths raise UnboundLocalError.
    global RESPONSE

    # Computed once so every row of the run carries the same date, even if the
    # retry sleeps carry the run past midnight.
    today = datetime.today().strftime('%Y-%m-%d')

    frames = []
    for ticker, url in urls.items():
        for _ in range(max_retries):
            scraped = scrapper(url)
            if scraped.empty:
                RESPONSE += f"Error scrapping data, get empty dataframe for {ticker}"
                break

            settlement = scraped['Compens.']
            if settlement.nunique(dropna=False) == 1 or settlement.isna().any():
                print(f"{ticker} full data not received, retrying...")
                time.sleep(retry_delay)
                continue

            # assign() builds a new frame instead of writing into the slice
            # returned by the scraper.
            frames.append(scraped.assign(Ticker=ticker, Date=today))
            break
        else:
            # Loop ended without a break: every attempt was incomplete.
            RESPONSE += f'Skipping {ticker} after max retries, no full data found\n'

    if not frames:
        # pd.concat([]) raises ValueError; return an empty frame instead.
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
# Scrape all tickers, then reshape to the stored schema:
# Date, Ticker, Expiration, OHLC (Close = settlement price), Volume, Open Interest.
df = clean_scrapped(urls).reset_index(drop=True)
if not df.empty:
    # Only reshape when something was scraped: an empty frame has none of the
    # source columns, and a KeyError here would stop the notebook before the
    # reporting cell runs.
    df = (
        df.assign(Expiration=maturity_to_expiration(df['Maturité']))
        .rename(columns={
            'Ouvert': 'Open',
            'Haut': 'High',
            'Bas': 'Low',
            'Compens.': 'Close',
            'Position ouverte': 'Open Interest',
        })
        .loc[:, ['Date', 'Ticker', 'Expiration', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']]
    )

Unnamed: 0,Maturité,Ouvert,Haut,Bas,Dernier,Différence,Compens.,Volume,Position ouverte,Ticker,Date,Expiration
0,déc 2024,210.0,212.75,209.75,210.5,1.0,210.75,41352,119654,EBM,2024-11-14,DEC24
1,Mar 2025,220.5,223.0,219.5,219.75,-0.5,219.75,60681,287324,EBM,2024-11-14,MAR25
2,mai 2025,225.75,227.5,224.75,224.75,-0.75,225.0,17557,100746,EBM,2024-11-14,MAY25
3,Sep 2025,218.25,219.25,217.0,217.0,-1.25,217.25,4796,56906,EBM,2024-11-14,SEP25
4,déc 2025,224.0,224.75,222.25,222.25,-1.75,222.5,2511,39205,EBM,2024-11-14,DEC25
5,Mar 2026,228.0,228.5,228.0,228.5,0.25,226.75,31,963,EBM,2024-11-14,MAR26
6,mai 2026,231.0,231.0,231.0,231.0,0.0,229.25,1,130,EBM,2024-11-14,MAY26
7,Sep 2026,0.0,0.0,0.0,0.0,,228.25,0,15,EBM,2024-11-14,SEP26
8,déc 2026,0.0,0.0,0.0,0.0,,231.25,0,10,EBM,2024-11-14,DEC26
9,Mar 2027,0.0,0.0,0.0,0.0,,236.25,0,0,EBM,2024-11-14,MAR27


In [None]:
# Store today's rows; insert_db returns the accumulated run log.
r = insert_db(df)
# Post the run log to a Discord channel to keep track of bugs.
# Discord rejects messages whose content exceeds 2000 characters, so the log
# is truncated to make sure at least its beginning is delivered.
webhook = DiscordWebhook(url=config.discordLogWebhookUrl, content=r[:2000])
response = webhook.execute()