In [15]:
import pandas as pd
from bs4 import BeautifulSoup
from seleniumbase import Driver
from io import StringIO
import time
from datetime import datetime
from unidecode import unidecode
import database as db
import pymongo
from discord_webhook import DiscordWebhook
import config

In [16]:
# Settlement-price page of a Euronext commodity future; only the ticker changes.
_SETTLEMENT_URL = 'https://live.euronext.com/fr/product/commodities-futures/{ticker}-DPAR/settlement-prices'

# Number of rows the scraped table must contain for a ticker to count as complete.
_EXPECTED_ROWS = {'EBM': 12, 'EMA': 10, 'ECO': 10}

# ticker -> [settlement page URL, expected number of rows]
URLS = {
    ticker: [_SETTLEMENT_URL.format(ticker=ticker), expected_rows]
    for ticker, expected_rows in _EXPECTED_ROWS.items()
}

# Month abbreviations that must be rewritten (after accent removal and
# upper-casing); any abbreviation not listed here is kept unchanged.
MONTHS = dict(FEV='FEB', MAI='MAY', JUIN='JUN', AOU='AUG')

# Header that opens every log message sent to Discord.
RESPONSE = '@everyone \n**####### NEW EURONEXT FUTURES DATA #######\n**'

In [17]:
def insert_db(df, RESPONSE=RESPONSE):
    """Insert every row of ``df`` into the ``futures`` collection and log the result.

    No validation or de-duplication is done here: each row of ``df`` becomes
    one document and is sent to the database as is.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to store.
    RESPONSE : str, optional
        Log text to extend. The default is the value the module-level
        ``RESPONSE`` had when this function was defined.

    Returns
    -------
    str
        ``RESPONSE`` followed by the string form of the ``insert_many``
        result, or by a short notice when ``df`` has no rows.
    """
    data = df.to_dict('records')
    if not data:
        # insert_many refuses an empty document list; skip the database call
        # so the caller still gets a log message instead of an exception.
        return RESPONSE + "No rows to insert, database left untouched\n"
    dbname = db.get_database()
    collection_name_france = dbname["futures"]
    inserted = str(collection_name_france.insert_many(data))
    return RESPONSE + inserted

In [18]:
def maturity_to_expiration(series, months_map=MONTHS):
    """Turn maturity labels into compact expiration codes.

    Each label is stripped of accents, upper-cased and split on whitespace.
    The first token is the month, rewritten through ``months_map`` when it has
    an entry and kept unchanged otherwise. The last two characters of the
    second token are the year. With the default map, ``'Fév 2025'`` becomes
    ``'FEB25'``.

    Parameters
    ----------
    series : pandas.Series
        Maturity labels, one string per row.
    months_map : dict, optional
        Month abbreviation rewrites; defaults to ``MONTHS``.

    Returns
    -------
    pandas.Series
        Month code concatenated with the two-character year.
    """
    tokens = series.apply(unidecode).str.upper().str.strip().str.split()
    month_code = tokens.str[0]
    year_suffix = tokens.str[1].str[-2:]
    # Months without an entry in the map fall back to their own abbreviation.
    return month_code.map(months_map).fillna(month_code) + year_suffix
    

In [19]:
def scrapper(url):
    """Open ``url`` in a browser and return its data table as a DataFrame.

    Parameters
    ----------
    url : str
        Page to load; it is expected to render an element matching ``.table``.

    Returns
    -------
    pandas.DataFrame
        The first table with class ``table`` found in the page, without its
        last row. An empty DataFrame is returned when the rendered page holds
        no such table, so callers can test ``.empty`` and retry.

    Errors raised while loading the page or waiting for the table propagate
    to the caller; the browser is closed in every case.
    """
    driver = Driver(uc=True, incognito=True)  # set driver
    try:
        driver.get(url)
        driver.wait_for_element(".table")
        html = driver.page_source  # get html code from page
    finally:
        # Always release the browser, otherwise each failed attempt would
        # leave a browser process behind.
        driver.quit()
    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('table', class_='table')  # find the datatable
    if table is None:
        # Parsing the text "None" would raise; report "nothing scraped" instead.
        return pd.DataFrame()
    # The last row is dropped (presumably a summary line - TODO confirm).
    return pd.read_html(StringIO(str(table)))[0].iloc[:-1]

In [22]:
def clean_scrapped(urls, RESPONSE=RESPONSE, max_retries=10, retry_delay=60):
    """Scrape every ticker in ``urls`` and gather the complete tables.

    A ticker is attempted up to ``max_retries`` times. An attempt fails when
    the scraped table is empty or when its row count differs from the expected
    one; both kinds of failure count towards the limit. A ticker that never
    succeeds is skipped and reported in the log.

    Parameters
    ----------
    urls : dict
        Maps a ticker to ``[page URL, expected number of rows]``.
    RESPONSE : str, optional
        Log text to extend. The default is the value the module-level
        ``RESPONSE`` had when this function was defined.
    max_retries : int, optional
        Maximum number of attempts per ticker (default 10).
    retry_delay : int or float, optional
        Seconds to wait after a failed attempt (default 60).

    Returns
    -------
    tuple[pandas.DataFrame, str]
        The concatenated tables, each with added ``Ticker`` and ``Date``
        (today, ``YYYY-MM-DD``) columns, and the extended log. The frame is
        empty when no ticker could be scraped.
    """
    df_lists = []
    for idx, item in urls.items():  # ticker -> [URL, expected number of rows]
        url, expected_rows = item[0], item[1]
        for _attempt in range(max_retries):
            tmp = scrapper(url)
            if tmp.empty:
                # Nothing scraped at all.
                RESPONSE += f"Error scrapping data, got empty dataframe for {idx}, retrying...\n"
            elif len(tmp) != expected_rows:
                # Some contract rows are missing.
                RESPONSE += f"{idx} full data not received, retrying...\n"
            else:
                tmp['Ticker'] = idx  # add ticker to df
                tmp['Date'] = datetime.today().strftime('%Y-%m-%d')
                df_lists.append(tmp)
                RESPONSE += f"{idx}, data scrapped ok\n"
                break
            # Every failed attempt consumes one retry, so the loop always ends.
            time.sleep(retry_delay)
        else:
            # Exceeded max retries, skip this ticker
            RESPONSE += f'@everyone Skipping {idx} after max retries, no full data found\n'
    # pd.concat raises on an empty list; hand back an empty frame instead.
    df = pd.concat(df_lists) if df_lists else pd.DataFrame()
    return df, RESPONSE

In [23]:
# Scrape all tickers, then shape the raw table into the stored schema:
# derive the expiration code, give the French headers English names, keep
# only the stored columns in a fixed order and make Close numeric.
df, RESPONSE = clean_scrapped(URLS)
df = (
    df.reset_index(drop=True)
    .assign(Expiration=lambda frame: maturity_to_expiration(frame['Maturité']))
    .rename(columns={
        'Ouvert': 'Open',
        'Haut': 'High',
        'Bas': 'Low',
        'Compens.': 'Close',
        'Position ouverte': 'Open Interest',
    })
    .loc[:, ['Date', 'Ticker', 'Expiration', 'Open', 'High', 'Low', 'Close', 'Volume', 'Open Interest']]
    .astype({'Close': float})
)

In [25]:
# Keep only the EBM rows; from here on `df` no longer holds the other tickers.
# NOTE(review): the insert cell further down stores this filtered frame -
# confirm that dropping the other tickers is intended.
df = df.loc[df['Ticker'].eq('EBM')]
df

Unnamed: 0,Date,Ticker,Expiration,Open,High,Low,Close,Volume,Open Interest
0,2024-12-10,EBM,DEC24,220.0,220.0,218.0,,1592,2005
1,2024-12-10,EBM,MAR25,227.0,229.25,226.25,229.0,42036,331247
2,2024-12-10,EBM,MAY25,231.0,233.0,230.25,232.75,9234,136115
3,2024-12-10,EBM,SEP25,220.75,222.5,220.0,222.25,3224,73161
4,2024-12-10,EBM,DEC25,226.25,228.75,226.0,228.75,1881,54678
5,2024-12-10,EBM,MAR26,232.0,233.5,232.0,233.25,136,2072
6,2024-12-10,EBM,MAY26,235.5,236.0,235.5,235.5,61,736
7,2024-12-10,EBM,SEP26,0.0,0.0,0.0,238.5,0,22
8,2024-12-10,EBM,DEC26,0.0,0.0,0.0,237.0,0,10
9,2024-12-10,EBM,MAR27,0.0,0.0,0.0,254.5,0,0


In [26]:
# Insert into the database. CAREFUL: there is no verification, every row of df is inserted.
# The accumulated log is passed in so insert_db appends to it; calling it
# without the log and concatenating afterwards repeated the header (and the
# @everyone ping) in the message.
r = insert_db(df, RESPONSE)
RESPONSE = r
# Send the log to the Discord server to keep track of bugs.
# Discord refuses messages longer than 2000 characters, so only the start of
# an oversized log is sent rather than losing the whole notification.
webhook = DiscordWebhook(url=config.discordLogWebhookUrl, content=RESPONSE[:2000])
response = webhook.execute()
