# In [7]:
import os
import warnings

import pandas as pd
import pymongo

# Turn off pandas' chained-assignment check and ignore every RuntimeWarning
# for the rest of the session.
pd.set_option('mode.chained_assignment', None)
warnings.simplefilter('ignore', category=RuntimeWarning)

# Setup MongoDB connection.
# Every setting can be overridden through an environment variable so that real
# credentials never have to be written into the source; the fallbacks are the
# original local-development values, so behaviour is unchanged when no
# variable is set.
mongo_host = os.environ.get("MONGO_HOST", "localhost")
mongo_port = int(os.environ.get("MONGO_PORT", "27017"))
mongo_user = os.environ.get("MONGO_USER", "admin")
mongo_password = os.environ.get("MONGO_PASSWORD", "password")
auth_db = os.environ.get("MONGO_AUTH_DB", "admin")
client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db,
)
# Source database: the raw collections read below live in "datalake".
db_mongo = client_mongo.get_database("datalake")

# Load the French gas-station prices from the "french_gas_station" collection.
collection_mongo = db_mongo.get_collection("french_gas_station")
cursor = collection_mongo.find(
    {},
    {"_id": 0, "Gas_station_id": 1, "Date": 1, "Nom": 1, "Valeur": 1},
)

# Parse the dates, switch to English column names, then average the price of
# all rows sharing the same date and fuel name, rounded to 5 decimals.
df_french_gas_station = (
    pd.DataFrame(list(cursor))
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .rename(columns={"Nom": "Fuel_name", "Valeur": "Fuel_eur_liter"})
    .groupby(['Date', 'Fuel_name'], as_index=False)['Fuel_eur_liter']
    .mean()
    .assign(Fuel_eur_liter=lambda frame: frame['Fuel_eur_liter'].round(5))
)
print("df_french_gas_station", df_french_gas_station.head(), sep="\n")


# Load the Brent closing quotes from the "stockmarket_brent_usd" collection.
collection_mongo = db_mongo.get_collection("stockmarket_brent_usd")
cursor = collection_mongo.find({}, {"_id": 0, "Date": 1, "Close": 1})

# Parse the dates and give the closing price an explicit name.
stockmarket_brent_usd = (
    pd.DataFrame(list(cursor))
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .rename(columns={"Close": "Brent_usd_barrel"})
)
print("stockmarket_brent_usd", stockmarket_brent_usd.head(), sep="\n")


# Load the EUR/USD closing rates from the "stockmarket_eur_usd" collection.
collection_mongo = db_mongo.get_collection("stockmarket_eur_usd")
cursor = collection_mongo.find({}, {"_id": 0, "Date": 1, "Close": 1})

# Parse the dates and give the closing rate an explicit name.
stockmarket_eur_usd = (
    pd.DataFrame(list(cursor))
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .rename(columns={"Close": "Eur_usd"})
)
print("stockmarket_eur_usd", stockmarket_eur_usd.head(), sep="\n")

# Left joins on Date keep every gas-station row:
#   join 1 adds the Brent quote (USD),
#   join 2 adds the EUR/USD exchange rate.
df_joined = (
    df_french_gas_station
    .merge(stockmarket_brent_usd, on='Date', how='left')
    .merge(stockmarket_eur_usd, on='Date', how='left')
)
# Work on a copy so df_joined keeps the raw joined columns.
df_result = df_joined.copy()
print("df_result after all the joined", df_result.head(), sep="\n")

# Brent price per barrel in EUR: the USD price divided by the EUR/USD rate
df_result['Brent_eur_barrel'] = df_result['Brent_usd_barrel'].div(df_result['Eur_usd'])
# One barrel contains 158.987 liters
df_result['Brent_eur_liter'] = df_result['Brent_eur_barrel'].div(158.987).round(5)
print("df_result after create brent_eur_liter")
# Rows 589-599 are printed as a sample
print(df_result.iloc[589:600])

# Clean df: drop the intermediate columns used for the conversion
df_clean = df_result.drop(
    ['Brent_usd_barrel', 'Eur_usd', 'Brent_eur_barrel'],
    axis='columns',
)
print("df_clean len=", len(df_clean))
print(df_clean.iloc[589:600])

# Push to MongoDB
# Build the documents first: whether there is anything to write must be known
# *before* the existing collection is dropped.
records = df_clean.to_dict(orient="records")
if not records:
    # insert_many() rejects an empty list. Failing here keeps the previously
    # loaded collection intact instead of dropping it and then crashing.
    raise ValueError(
        "df_clean is empty: nothing to load into "
        "denormalization.compare_french_fuel_to_stockmarket"
    )

# Target database for the denormalized result.
db_mongo = client_mongo.get_database("denormalization")
# Full refresh: remove the collection if it exists, then reload it.
db_mongo.drop_collection("compare_french_fuel_to_stockmarket")
collection_mongo = db_mongo.get_collection("compare_french_fuel_to_stockmarket")
collection_mongo.insert_many(records)
print("correctly loaded denormalized datas to MongoDB")

# Notebook output: KeyboardInterrupt