In [12]:
import os
import warnings
from datetime import datetime

import pandas as pd
import pymongo

# Silence pandas' chained-assignment check and every RuntimeWarning for the
# rest of the session.
pd.set_option("mode.chained_assignment", None)
warnings.simplefilter("ignore", category=RuntimeWarning)

# Setup MongoDB connection.
# Connection settings are read from environment variables so that real
# credentials do not have to be stored in the notebook. Every variable falls
# back to the previous local-development value when it is not set:
#   MONGO_HOST      (default "localhost")
#   MONGO_PORT      (default 27017)
#   MONGO_USER      (default "admin")
#   MONGO_PASSWORD  (default "password")
#   MONGO_AUTH_DB   (default "admin")
mongo_host = os.environ.get("MONGO_HOST", "localhost")
mongo_port = int(os.environ.get("MONGO_PORT", "27017"))  # MongoClient expects an int port
mongo_user = os.environ.get("MONGO_USER", "admin")
mongo_password = os.environ.get("MONGO_PASSWORD", "password")
auth_db = os.environ.get("MONGO_AUTH_DB", "admin")
client_mongo = pymongo.MongoClient(
    host=mongo_host,
    port=mongo_port,
    username=mongo_user,
    password=mongo_password,
    authSource=auth_db,
)
# Handles on the two databases used below: the source data lake and the
# denormalized store that also receives the merged result.
db_mongo_denorm = client_mongo.get_database("denormalization")
db_mongo_datalake = client_mongo.get_database("datalake")
    
    
# GET station_ttc_gas_eur_liter (daily data)
# Fetch every document, keeping only "Date" and the six per-fuel price fields
# ("_id" is excluded), then parse "Date" into pandas datetimes.
collection_mongo = db_mongo_denorm["station_ttc_gas_eur_liter"]
cursor = collection_mongo.find(
    {},
    {
        "_id": 0,
        **dict.fromkeys(
            (
                "Date",
                "station_ttc_GAZOLE_eur_liter",
                "station_ttc_SP95_eur_liter",
                "station_ttc_E10_eur_liter",
                "station_ttc_SP98_eur_liter",
                "station_ttc_E85_eur_liter",
                "station_ttc_GPLC_eur_liter",
            ),
            1,
        ),
    },
)
df_station_ttc_gas_eur_liter = pd.DataFrame(list(cursor)).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"])
)
print("---- df_station_ttc_gas_eur_liter ----\n", df_station_ttc_gas_eur_liter.head())


# GET official_ttc_gas_eur_liter (weekly data)
# Fetch every document, keeping only "Date" and the six per-fuel price fields
# ("_id" is excluded), then parse "Date" into pandas datetimes.
collection_mongo = db_mongo_datalake["official_ttc_gas_eur_liter"]
cursor = collection_mongo.find(
    {},
    {
        "_id": 0,
        **dict.fromkeys(
            (
                "Date",
                "official_ttc_GAZOLE_eur_liter",
                "official_ttc_SP95_eur_liter",
                "official_ttc_E10_eur_liter",
                "official_ttc_SP98_eur_liter",
                "official_ttc_E85_eur_liter",
                "official_ttc_GPLC_eur_liter",
            ),
            1,
        ),
    },
)
df_official_ttc_gas_eur_liter = pd.DataFrame(list(cursor)).assign(
    Date=lambda frame: pd.to_datetime(frame["Date"])
)
# NOTE: the rows are kept at their stored granularity. Expanding the weekly
# data to daily rows (set_index('Date') -> resample('D').ffill() ->
# reset_index()) is currently disabled.
print("---- df_official_ttc_gas_eur_liter -----\n", df_official_ttc_gas_eur_liter.head())


# MERGE TOGETHER
# Outer-join the official and station frames on "Date" (dates present in only
# one source are kept), order the rows chronologically, then append calendar
# helper columns derived from "Date".
# Debugging aids: pass indicator=True to merge() to see which source each row
# came from, or dump the result with
# all_ttc_gas_eur_liter.to_csv("all_ttc_gas_eur_liter.csv", index=False).
all_ttc_gas_eur_liter = (
    df_official_ttc_gas_eur_liter
    .merge(df_station_ttc_gas_eur_liter, on="Date", how="outer")
    .sort_values(by="Date")
    .assign(
        Day_of_week=lambda frame: frame["Date"].dt.day_name(),
        Month=lambda frame: frame["Date"].dt.month_name(),
        Year=lambda frame: frame["Date"].dt.year.astype(str),
        # Day number immediately followed by the lower-cased month name.
        DayMonth=lambda frame: (
            frame["Date"].dt.day.astype(str)
            + frame["Date"].dt.month_name().str.lower()
        ),
    )
)
print("------- all_ttc_gas_eur_liter ------\n", all_ttc_gas_eur_liter)




# ----- Push to MongoDB -----
# Full refresh: drop the existing target collection, recreate it with an
# ascending index on "Date", then insert one document per merged row.
db_mongo_denorm.drop_collection("all_ttc_gas_eur_liter")
collection_mongo = db_mongo_denorm["all_ttc_gas_eur_liter"]
collection_mongo.create_index("Date")  # a bare key name means ascending order

records = all_ttc_gas_eur_liter.to_dict(orient="records")
collection_mongo.insert_many(records)
print("correctly loaded all_ttc_gas_eur_liter to denormalized collection MongoDB")
    

---- df_station_ttc_gas_eur_liter ----
         Date  station_ttc_E85_eur_liter  station_ttc_GPLC_eur_liter  \
0 2007-01-01                        NaN                         NaN   
1 2007-01-02                        NaN                         NaN   
2 2007-01-03                        NaN                         NaN   
3 2007-01-04                        NaN                         NaN   
4 2007-01-05                        NaN                         NaN   

   station_ttc_GAZOLE_eur_liter  station_ttc_SP95_eur_liter  \
0                       1.10647                     1.24179   
1                       1.00571                     1.17889   
2                       1.00645                     1.18080   
3                       1.02514                     1.18936   
4                       1.00072                     1.17936   

   station_ttc_E10_eur_liter  station_ttc_SP98_eur_liter  
0                        NaN                         NaN  
1                        NaN        