In [None]:
import sys
sys.path.append('..') 
import os

import psycopg2
import pandas as pd
import numpy as np
from datetime import date, timedelta, datetime
from dateutil.relativedelta import relativedelta

# Main

## Data Loading

In [None]:
# """DataLoad"""
# from src.utils.config import get_config
# from src.db_ops.db_ops import PosgreOps

# # Load Config
# config = get_config()
# # Initialize the database operator (in this case, the PostgreSQL operator)
# pgops = PosgreOps(config=config)
# ### Define the required parameters to load the data ###
# # -------------------------------------------------------- #
# months = 84
# table_name = "daas.epdk_petrol_province"
# # -------------------------------------------------------- #
# # Fetch data from database
# epdk_petrol_province_data = pgops.get_monthly_data(table_name=table_name,
#                                                 today=date.today(), months=months)
# # Close the connection after obtaining the data
# pgops.shutdown()

In [None]:
# # Investigate Results
# print(epdk_petrol_province_data.shape)
# print(epdk_petrol_province_data.province.nunique())
# print(epdk_petrol_province_data.date.min())

In [None]:
# epdk_petrol_province_data.date.dtypes

In [None]:
# Read the raw data snapshot from CSV; the file's first column becomes the index.
epdk_petrol_province_data = pd.read_csv(
    filepath_or_buffer='data/daas_raw_data.csv',
    index_col=[0],
)
# Print a summary of columns, dtypes and non-null counts (info() prints and returns None).
epdk_petrol_province_data.info()

## Preprocessing

In [None]:
"""Preprocessing"""
from src.preprocessing.preprocess import PreprocessData

# ---------------- Preprocessing parameters ---------------- #
# Columns handed to the preprocessor as its target columns.
target_col_list = [
    "date",
    "province",
    "diesel_types",
]
# Column -> value spec passed as the row-drop argument
# (presumably removes the "Toplam" summary rows; verify in PreprocessData).
row_drop_dict = {
    "province": "Toplam",
}
# format_date_flag is not passed here; the original note says it is True by default.
# Old -> new column name mapping passed as the rename argument.
col_rename_dict = {
    "diesel_types": "current_month_consumption",
}
# Column passed as the anomaly column; this is the post-rename name of "diesel_types".
anomaly_col = "current_month_consumption"
# ---------------------------------------------------------- #

# Wrap the raw frame in the preprocessing helper, then run it with the settings above.
prep = PreprocessData(data=epdk_petrol_province_data)
preprocessed_df = prep.preprocess_data(
    target_col_list=target_col_list,
    row_drop_dict=row_drop_dict,
    col_rename_dict=col_rename_dict,
    anomaly_col=anomaly_col,
)


In [None]:
# Quick sanity summary of the preprocessed frame, one value per line:
# dimensions, number of distinct provinces, and the earliest date present.
print(
    preprocessed_df.shape,
    preprocessed_df["province"].nunique(),
    preprocessed_df["date"].min(),
    sep="\n",
)

In [None]:
# Preview the last five rows of the preprocessed frame (5 is tail()'s default).
preprocessed_df.tail(n=5)

In [None]:
# Check whether the missing values at 2021-10-01 were filled.
# sort_values() places NaN last by default, so tail() surfaces any rows that are
# still missing; otherwise it shows the five largest consumption values.
(
    preprocessed_df
    .query(f"date=='{date(2021,10,1)}'")
    .sort_values(by="current_month_consumption")
    .tail()
)

In [None]:
# # Save preprocessed data
# preprocessed_df.to_csv("data/preprocessed_df.csv")
# # Load preprocessed data
# parse_dates = ["date"]
# preprocessed_df = pd.read_csv('data/preprocessed_df.csv', index_col=[0], parse_dates=parse_dates)
# preprocessed_df.info()

## Plotting

In [None]:
"""Plotting"""
from src.utils.plotting import plot_province

# Plot monthly consumption over time for a single province.
# NOTE(review): stdev=2 is presumably the width of a standard-deviation band —
# confirm against plot_province.
plot_province(
    df=preprocessed_df,
    col_x="date",
    col_y="current_month_consumption",
    province="BAYBURT",
    stdev=2,
)

## Feature Engineering

In [None]:
"""FeatureEngineering"""
from src.feature_engineering.feat_eng import FeatureEngineering

# Build the feature-engineering helper on top of the preprocessed frame ...
feng = FeatureEngineering(
    data=preprocessed_df,
)
# ... and run it to obtain the feature-engineered frame used by the cells below.
feature_engineered_df = feng.feature_engineering()

In [None]:
# Report the frame's dimensions, then list the rows whose consumption value
# has no fractional part (remainder of division by 1 is zero).
print(feature_engineered_df.shape)
feature_engineered_df.query(expr="current_month_consumption%1==0")

In [None]:
# feature_engineered_df.query("province == 'ARTVİN'")

In [None]:
from src.utils.plotting import plot_metrics

# Compare actual consumption against the ARIMA prediction for one province.
# The filter is an exact string match: the name uses the Turkish dotted
# capital "İ", so a plain ASCII "I" would not select any rows.
plot_metrics(
    feature_engineered_df.query("province == 'İSTANBUL'"),
    col_x="date",
    col_y1="current_month_consumption",
    col_y2="ARIMA_prediction",
)

In [None]:
# Persist the feature-engineered frame to CSV. The index is written as the
# first column (to_csv default), so it can be read back with index_col=[0].
feature_engineered_df.to_csv(path_or_buf="data/feature_engineered_df.csv")