In [1]:
#import necessary libraries

import glob
import numpy as np
import pycaret
import pandas as pd
import os
from pycaret.regression import *
from pycaret.regression import RegressionExperiment
from prophet import Prophet
from sklearn.metrics import make_scorer


In [2]:
def read_data(pth):
    df = pd.read_csv(pth,index_col='Date',parse_dates=True,na_values=['nan']).drop(['ID'],axis=1)
    df = df.fillna(method='ffill',axis=1)
    df = df.fillna(method='bfill',axis=1)
    #df.info()
    #df.describe().T
    return df

def forecasting(df,daterange,stid):
    df_pol = pd.DataFrame({'ds': daterange}) 
    for col in ['PM2.5', 'PM10', 'O3', 'CO', 'SO2']:
        df_tmp = pd.DataFrame({'ds': df[col].index, 'y': df[col]})
        #df_tmp.tail()
        m = Prophet(changepoint_prior_scale= 0.5, seasonality_prior_scale= 0.01,)
        m.fit(df_tmp)
        future = pd.DataFrame({'ds':daterange})
        forecast = m.predict(future)
        forecast = forecast[['ds','yhat']].rename(columns={'yhat': col})
        df_pol = pd.merge(df_pol,forecast,on='ds',how='left')
    return df_pol

def MMAE(y_pred,y_true):
    errors = np.abs(y_pred - y_true)
    mask = y_true > y_pred
    errors[mask] *= 1.5
    return np.mean(errors)

def LGBM_MMAE(y_pred,y_true):
    errors = np.abs(y_pred - y_true)
    mask = y_true > y_pred
    errors[mask] *= 1.5
    return ('MMAE', np.mean(errors), False)

In [3]:
files = glob.glob('./processed' + "/*.csv")

for filepath in files:
    filename = os.path.basename(filepath)
    Station_id = filename[:filename.rindex('.')]
    print(f'Processing {Station_id} : \n\n')
    data = read_data(filepath)
    shape = data.shape
    
    start_date = data.index.max() + pd.Timedelta(days=1)
    end_date = start_date + pd.Timedelta(days=27)
    date_range = pd.date_range(start_date,end_date)

    s = setup(data, target = 'AQI', session_id = 123,use_gpu=True,verbose=False)
    exp = RegressionExperiment()
    exp.setup(data, target = 'AQI', session_id = 123)
    add_metric(id="MMAE", name="MMAE", score_func=MMAE, greater_is_better=False)

    model = create_model(estimator='rf',fold=10,verbose=False)
    model = tune_model(model, fold=10,custom_scorer="MMAE",verbose=False)

    pol_pred = forecasting(data,date_range,Station_id).set_index('ds')
    predictions = predict_model(model, data = pol_pred)

    submission = pd.DataFrame({'Station_ID':Station_id + '_' + date_range.astype(str)}).set_index(date_range)
    submission.index.name='ds'
    submission = submission.join(predictions['prediction_label'])
    submission.to_csv('submission.csv',mode='a',index=False, header=False)
    


Processing WB_14 : 




Unnamed: 0,Description,Value
0,Session id,123
1,Target,AQI
2,Target type,Regression
3,Original data shape,"(565, 6)"
4,Transformed data shape,"(565, 6)"
5,Transformed train set shape,"(395, 6)"
6,Transformed test set shape,"(170, 6)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


03:44:03 - cmdstanpy - INFO - Chain [1] start processing
03:44:03 - cmdstanpy - INFO - Chain [1] done processing
03:44:03 - cmdstanpy - INFO - Chain [1] start processing
03:44:03 - cmdstanpy - INFO - Chain [1] done processing
03:44:03 - cmdstanpy - INFO - Chain [1] start processing
03:44:04 - cmdstanpy - INFO - Chain [1] done processing
03:44:04 - cmdstanpy - INFO - Chain [1] start processing
03:44:04 - cmdstanpy - INFO - Chain [1] done processing
03:44:04 - cmdstanpy - INFO - Chain [1] start processing
03:44:04 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,Description,Value
0,Session id,123
1,Target,AQI
2,Target type,Regression
3,Original data shape,"(770, 6)"
4,Transformed data shape,"(770, 6)"
5,Transformed train set shape,"(539, 6)"
6,Transformed test set shape,"(231, 6)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


Processing WB_16 : 




Unnamed: 0,Description,Value
0,Session id,123
1,Target,AQI
2,Target type,Regression
3,Original data shape,"(770, 6)"
4,Transformed data shape,"(770, 6)"
5,Transformed train set shape,"(539, 6)"
6,Transformed test set shape,"(231, 6)"
7,Numeric features,5
8,Preprocess,True
9,Imputation type,simple


03:57:08 - cmdstanpy - INFO - Chain [1] start processing
03:57:08 - cmdstanpy - INFO - Chain [1] done processing
03:57:08 - cmdstanpy - INFO - Chain [1] start processing
03:57:09 - cmdstanpy - INFO - Chain [1] done processing
03:57:09 - cmdstanpy - INFO - Chain [1] start processing
03:57:09 - cmdstanpy - INFO - Chain [1] done processing
03:57:10 - cmdstanpy - INFO - Chain [1] start processing
03:57:10 - cmdstanpy - INFO - Chain [1] done processing
03:57:10 - cmdstanpy - INFO - Chain [1] start processing
03:57:11 - cmdstanpy - INFO - Chain [1] done processing
