In [47]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

from fbprophet import Prophet

In [48]:
# Load the raw table from a CSV file in the current working directory.
df = pd.read_csv(filepath_or_buffer='air_pollution.csv')

In [49]:
# Convert the 'date' column to datetimes; get_predictions relies on the
# `.dt` accessor of this column (renamed to 'ds') to split by year.
df['date'] = df['date'].pipe(pd.to_datetime)

In [50]:
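# Disabled experiment: min-max scale every column after the first.
# It is left commented out, so the models are fit on unscaled values and the
# error metrics computed later are in the data's original units.
# scaler = MinMaxScaler()


# for column in df.columns[1:]:
    
#     df[column] = scaler.fit_transform(df[[column]])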

In [51]:
def prophet_prediction(train,test_size):
    """Fit a Prophet model on ``train`` and forecast ``test_size`` further periods.

    Parameters
    ----------
    train : pandas.DataFrame
        Training history handed straight to ``Prophet.fit``.
    test_size : int
        Number of future periods to forecast.

    Returns
    -------
    numpy.ndarray
        The ``yhat`` values of the last ``test_size`` forecast rows.
    """
    # Weekly seasonality is forced on, daily seasonality is forced off.
    model = Prophet(weekly_seasonality=True, daily_seasonality=False)
    model.fit(train)

    # The future frame passed to predict() may also contain the history rows,
    # which is why only the trailing rows are kept afterwards.
    horizon = model.make_future_dataframe(periods=test_size)
    yhat_tail = model.predict(horizon)['yhat'].tail(test_size)

    return np.array(yhat_tail.tolist())

In [52]:
def get_predictions(df, split_year=2017):
    """Split a series by calendar year, forecast the hold-out part, return both.

    Rows whose ``ds`` year is before ``split_year`` form the training set; rows
    from ``split_year`` onwards form the hold-out set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime column ``ds`` and a target column ``y``.
    split_year : int, default 2017
        First calendar year that belongs to the hold-out set.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(actuals, predictions)``: the hold-out ``y`` values and the forecast
        returned by ``prophet_prediction`` for ``len(test)`` periods.
    """
    # Compute the year once and derive both masks from it. Rows with a missing
    # timestamp compare False in both masks and therefore land in neither set.
    years = df['ds'].dt.year
    train = df[years < split_year]
    test = df[years >= split_year]

    actuals = test['y'].to_numpy()

    # NOTE(review): actuals and predictions are matched by position only. This
    # assumes the hold-out rows are consecutive periods directly following the
    # training data, with no gaps -- TODO confirm against the dataset.
    predictions = prophet_prediction(train, len(test))

    return actuals, predictions
        

In [53]:
def smape(y_true, pred):
    """Symmetric mean absolute percentage error, in percent.

    Computes ``100 / n * sum(2 * |pred - y_true| / (|y_true| + |pred|))``.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    pred : array-like
        Forecast values, same length as ``y_true``.

    Returns
    -------
    float
        The sMAPE value. A point where the actual and the forecast are both
        exactly zero is a perfect forecast and contributes 0 instead of the
        NaN that 0/0 would produce. NaNs present in the inputs still propagate.
    """
    # Coerce to float arrays so plain lists and integer dtypes work too.
    y_true = np.asarray(y_true, dtype=float)
    pred = np.asarray(pred, dtype=float)

    abs_err = 2 * np.abs(pred - y_true)
    scale = np.abs(y_true) + np.abs(pred)

    # scale is zero only when both values are zero, i.e. the term is 0/0.
    # Silence that warning and substitute 0 for exactly those terms.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratio = abs_err / scale
    ratio = np.where(scale == 0, 0.0, ratio)

    return 100/len(y_true) * np.sum(ratio)

def rmse(y_true,pred):
    """Root mean squared error between observed and forecast values.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    pred : array-like
        Forecast values, broadcast-compatible with ``y_true``.

    Returns
    -------
    float
        ``sqrt(mean((pred - y_true) ** 2))``.
    """
    # Coerce to float arrays: plain lists do not support subtraction, and
    # unsigned / small integer dtypes would wrap around in the difference
    # or the square.
    y_true = np.asarray(y_true, dtype=float)
    pred = np.asarray(pred, dtype=float)
    return np.sqrt(np.mean((pred - y_true) ** 2))

In [54]:
# Score one forecast per data column and collect the two error metrics.
smapes, rmses = [], []

# Every column after the first is treated as a separate series
# (assumes the first column is 'date' -- TODO confirm).
for column in df.columns[1:]:

    # Build the two-column frame with the names get_predictions reads:
    # 'ds' for the timestamp and 'y' for the target.
    modeldf = pd.DataFrame({'ds': df['date'], 'y': df[column]})

    actuals, predictions = get_predictions(modeldf)

    smapes += [smape(actuals, predictions)]
    rmses += [rmse(actuals, predictions)]
    

In [55]:
# `predictions` is reassigned on every pass of the evaluation loop, so this
# shows only the forecast for the last column that was processed.
predictions

array([18.82642926, 18.83101212, 18.8264949 , 18.8127383 , 18.7898007 ,
       18.75793678, 12.4290551 , 18.6693846 , 18.61410069, 18.55266074,
       18.48610015, 18.41553932, 18.34215227, 11.97859802, 18.19166519,
       18.11688136, 18.04383743, 17.97347844, 17.90661151, 17.84388165,
       11.49721627, 17.73248892, 17.68415285, 17.64059446, 17.60145712,
       17.56618519, 17.53403819, 11.21557472, 17.47535575, 17.44661673,
       17.41665762, 17.38420018, 17.34796151, 17.30669288, 10.97068285,
       17.20447341, 17.14153869, 17.06967446, 16.98834818, 16.89725846,
       16.79635321, 10.39730614, 16.5662008 , 16.43817244, 16.302757  ,
       16.16119778, 16.01495989, 15.86570281,  9.42671193, 15.56553932,
       15.41860535, 15.27651117, 15.14131397, 15.01501563, 14.89951599,
        8.508032  , 14.70773359, 14.63434783, 14.57748155, 14.5379141 ,
       14.51611042, 14.51220525,  8.23745889, 14.5569346 , 14.60414802,
       14.66643812, 14.7423097 , 14.82999681, 14.92749656,  8.74

In [56]:
# Average sMAPE over all evaluated columns.
print(np.asarray(smapes).mean())

66.12774610029933


In [57]:
# Average RMSE over all evaluated columns.
print(np.asarray(rmses).mean())

12.399652691324293
