In [3]:
import sys
import os

# Resolve the parent of the current working directory (the directory the
# notebook kernel was started in) as a normalized absolute path, i.e. without
# a trailing "/.." component.
current_dir = os.path.abspath("")
parent_dir = os.path.normpath(os.path.join(current_dir, os.pardir))

# Add to sys.path so packages located next to this notebook's folder become
# importable. Guarded so that re-running the cell does not append duplicates.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [4]:
import ml_combat as ml
from ml_combat import data
from ml_combat.MetaModel import MetaModel

In [2]:
from prophet import Prophet
import pandas as pd
import numpy as np

In [3]:
class ProphetModel(MetaModel):
    """Prophet forecaster that uses total hourly radiation as its only extra regressor.

    The model is registered with the base class under the name "Prophet".
    Input frames must contain the columns 'diffuse_rad_1h:J' and
    'direct_rad_1h:J'; Prophet itself additionally expects 'ds' (and 'y' when
    fitting).
    """

    def __init__(self):
        super().__init__("Prophet")
        # Populated by train(); None means the model has not been fitted yet.
        self.prophet_model = None

    def preprocess(self, df):
        """Return a cleaned copy of ``df`` with the engineered radiation feature.

        Steps:
          1. Add 'total_rad_1h:J' = 'diffuse_rad_1h:J' + 'direct_rad_1h:J'.
          2. Drop rows where that sum is missing and, if a 'y' column is
             present, rows where 'y' is missing.
          3. Replace every remaining missing value with 0.

        The input frame is not modified.

        Args:
            df: DataFrame holding at least the two radiation columns.

        Returns:
            A new DataFrame; it may contain fewer rows than ``df``.
        """
        temp_df = df.copy()

        temp_df['total_rad_1h:J'] = df['diffuse_rad_1h:J'] + df['direct_rad_1h:J']

        # Rows are unusable when the regressor (or the target, if given) is
        # missing. `subset` is passed as a list because a bare string label is
        # only accepted by pandas >= 1.5.
        required = ['total_rad_1h:J']
        if 'y' in temp_df.columns:
            required.append('y')
        temp_df = temp_df.dropna(axis=0, subset=required)

        # Any other gaps are filled with 0 rather than dropping the row.
        return temp_df.fillna(0)

    def train(self, df):
        """Fit a fresh Prophet model on ``df``.

        ``df`` is run through :meth:`preprocess` first. The fitted model is
        stored on ``self.prophet_model`` and replaces any previous fit.

        Args:
            df: Training DataFrame with 'ds', 'y' and the radiation columns.
        """
        df = self.preprocess(df)

        self.prophet_model = Prophet()
        # for feat in [i for i in df.columns.to_list() if i not in ['location', 'ds', 'y', 'weather_data_type']]:
        self.prophet_model.add_regressor('total_rad_1h:J')
        self.prophet_model.fit(df)

    def predict(self, df):
        """Forecast with the fitted model.

        NOTE(review): ``df`` goes through :meth:`preprocess`, which drops rows
        with missing radiation (or missing 'y'), so the result can contain
        fewer rows than the input. Callers that need one prediction per input
        row should verify the lengths match.

        Args:
            df: DataFrame with 'ds' and the radiation columns.

        Returns:
            DataFrame with columns 'ds' and 'y_pred'.

        Raises:
            RuntimeError: if called before :meth:`train`.
        """
        if getattr(self, "prophet_model", None) is None:
            raise RuntimeError("ProphetModel.predict() called before train()")

        df = self.preprocess(df)

        forecast = self.prophet_model.predict(df)
        # fig = prophet_model.plot_components(forecast)
        temp_ret = forecast[['ds', 'yhat']].rename(columns={'yhat': 'y_pred'})

        return temp_ret
    


In [4]:
# df[df['snow_density:kgm3'].notna()]
# df[df.isna().any(axis=1)]

# df[df['visibility:m'].isna()]

In [25]:
# Evaluate, fit and forecast one Prophet model per location, then stack the
# per-location forecasts into `ret`.
df = ml.data.get_training_flattened()
test = ml.data.get_testing_flattened()

# Forecast frames are collected here and concatenated once after the loop,
# instead of growing a DataFrame with pd.concat on every iteration.
predictions = []
for location in ['A', 'B', 'C']:
    temp_df = df[df['location'] == location]

    if location != "A":
        # Locations B and C are restricted to rows marked as observed weather.
        # The mask is built from temp_df itself so its index matches the frame
        # being filtered; a mask from the unfiltered `df` selects the same rows
        # but makes pandas emit a "Boolean Series key will be reindexed" warning.
        temp_df = temp_df[temp_df['weather_data_type'] == 'observed']

    lr = ProphetModel()
    # test() comes from the base class; the mean of what it returns is printed
    # as the score for this location.
    print("----------------- RESULT HERE", np.mean(lr.test(temp_df)))

    lr.train(temp_df)
    predictions.append(lr.predict(test[test['location'] == location]))

ret = pd.concat(predictions)

# Message text kept exactly as recorded in the cell output (the model is Prophet).
print("Done creating a linear regression model!")

11:48:19 - cmdstanpy - INFO - Chain [1] start processing


Testing Prophet


11:48:20 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:1.0008809020436928 achieved MAE 346.8157370892518


11:48:21 - cmdstanpy - INFO - Chain [1] start processing
11:48:22 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0008809020436926 achieved MAE 387.6145769442664


11:48:23 - cmdstanpy - INFO - Chain [1] start processing
11:48:24 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0008809020436926 achieved MAE 410.0770603400472


11:48:26 - cmdstanpy - INFO - Chain [1] start processing
11:48:28 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000880902043693 achieved MAE 336.8192848425941


11:48:30 - cmdstanpy - INFO - Chain [1] start processing
11:48:33 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:5.000880902043693 achieved MAE 249.19093592890155
----------------- RESULT HERE 346.1035190290122


11:48:35 - cmdstanpy - INFO - Chain [1] start processing
11:48:39 - cmdstanpy - INFO - Chain [1] done processing
  temp_df = temp_df[df.weather_data_type == 'observed']
11:48:39 - cmdstanpy - INFO - Chain [1] start processing


Testing Prophet


11:48:39 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:1.0008215239268843 achieved MAE 60.823373436351694


11:48:40 - cmdstanpy - INFO - Chain [1] start processing
11:48:41 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0008215239268843 achieved MAE 97.73108598926572


11:48:42 - cmdstanpy - INFO - Chain [1] start processing
11:48:43 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0008215239268843 achieved MAE 45.062494723754895


11:48:45 - cmdstanpy - INFO - Chain [1] start processing
11:48:46 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000821523926884 achieved MAE 96.3815669247695


11:48:48 - cmdstanpy - INFO - Chain [1] start processing
11:48:51 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:5.000821523926884 achieved MAE 47.76870516733394
----------------- RESULT HERE 69.55344524829516


11:48:53 - cmdstanpy - INFO - Chain [1] start processing
11:48:58 - cmdstanpy - INFO - Chain [1] done processing
  temp_df = temp_df[df.weather_data_type == 'observed']
11:48:58 - cmdstanpy - INFO - Chain [1] start processing


Testing Prophet


11:48:58 - cmdstanpy - INFO - Chain [1] done processing
11:48:59 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:1.0012966804979253 achieved MAE 65.37090261622258


11:49:00 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0012966804979255 achieved MAE 63.82997170237396


11:49:01 - cmdstanpy - INFO - Chain [1] start processing
11:49:02 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0012966804979255 achieved MAE 43.1546901223127


11:49:03 - cmdstanpy - INFO - Chain [1] start processing
11:49:04 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.0012966804979255 achieved MAE 45.168879365594584


11:49:05 - cmdstanpy - INFO - Chain [1] start processing
11:49:06 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:5.0012966804979255 achieved MAE 31.211890623073625
----------------- RESULT HERE 49.747266885915494


11:49:08 - cmdstanpy - INFO - Chain [1] start processing
11:49:09 - cmdstanpy - INFO - Chain [1] done processing


Done creating a linear regression model!


In [26]:
# Shape the stacked forecasts into the submission layout: a running row number
# as `id` and the forecast as `prediction`.
# NOTE: this cell overwrites `ret` and removes `ds`, so it cannot be re-run on
# its own; re-run the forecasting cell first.
ret = (
    ret.reset_index(drop=True)
    .reset_index()
    .drop(columns=["ds"])
    .rename(columns={'index': 'id', 'y_pred': 'prediction'})
)
# Negative forecasts are floored at zero (vectorized; NaN values stay NaN).
ret["prediction"] = ret["prediction"].clip(lower=0)
ret

Unnamed: 0,id,prediction
0,0,26.597097
1,1,33.739131
2,2,29.505813
3,3,55.278284
4,4,298.035157
...,...,...
2155,2155,70.740189
2156,2156,46.783561
2157,2157,26.121931
2158,2158,17.905157


In [27]:
# Persist the submission frame next to the notebook, without the row index.
ret.to_csv(path_or_buf="prophet_tweaked.csv", index=False)

In [5]:
# Score Prophet per location using only rows marked as observed weather.
df = ml.data.get_training_flattened()
for location in ['A', 'B', 'C']:
    temp_df = df[df['location'] == location]

    # Build the mask from temp_df itself so its index matches the frame being
    # filtered; a mask from the unfiltered `df` selects the same rows but makes
    # pandas emit a "Boolean Series key will be reindexed" warning.
    temp_df = temp_df[temp_df['weather_data_type'] == 'observed']

    lr = ProphetModel()
    # test() comes from the base class; the mean of what it returns is printed
    # as the score for this location.
    print("----------------- RESULT HERE", np.mean(lr.test(temp_df)))

# Message text kept exactly as recorded in the cell output (the model is Prophet).
print("Done creating a linear regression model!")

  temp_df = temp_df[df.weather_data_type == 'observed']
11:28:07 - cmdstanpy - INFO - Chain [1] start processing


Testing Prophet


11:28:07 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:1.0006067961165048 achieved MAE 530.1876666525234


11:28:08 - cmdstanpy - INFO - Chain [1] start processing
11:28:09 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.000606796116505 achieved MAE 259.9216498385317


11:28:10 - cmdstanpy - INFO - Chain [1] start processing
11:28:12 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.000606796116505 achieved MAE 417.94016890818455


11:28:13 - cmdstanpy - INFO - Chain [1] start processing
11:28:15 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000606796116505 achieved MAE 240.73292436950007


11:28:17 - cmdstanpy - INFO - Chain [1] start processing
11:28:19 - cmdstanpy - INFO - Chain [1] done processing
  temp_df = temp_df[df.weather_data_type == 'observed']
11:28:20 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:5.000606796116505 achieved MAE 381.2536422984788
----------------- RESULT HERE 366.00721041344366
Testing Prophet


11:28:20 - cmdstanpy - INFO - Chain [1] done processing
11:28:21 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:1.0008215239268843 achieved MAE 60.823373436351694


11:28:22 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0008215239268843 achieved MAE 97.73108598926572


11:28:23 - cmdstanpy - INFO - Chain [1] start processing
11:28:24 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0008215239268843 achieved MAE 45.062494723754895


11:28:26 - cmdstanpy - INFO - Chain [1] start processing
11:28:27 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000821523926884 achieved MAE 96.3815669247695


11:28:29 - cmdstanpy - INFO - Chain [1] start processing
11:28:32 - cmdstanpy - INFO - Chain [1] done processing
  temp_df = temp_df[df.weather_data_type == 'observed']
11:28:33 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:5.000821523926884 achieved MAE 47.76870516733394
----------------- RESULT HERE 69.55344524829516
Testing Prophet


11:28:33 - cmdstanpy - INFO - Chain [1] done processing
11:28:34 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:1.0012966804979253 achieved MAE 65.37090261622258


11:28:35 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0012966804979255 achieved MAE 63.82997170237396


11:28:35 - cmdstanpy - INFO - Chain [1] start processing
11:28:36 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0012966804979255 achieved MAE 43.1546901223127


11:28:37 - cmdstanpy - INFO - Chain [1] start processing
11:28:38 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.0012966804979255 achieved MAE 45.168879365594584


11:28:40 - cmdstanpy - INFO - Chain [1] start processing
11:28:41 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:5.0012966804979255 achieved MAE 31.211890623073625
----------------- RESULT HERE 49.747266885915494
Done creating a linear regression model!


In [6]:
# Score Prophet per location on every training row (no weather-type filter).
df = ml.data.get_training_flattened()
for location in ('A', 'B', 'C'):
    at_location = df['location'] == location
    temp_df = df.loc[at_location]

    lr = ProphetModel()
    # Average whatever the inherited test() returns and report it.
    scores = lr.test(temp_df)
    print("----------------- RESULT HERE", np.mean(scores))

print("Done creating a linear regression model!")

11:28:43 - cmdstanpy - INFO - Chain [1] start processing


Testing Prophet


11:28:43 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:1.0008809020436928 achieved MAE 346.8157370892518


11:28:45 - cmdstanpy - INFO - Chain [1] start processing
11:28:45 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0008809020436926 achieved MAE 387.6145769442664


11:28:47 - cmdstanpy - INFO - Chain [1] start processing
11:28:48 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0008809020436926 achieved MAE 410.0770603400472


11:28:50 - cmdstanpy - INFO - Chain [1] start processing
11:28:52 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000880902043693 achieved MAE 336.8192848425941


11:28:54 - cmdstanpy - INFO - Chain [1] start processing
11:28:56 - cmdstanpy - INFO - Chain [1] done processing
11:28:57 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:5.000880902043693 achieved MAE 249.19093592890155
----------------- RESULT HERE 346.1035190290122
Testing Prophet


11:28:58 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:1.0009142439202778 achieved MAE 62.316077008302024


11:28:59 - cmdstanpy - INFO - Chain [1] start processing
11:28:59 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.000914243920278 achieved MAE 130.35720074626607


11:29:01 - cmdstanpy - INFO - Chain [1] start processing
11:29:02 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.000914243920278 achieved MAE 84.16510888676031


11:29:04 - cmdstanpy - INFO - Chain [1] start processing
11:29:07 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000914243920278 achieved MAE 74.33771923582101


11:29:08 - cmdstanpy - INFO - Chain [1] start processing
11:29:13 - cmdstanpy - INFO - Chain [1] done processing
11:29:14 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:5.000914243920278 achieved MAE 66.5601819986914
----------------- RESULT HERE 83.54725757516816
Testing Prophet


11:29:15 - cmdstanpy - INFO - Chain [1] done processing
11:29:16 - cmdstanpy - INFO - Chain [1] start processing


Train-Test ratio:1.000230149597238 achieved MAE 187.0254253591293


11:29:16 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:2.0002301495972383 achieved MAE 25.66748639366984


11:29:17 - cmdstanpy - INFO - Chain [1] start processing
11:29:18 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:3.0002301495972383 achieved MAE 66.69101794748853


11:29:19 - cmdstanpy - INFO - Chain [1] start processing
11:29:20 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:4.000230149597238 achieved MAE 40.265174080928304


11:29:22 - cmdstanpy - INFO - Chain [1] start processing
11:29:23 - cmdstanpy - INFO - Chain [1] done processing


Train-Test ratio:5.000230149597238 achieved MAE 40.7065991885315
----------------- RESULT HERE 72.07114059394948
Done creating a linear regression model!
