In [None]:
# Install Necessary Libraries
!pip install pandas
!pip install numpy
!pip install -U scikit-learn 
!pip instal matplotlib
!pip install scalecast --use-deprecated=backtrack-on-build-failures
#Loading up the libraries
import sklearn
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.ticker as ticker
import matplotlib.pyplot as plt
from scalecast.Forecaster import Forecaster
from scalecast.MVForecaster import MVForecaster
from scalecast.multiseries import export_model_summaries
from scalecast import GridGenerator
from scalecast.Forecaster import Forecaster
from scalecast.MVForecaster import MVForecaster
from scalecast.multiseries import export_model_summaries
from scalecast import GridGenerator
from sklearn.ensemble import BaggingRegressor
from sklearn.neural_network import MLPRegressor

In [None]:
def multi_variate_forecasting(district):
  """Tune, test and forecast max/min temperature and min humidity for one district.

  Reads ``cleaned_data/<district>-cleaned.csv`` (a ``Date`` column plus eight
  further columns, which are renamed positionally), builds one univariate
  ``Forecaster`` per series, combines them into a single ``MVForecaster``,
  tunes the ``knn``, ``mlr`` and ``ridge`` models, plots the results, prints a
  markdown summary table and writes an Excel workbook to ``./model_results/``.

  Parameters
  ----------
  district : str
      District name used to build the input CSV path and the output
      workbook name.

  Returns
  -------
  None
      Results are shown as plots, printed, and written to disk.
  """
  import os  # local import: only needed to make sure the output folder exists

  df = pd.read_csv('cleaned_data/{}-cleaned.csv'.format(district))
  df = df.set_index(['Date'])
  # Positional rename: the CSV is expected to carry exactly these eight columns after Date.
  df.columns = ['District','Rainfall', 'temp_min', 'temp_max', 'humidity_min',
        'Humidity_max', 'wind_speed_min', 'wind_speed_max']

  models = ('knn','mlr','ridge') #The final 3 models that we stuck with.
  GridGenerator.get_example_grids()
  GridGenerator.get_mv_grids() #For Grid Search CV

  #Load the Forecaster objects (one per series that goes into the multivariate model)
  series_names = ['temp_max','temp_min','humidity_min']
  forecasters = [Forecaster(y=df[name],current_dates = df.index) for name in series_names]

  for f in forecasters:
    f.generate_future_dates(400) #How many future values we would need
    f.set_test_length(.2) #20% of the data goes for testing
    f.set_validation_length(100) #100 samples goes for validation set for evaluating the best hyperparameters

    #Adding in seasonal terms because of time-series data.
    f.add_seasonal_regressors('quarter','week','dayofyear',raw=False,sincos=True)
    f.add_seasonal_regressors('dayofweek','is_leap_year','week','month',raw=False,dummy=True,drop_first=True)
    f.add_seasonal_regressors('year')
    f.add_time_trend()
    f.add_cycle(365) #Adds a regressor that acts as a seasonal cycle every 365 days.
    f.add_ar_terms(100) #Add 100 lags values as features

  # Everything below runs ONCE, after all three Forecaster objects are fully
  # prepared. Running it inside the loop above would build and tune the
  # multivariate model once per series, the first time with series that have
  # no future dates or regressors yet.

  #Initialize the multi-variate-forecaster object
  mvf = MVForecaster(*forecasters,names=series_names)
  mvf.set_test_length(0.2)
  mvf.set_validation_length(100)
  # Hyperparameters are optimized against a single series (temp_max).
  mvf.set_optimize_on('temp_max')

  #Run the model
  mvf.tune_test_forecast(models)
  mvf.set_best_model(determine_best_by='LevelTestSetMAPE') #Use the model that has the best MAPE score.

  #Plot the max-temp predictions for all the models
  mvf.plot_test_set(series='temp_max',put_best_on_top=True)
  plt.title('Conventional Multivariate Test-set Results',size=16)
  plt.show()

  #Export the model data into a list
  pd.options.display.max_colwidth = 100
  results = mvf.export('model_summaries')
  summary_columns = [
      'ModelNickname',
      'Series',
      'HyperParams',
      'LevelTestSetMAPE',
      'LevelTestSetR2',
      'InSampleMAPE',
      'InSampleR2',
      'Lags'
  ]
  print(results[summary_columns].to_markdown())

  #Plot forecast for max temperature
  mvf.plot(series='temp_max',models=['mlr'])
  plt.title('MLR Forecast - Max. Temperature',size=16)
  plt.show()

  #Plot forecast for all the series
  mvf.plot(models='mlr')
  plt.title('MLR - Multi Variate Forecasting ',size=16)
  plt.show()

  #Export the model data and model predictions into an excel file for further use.
  out_dir = './model_results/'
  os.makedirs(out_dir, exist_ok=True) # make sure the output folder exists before writing
  mvf.export(
      dfs=['model_summaries', 'lvl_test_set_predictions', 'lvl_fcsts'],
      models=list(models),
      to_excel=True,
      excel_name='3model-{}-max-temp.xlsx'.format(district),
      out_path=out_dir,
  )

In [None]:
# Run the multivariate weather pipeline on cleaned_data/Adilabad-cleaned.csv.
multi_variate_forecasting('Adilabad')

In [None]:
# Run the multivariate weather pipeline on cleaned_data/Karimnagar-cleaned.csv.
multi_variate_forecasting('Karimnagar')

In [None]:
# Run the multivariate weather pipeline on cleaned_data/Warangal-cleaned.csv.
multi_variate_forecasting('Warangal')

In [None]:
# Run the multivariate weather pipeline on cleaned_data/Khammam-cleaned.csv.
multi_variate_forecasting('Khammam')

In [None]:
# Run the multivariate weather pipeline on cleaned_data/Nizamabad-cleaned.csv.
multi_variate_forecasting('Nizamabad')

# AQI forecasting

In [None]:
def plot_test_export_summaries(f):
    """Plot the test-set results of the current estimator and return its summary table.

    The plot covers only the estimator currently set on ``f`` (``f.estimator``)
    and is drawn with confidence intervals. The returned frame is the
    ``model_summaries`` export, ordered by ``TestSetMAPE`` and restricted to
    the nickname, hyperparameter and test-set / in-sample metric columns.
    """
    current_model = f.estimator

    # Show how the current model performed on the held-out test set.
    f.plot_test_set(models=current_model, ci=True)
    plt.title(f'{current_model} test-set results', size=16)
    plt.show()

    # Keep only the columns that are useful for comparing models.
    summary_columns = [
        'ModelNickname',
        'HyperParams',
        'TestSetMAPE',
        'TestSetR2',
        'InSampleMAPE',
        'InSampleR2',
    ]
    summaries = f.export('model_summaries', determine_best_by='TestSetMAPE')
    return summaries[summary_columns]

def aqi_modelling(district):
  """Fit, evaluate and forecast the AQI series of one district with four models.

  Reads ``aqi_data/<district>-aqi.csv`` (needs ``Month``, ``Year`` and ``AQI``
  columns), builds a ``Forecaster`` on the ``AQI`` column, then runs MLR,
  ridge (cross-validated over an alpha grid), SGD (cross-validated) and a
  bagging ensemble of MLP regressors. For each model the test-set plot is
  shown; at the end a markdown summary is printed and an Excel workbook is
  written to ``./model_results``.

  Parameters
  ----------
  district : str
      District name used to build the input CSV path and the output
      workbook name.

  Returns
  -------
  None
      Results are shown as plots, printed, and written to disk.
  """
  import inspect  # local imports: used only for the compatibility/robustness steps below
  import os

  #Data-Preprocessing
  df = pd.read_csv('aqi_data/{}-aqi.csv'.format(district))
  df["Period"] = df["Month"].astype(str) + " " + df["Year"].astype(str) #adding month and year as a single column
  df = df.drop(columns=["Month","Year"]) #removing those individual columns
  y = df.set_index(['Period'])
  y.index = pd.to_datetime(y.index)

  #Set the forecaster
  f=Forecaster(y=y['AQI'],current_dates=y.index,metrics=['mae','mape','r2'])

  #Plot the original series
  plt.title('Original Series',size=16)
  plt.plot(y.index,y.AQI)
  plt.show()

  #Set forecaster parameters
  fcst_length = 12 #Forecasting for a year ahead. 12 months.
  f.generate_future_dates(fcst_length)
  f.set_test_length(20) #Test length is 20 samples
  f.set_validation_length(6) #Validation is 6 samples
  f.set_validation_metric('mape')
  f.eval_cis() # tell the object to build confidence intervals for all models
  f.add_ar_terms(6) #Add 6 lags
  f.add_seasonal_regressors('quarter',raw=False,sincos=True) #Quarterly
  f.add_seasonal_regressors('month',raw=False,dummy=True,drop_first=True) #Monthly
  f.add_seasonal_regressors('year') #Yearly

  #MLR
  f.set_estimator('mlr')
  f.manual_forecast()
  plot_test_export_summaries(f)

  #Ridge Regression
  f.set_estimator('ridge')
  ridge_grid = {'alpha':np.linspace(0,50,300)}
  f.ingest_grid(ridge_grid)
  f.cross_validate(k=3)
  f.auto_forecast()
  plot_test_export_summaries(f)

  #SGD Regressor
  # No grid is ingested for sgd in this function, so tuning presumably falls
  # back to the example grids file. Generate it here (same call the
  # multivariate function makes) so this function does not depend on another
  # cell having been run first.
  GridGenerator.get_example_grids()
  f.set_estimator('sgd')
  f.cross_validate(k=3)
  f.auto_forecast()
  plot_test_export_summaries(f)

  #Bagging
  f.add_sklearn_estimator(BaggingRegressor,'bagging')
  f.set_estimator('bagging')
  # scikit-learn renamed BaggingRegressor's `base_estimator` argument to
  # `estimator`; choose whichever name the installed version accepts so the
  # call works on both old and new releases.
  bagging_params = inspect.signature(BaggingRegressor).parameters
  base_kwarg = 'estimator' if 'estimator' in bagging_params else 'base_estimator'
  f.manual_forecast(
      max_samples = 0.9,
      max_features = 0.5,
      **{
          base_kwarg: MLPRegressor(
              hidden_layer_sizes=(25,25,25)
              ,solver='lbfgs'
          )
      },
  )
  plot_test_export_summaries(f)

  #Export the model data into a list
  results = f.export('model_summaries',determine_best_by='TestSetMAPE')
  summary_columns = [
      'ModelNickname',
      'HyperParams',
      'TestSetMAPE',
      'TestSetR2',
      'InSampleMAPE',
      'InSampleR2'
  ]
  print(results[summary_columns].to_markdown())

  #Export the model data and model predictions into an excel file for further use.
  out_dir = './model_results'
  os.makedirs(out_dir, exist_ok=True) # make sure the output folder exists before writing
  f.export(
      dfs=['model_summaries', 'lvl_test_set_predictions', 'lvl_fcsts'],
      models=['mlr','ridge','sgd','bagging'],
      to_excel=True,
      excel_name='{}-aqi-pred.xlsx'.format(district),
      out_path=out_dir,
  )

In [None]:
# Run the AQI modelling pipeline on aqi_data/Adilabad-aqi.csv.
aqi_modelling('Adilabad')

In [None]:
# Run the AQI modelling pipeline on aqi_data/Nizamabad-aqi.csv.
aqi_modelling('Nizamabad')

In [None]:
# Run the AQI modelling pipeline on aqi_data/Warangal-aqi.csv.
aqi_modelling('Warangal')

In [None]:
# Run the AQI modelling pipeline on aqi_data/Khammam-aqi.csv.
aqi_modelling('Khammam')

In [None]:
# Run the AQI modelling pipeline on aqi_data/Karimnagar-aqi.csv.
aqi_modelling('Karimnagar')