In [11]:
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.forecasting.theta import ThetaForecaster
from sktime.performance_metrics.forecasting import mean_absolute_error,mean_squared_error,mean_absolute_percentage_error,mean_squared_percentage_error,median_absolute_percentage_error
import pandas as pd
from sktime.utils.plotting import plot_series
import sys
from sklearn.metrics import mean_absolute_error, r2_score
from sktime.forecasting.arima import AutoARIMA, ARIMA
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
import matplotlib.pyplot as plt
import numpy as np

import requests
from datetime import datetime, timedelta
from sseclient import SSEClient
import json
import js2py



class AQIData:
    def __init__(self):
        self.JS_FUNCS: str = """
        function checkValidDigitNumber(t) {
            return !isNaN(t) && parseInt(Number(t)) == t && !isNaN(parseInt(t, 10))
        }

        function a(backendData, n) {
            var e = 0,
                i = 0,
                r = 0,
                o = 1,
                resultArray = [];

            function s(t, r) {
                /* Variable r seems to uselessly bounce from 0 to 1 to 0 for no reason
                other than to obfuscate

                If r is 0 the code executes, otherwise it won't */

                for (0 == r && (r = 1); r > 0; r--) e++, i += t, resultArray.push({
                    t: n(e), /** n seems to be a method to determine "which day of month" */
                    v: i * o /** appears to be "value"? */
                })
            }

            function charInPositionIsDigit(t) {
                /* ASCII 48-57 is for 0-9 (digits) */
                return backendData.charCodeAt(t) >= 48 && backendData.charCodeAt(t) <= 57
            }
            for (var idx = 0; idx < backendData.length; idx++) {
                var u = function() {
                        var t = 0,
                            n = 1;
                            /** 45 is ASCII for - and 46 is ASCII for . */
                        for (45 == backendData.charCodeAt(idx + 1) && (n = -1, idx++); charInPositionIsDigit(idx + 1);) t = 10 * t + (backendData.charCodeAt(idx + 1) - 48), idx++;
                        return 46 == backendData.charCodeAt(idx + 1) && idx++, n * t
                    },
                    h = backendData.charCodeAt(idx);
                if (0 == idx && 42 == h) o = 1 / u(), idx++;    /* 42 is ASCII for * */
                else if (36 == h) e += 1;           /* 36 is ASCII for $ */
                else if (37 == h) e += 2;           /* 37 is ASCII for % */
                else if (39 == h) e += 3;           /* 39 is ASCII for ' */
                else if (47 == h) o = u(), idx++;     /* 47 is ASCII for / */
                else if (33 == h) s(u(), r), r = 0; /* 33 is ASCII for ! */
                else if (124 == h) e += u() - 1;    /* 124 is ASCII for | */
                else if (h >= 65 && h <= 90) s(h - 65, r), r = 0;           /* This conditional is true when given ASCII for uppercase A-Z */
                else if (h >= 97 && h <= 122) s(-(h - 97) - 1, r), r = 0;   /* This conditional is true when given ASCII for lowercase a-z */
                else {
                    if (!(h >= 48 && h <= 57)) throw "decode: invalid character " + h + " (" + backendData.charAt(idx) + ") at " + idx;
                    r = 10 * r + h - 48
                }
            }
            return resultArray
        }

        function s(t) {
            /* NOTE: Appears to be the "main gun" since here's a try catch block */
            if (!t) return null;
            try {
                var n, e, i = [],
                    r = {
                        pm25: "PM<sub>2.5</sub>",
                        pm10: "PM<sub>10</sub>",
                        o3: "O<sub>3</sub>",
                        no2: "NO<sub>2</sub>",
                        so2: "SO<sub>2</sub>",
                        co: "CO"
                    },
                    o = function() {
                        try {
                            n = [];
                            var o = t.ps[s]; /* Long string backend data is o */
                            if ("1" == o[0]) n = a(o.substr(1), function(n) {
                                return {
                                    d: c(new Date(3600 * (n * t.dh + t.st) * 1e3)), /** This expression results in 'seconds after Unix epoch' style value. st is an "hour after Unix epoch" value. */
                                    t: n
                                }
                            });
                            else if ("2" == o[0]) {
                                e = {};
                                var d = "w" == o[1];
                                for (var l in o.substr(3).split("/").forEach(function(n) {
                                        a(n, function(n) {
                                            if (d) {
                                                var e = n + t.st,
                                                    i = e % 53;
                                                return {
                                                    d: c(function(t, n, e) {
                                                        var i = 2 + e + 7 * (n - 1) - new Date(t, 0, 1).getDay();
                                                        return new Date(t, 0, i)
                                                    }(a = (e - i) / 53, i, 0)),
                                                    t: n
                                                }
                                            }
                                            var r = n + t.st,
                                                o = r % 12,
                                                a = (r - o) / 12;
                                            return {
                                                d: c(new Date(a, o)),
                                                t: n
                                            }
                                        }).forEach(function(t) {
                                            var n = t.t.t;
                                            e[n] = e[n] || {
                                                v: [],
                                                t: t.t
                                            }, e[n].v.push(t.v)
                                        })
                                    }), e) n.push(e[l])
                            }
                            n.forEach(function(t, e) {
                                n[e].t.dh = e ? (t.t.d.getTime() - n[e - 1].t.d.getTime()) / 36e5 : 0
                            }), i.push({
                                name: r[s] || s,
                                values: n,
                                pol: s
                            })
                        } catch (t) {
                            console.error("decode: Oopps...", t)
                        }
                    };
                for (var s in t.ps) o(); /* For each variable? do o()*/
                return i.sort(function(t, n) {
                    var e = ["pm25", "pm10", "o3", "no2", "so2", "co"],
                        i = e.indexOf(t.pol),
                        r = e.indexOf(n.pol);
                    return r < 0 ? 1 : i < 0 ? -1 : i - r
                }), {
                    species: i,
                    dailyhours: t.dh,
                    source: t.meta.si,
                    period: t.period
                }
            } catch (t) {
                return console.error("decode:", t), null
            }
        }

        function c(t) {
            return new Date(t.getUTCFullYear(), t.getUTCMonth(), t.getUTCDate(), t.getUTCHours(), t.getUTCMinutes(), t.getUTCSeconds())
        }

        function gatekeep_convert_date_object_to_unix_seconds(t) {
            /** Wrapper function:
                Perform decoding using s() function above, and afterwards convert all Date objects within
                the result into Unix timestamps, i.e. 'seconds since 1970/1/1'.
                This is necessary so that the Python context can convert that Unix timestamps back into datetime objects.
                js2py is unable to (at the time of writing, to my limited knowledge) convert JS Date objects into Python-understandable objects.
            */
            var RES = s(t)
            for(var i = 0; i < RES.species.length; i++){
            var values = RES.species[i].values
                for(var j = 0; j < values.length; j++){
                    values[j].t.d = values[j].t.d.getTime()/1000
                }
            RES.species[i].values = values
            }
            return RES
        }
        """


        # NOTE(lahdjirayhan):
        # The JS_FUNCS variable is a long string, a source JS code that
        # is excerpted from one of aqicn.org frontend's scripts.
        # See relevant_funcs.py for more information.


        # Make js context where js code can be executed
        self._context = js2py.EvalJs()
        self._context.execute(self.JS_FUNCS)

    # def __init__(self, token:str = '') -> None:
    #     self.token = token

    def parse_incoming_result(self, json_object: dict) -> pd.DataFrame:
        # Run JS code
        # Function is defined within JS code above
        # Convert result to Python dict afterwards
        OUTPUT = self._context.gatekeep_convert_date_object_to_unix_seconds(
            json_object["msg"]
        ).to_dict()

        result_dict = {}
        for spec in OUTPUT["species"]:
            pollutant_name: str = spec["pol"]

            dates, values = [], []
            for step in spec["values"]:
                # Change unix timestamp back to datetime
                date = datetime.fromtimestamp(step["t"]["d"])
                value: int = step["v"]

                dates.append(date)
                values.append(value)

            series = pd.Series(values, index=dates)
            result_dict[pollutant_name] = series

        FRAME = pd.DataFrame(result_dict)
        return FRAME

        
    def get_results_from_backend(self, city_id: int):
        event_data_url = f"https://api.waqi.info/api/attsse/{city_id}/yd.json"

        r = requests.get(event_data_url)

        # Catch cases where the returned response is not a server-sent events,
        # i.e. an error.
        if "text/event-stream" not in r.headers["Content-Type"]:
            raise Exception(
                "Server does not return data stream. "
                f'It is likely that city ID "{city_id}" does not exist.'
            )

        client = SSEClient(event_data_url)
        result = []

        for event in client:
            if event.event == "done":
                break

            try:
                if "msg" in event.data:
                    result.append(json.loads(event.data))
            except json.JSONDecodeError:
                pass

        return result


    def get_data_from_id(self, city_id: int) -> pd.DataFrame:
        backend_data = self.get_results_from_backend(city_id)
        result = pd.concat([self.parse_incoming_result(data) for data in backend_data])
        # result = parse_incoming_result(backend_data[0])

        # Arrange to make most recent appear on top of DataFrame
        result = result.sort_index(ascending=False, na_position="last")

        # Deduplicate because sometimes the backend sends duplicates
        result = result[~result.index.duplicated()]

        # Reindex to make missing dates appear with value nan
        # Conditional is necessary to avoid error when trying to
        # reindex empty dataframe i.e. just in case the returned
        # response AQI data was empty.
        if len(result) > 1:
            complete_days = pd.date_range(
                result.index.min(), result.index.max(), freq="D"
            )
            result = result.reindex(complete_days, fill_value=None)

            # Arrange to make most recent appear on top of DataFrame
            result = result.sort_index(ascending=False, na_position="last")

        return result

    
    def get_city_station_options(self, city: str) -> pd.DataFrame:
        """Get available stations for a given city
        Args:
            city (str): Name of a city.

        Returns:
            pd.DataFrame: Table of stations and their relevant information.
        """
        # NOTE, HACK, FIXME:
        # This functionality was born together with historical data feature.
        # This endpoint is outside WAQI API's specification, thus not using
        # _check_and_get_data_obj private method above.
        # If exists, alternative within API's spec is more than welcome to
        # replace this implementation.
        r = requests.get(f"https://search.waqi.info/nsearch/station/{city}")
        res = r.json()

        city_id, country_code, station_name, city_url, score = [], [], [], [], []

        for candidate in res["results"]:
            city_id.append(candidate["x"])
            country_code.append(candidate["c"])
            station_name.append(candidate["n"])
            city_url.append(candidate["s"].get("u"))
            score.append(candidate["score"])

        return pd.DataFrame(
            {
                "city_id": city_id,
                "country_code": country_code,
                "station_name": station_name,
                "city_url": city_url,
                "score": score,
            }
        ).sort_values(by=["score"], ascending=False)


    def get_historical_data(
        self, city: str = None, city_id: int = None  # type: ignore
    ) -> pd.DataFrame:
        """Get historical air quality data for a city

        Args:
            city (str): Name of the city. If given, the argument must be named.
            city_id (int): City ID. If given, the argument must be named.
                If not given, city argument must not be None.

        Returns:
            pd.DataFrame: The dataframe containing the data.
        """
        if city_id is None:
            if city is None:
                raise ValueError("If city_id is not specified, city must be specified.")

            # Take first search result
            search_result = self.get_city_station_options(city)
            if len(search_result) == 0:
                return 404

            first_result = search_result.iloc[0, :]

            city_id = first_result["city_id"]
            station_name = first_result["station_name"]
            country_code = first_result["country_code"]

        df = self.get_data_from_id(city_id)
        if "pm25" in df.columns:
            # This ensures that pm25 data is labelled correctly.
            df.rename(columns={"pm25": "pm2.5"}, inplace=True)

        # Reset date index and rename the column appropriately
        # df = df.reset_index().rename(columns={"index": "date"})
        # print(df)

        return [df ,city , station_name, country_code]


def getCityData(city_name):
    #creating AQI data object
    o = AQIData()
    # dataset = o.get_historical_data(city="New York")
    # forecaster = AutoARIMA(sp=1, suppress_warnings=True)

    #creating forecaster Object
    # forecaster = Prophet(yearly_seasonality=True, weekly_seasonality=True)
    # forecaster = ThetaForecaster(sp=12)

    #getting historical data of the city
    data = o.get_historical_data(city=city_name)
    
    #for storing final output
    finalOut = {}

    #if data exists about the city
    if data != 404:

        dates = [i for i in data[0].index]
        for i in range(len(dates)):
            if(dates[i].date() > datetime.now().date()):
                data[0].drop(index=dates[i].date(), inplace=True)

        dataset = data[0]
        dataset = dataset.dropna()
        #for arrainging in ascending order of dates
        dataset = dataset.resample(rule='D').sum()


        mean_value = dataset.mean()

        # Replace zeros with mean value
        dataset = dataset.replace(0, mean_value)

        #saving the file locally without index
        # dataset.to_csv(f"Data/{city_name}_data.csv", index=False)

        #reading the file while parsing the dates
        # dataset = pd.read_csv(f"Data/{city_name}_data.csv", parse_dates=[0], index_col=[0])
        # print(dataset)
        #remove future dates in the dateset
        

        # dataset.to_csv('data.csv', index=False)
        # dataset.to_csv(f'Datasets/{city_name}_Data.csv', index=True)
        # d = pd.DataFrame(csv) 
        # t = sktime_forecast(dataset=dataset, horizon=30, validation=False)

        return dataset

    #     predicted_data, plotImages = t[0], t[1]

    #     #for present day datax``
    #     presentDayData = {}
    #     for i in data[0]:
    #         if str(data[0][i][0]) != 'nan':
    #             presentDayData[i] = data[0][i][0]


    #     finalOut = {
    #         'code' : 200,
    #         'response' : {
    #             "predicted_data" : predicted_data,
    #             "presentDayData" : presentDayData,
    #             "city_name" : data[1],
    #             "city_station" : data[2],
    #             "country_code" : data[3],
    #             "plotImages" : plotImages
    #         }
    #     }

    # else:
    #     finalOut = {
    #         'code' : 404
    #     }

    # return finalOut


In [12]:
d = getCityData(city_name='Delhi')

In [31]:

class Sktime_forecast:
    def __init__(self, dataset) -> None:
        self.dataset = dataset
        self.horizon = 30
        # self.validation = False
        self.confidence = 0.9 
        self.frequency = "D"

        self.y_train = dataset[:-30]
        self.y_test = dataset.tail(30)

        self.fh_train = ForecastingHorizon(self.y_test.index, is_relative=False)

        #for present date
        present_date = datetime.now().date()
        #to start predictions from tomorrow
        present_date = str(present_date + timedelta(days=1)).split(' ')[0]

        self.fh_pred = ForecastingHorizon(pd.date_range(str(present_date), periods=30, freq='D'),is_relative=False)

    def getAccuracyMetrics(self, forecaster):
        print('Inside Accuracy Metrics')
        performance_metrics = {}
        for param in self.y_train:
            print(param)
            forecaster.fit(self.y_train[param])
            y_pred = forecaster.predict(self.fh_train)
            # ci = forecaster.predict_interval(fh, coverage=0.9).astype("float")
            y_true = self.dataset[param].tail(30)

            mae = mean_absolute_error(y_true, y_pred)
            mape = mean_absolute_percentage_error(y_true, y_pred)
            mse = mean_squared_error(y_true, y_pred)
            rmse = np.sqrt(mse)
            mspe = mean_squared_percentage_error(y_true, y_pred)    
            mape = median_absolute_percentage_error(y_true, y_pred)
            # r2_score_ = r2_score(y_true, y_pred)

            performance_metrics[param] = {
                'mae': mae,
                'mape': mape,
                'mse': mse,
                'rmse': rmse,
                'mspe': mspe,
                'mape': mape
            }

        return performance_metrics
    
    def getPredictions(self, forecaster):
        print('inside Prediction')
        predictions = {}
        
        for param in self.dataset:
            print(param)
            forecaster.fit(self.dataset[param])

            y_pred = forecaster.predict(self.fh_pred)

            # predictions[param] = y_pred

            for i in self.fh_pred:
                try:
                    predictions[i.strftime('%Y-%m-%d')][param] = y_pred[i]
                except:
                    predictions[i.strftime('%Y-%m-%d')] = {}
                    predictions[i.strftime('%Y-%m-%d')][param] = y_pred[i]


        return predictions
    

    def getAQIForecasts(self):
        
        forecasters = {
            'Prophet' : Prophet(yearly_seasonality=True, weekly_seasonality=True),
            'ExponentialSmoothening' : ExponentialSmoothing(trend="mul", seasonal="mul", sp=12),
            'AutoARIMA': AutoARIMA(sp=1, suppress_warnings=True)
        }

        forecasts = {}

        for forecaster_name, forecaster in forecasters.items():
            forecasts[forecaster_name] = {}
            forecasts[forecaster_name] = {}
            forecasts[forecaster_name]['accuracy_metrics'] = self.getAccuracyMetrics(forecaster)
            forecasts[forecaster_name]['predictions'] = self.getPredictions(forecaster)
        
        return forecasts
        
    """Loop over a time series dataframe, train an sktime forecasting model, and visualize the results.

    Args:
        dataset (pd.DataFrame): Input time series DataFrame with datetime index
        horizon (int): Forecast horizon
        forecaster (sktime.forecasting): Configured forecaster
        validation (bool, optional): . Defaults to False.
        confidence (float, optional): Confidence level. Defaults to 0.9.
        frequency (str, optional): . Defaults to "D".
    """

    # forecaster = Prophet(yearly_seasonality=True, weekly_seasonality=True)
    # # Adjust frequency of index(dates)
    # forecast_df = dataset.resample(rule=frequency).sum()
    # # Interpolate missing periods (if any)
    # forecast_df = forecast_df.interpolate(method="time")

    # all_parameters_values = {}

    # #to store plot images
    # plotImages = {}
    # for col in dataset.columns:
    #     # Use train/test split to validate forecaster
    #     if validation:
    #         df = forecast_df[col]

    #         y_train = df[:-horizon]
    #         y_test = df.tail(horizon)

    #         forecaster.fit(y_train)
    #         fh = ForecastingHorizon(y_test.index, is_relative=False)
    #         y_pred = forecaster.predict(fh)
    #         ci = forecaster.predict_interval(fh, coverage=confidence).astype("float")
    #         y_true = df.tail(horizon)

    #         # mae = mean_absolute_error(y_true, y_pred)

    #     # Make predictions beyond the dataset
    #     if not validation:
    #         df = forecast_df[col].dropna()
          
    #         forecaster.fit(df)

    #         #for present date            
    #         present_date = datetime.now().date()
    #         #to start predictions from tomorrow
    #         present_date = str(present_date + timedelta(days=1)).split(' ')[0]
    #         fh = ForecastingHorizon(
    #             pandas.date_range(str(present_date), periods=horizon, freq=frequency),
    #             is_relative=False,
    #         )

    #         y_pred = forecaster.predict(fh)
    #         ci = forecaster.predict_interval(fh, coverage=confidence).astype("float")
    #         # mae = np.nan

        # Visualize results
        # plt.plot(
        #     df.tail(horizon),
        #     label="Actual",h
        #     color="black",
        # )
        # plt.gca().fill_between(
        #     ci.index, (ci.iloc[:, 0]), (ci.iloc[:, 1]), color="b", alpha=0.1
        # )
        # print(y_pred)
        # plt.imshow(y_pred, cmap='hot', interpolation='nearest')
        # # plt.plot(y_pred, label="Predicted")
        # # plt.xticks(rotation=30, ha='right')
        # # # plt.title(
        # # #     f"{horizon} day forecast for {col} (mae: {round(mae, 2)}, confidence: {confidence*100}%)"
        # # # )
        # # plt.ylim(bottom=0)
        # # plt.legend()
        # # plt.grid(False)
        # plt.show()
        # print(y_pred)
            
        # # data = np.random.rand(10, 10)

        # # Create heatmap
        # # plt.imshow(data, cmap='hot', interpolation='nearest')
        # # plt.colorbar()  # Add color bar indicating the scale
        # # plt.show()

        # plt.figure(figsize=(8, 6))  # Adjust the figure size if needed
        # sns.heatmap(y_pred, annot=True, cmap='coolwarm', linewidths=.5)
        # plt.title('Heatmap of DataFrame')
        # plt.show()

        # buffer = BytesIO()
        # # plt.savefig(buffer, format='png')
        # buffer.seek(0)
        # image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
        # buffer.close()

        # plotImages[col] = image_base64
        # print(image_base64)
        # print("Mean Absolute Error : ", mae)

        # try :
        #     temp = all_parameters_values['date']
        # except:
        #     all_parameters_values['date'] = [i.strftime("%d-%m-%Y") for i in fh]

    #     all_parameters_values[col] = y_pred.values
    

    
    # dates = [i.strftime("%d-%m-%Y") for i in fh]

    # predicted_data = {}
    # for date in range(len(dates)):
    #     temp = {}
    #     for param in all_parameters_values:
    #         temp[param] = all_parameters_values[param][date]
    #     predicted_data[dates[date]] = temp
    
    # return [predicted_data, plotImages]



In [48]:
forecast_df = d1.resample(rule='D').sum()
# print(forecast_df)
# forecast_df = forecast_df.interpolate(method="time")
forecast_df

TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'Index'

In [32]:
obj = Sktime_forecast(forecast_df)
forecaster = Prophet(yearly_seasonality=True, weekly_seasonality=True)
# obj.getAccuracyMetrics(forecaster)
# obj.getPredictions(forecaster)
obj.getAQIForecasts()

Inside Accuracy Metrics
pm2.5


18:17:44 - cmdstanpy - INFO - Chain [1] start processing
18:17:44 - cmdstanpy - INFO - Chain [1] done processing


pm10


18:17:45 - cmdstanpy - INFO - Chain [1] start processing
18:17:45 - cmdstanpy - INFO - Chain [1] done processing


o3


18:17:46 - cmdstanpy - INFO - Chain [1] start processing
18:17:46 - cmdstanpy - INFO - Chain [1] done processing


no2


18:17:46 - cmdstanpy - INFO - Chain [1] start processing
18:17:47 - cmdstanpy - INFO - Chain [1] done processing


so2


18:17:47 - cmdstanpy - INFO - Chain [1] start processing
18:17:47 - cmdstanpy - INFO - Chain [1] done processing


co


18:17:48 - cmdstanpy - INFO - Chain [1] start processing
18:17:48 - cmdstanpy - INFO - Chain [1] done processing


inside Prediction
pm2.5


18:17:49 - cmdstanpy - INFO - Chain [1] start processing
18:17:49 - cmdstanpy - INFO - Chain [1] done processing


pm10


18:17:49 - cmdstanpy - INFO - Chain [1] start processing
18:17:49 - cmdstanpy - INFO - Chain [1] done processing


o3


18:17:50 - cmdstanpy - INFO - Chain [1] start processing
18:17:50 - cmdstanpy - INFO - Chain [1] done processing


no2


18:17:51 - cmdstanpy - INFO - Chain [1] start processing
18:17:51 - cmdstanpy - INFO - Chain [1] done processing


so2


18:17:52 - cmdstanpy - INFO - Chain [1] start processing
18:17:52 - cmdstanpy - INFO - Chain [1] done processing


co


18:17:53 - cmdstanpy - INFO - Chain [1] start processing
18:17:53 - cmdstanpy - INFO - Chain [1] done processing


Inside Accuracy Metrics
pm2.5


  return err.T @ err


pm10


  return err.T @ err
  return err.T @ err


o3
no2


  return err.T @ err


so2


  return err.T @ err


co


  return err.T @ err


inside Prediction
pm2.5


  return err.T @ err


pm10


  return err.T @ err
  return err.T @ err


o3
no2


  return err.T @ err


so2


  return err.T @ err


co


  return err.T @ err


Inside Accuracy Metrics
pm2.5
pm10
o3
no2
so2
co
inside Prediction
pm2.5
pm10
o3
no2
so2
co


{'Prophet': {'accuracy_metrics': {'pm2.5': {'mae': 37.31928262952046,
    'mape': 0.22567229626325558,
    'mse': 1889.3322683034335,
    'rmse': 43.466449916037924,
    'mspe': 0.08744409462717434},
   'pm10': {'mae': 42.967188489581964,
    'mape': 0.2616942576340397,
    'mse': 2806.941339040315,
    'rmse': 52.98057511050928,
    'mspe': 0.1491707266556821},
   'o3': {'mae': 4.0101483885845095,
    'mape': 0.2891495923752062,
    'mse': 23.31097501552715,
    'rmse': 4.828144054968446,
    'mspe': 0.7078145154264786},
   'no2': {'mae': 14.663560721444169,
    'mape': 0.6671857049702068,
    'mse': 324.50779158469743,
    'rmse': 18.014099799454243,
    'mspe': 0.9596066366931848},
   'so2': {'mae': 0.8086225134644586,
    'mape': 0.20204896944041378,
    'mse': 0.9339002460026897,
    'rmse': 0.9663851437199817,
    'mspe': 0.26135606621678614},
   'co': {'mae': 1.9760008135560156,
    'mape': 0.14529840587433412,
    'mse': 7.070768585642813,
    'rmse': 2.659091684324332,
    'ms

In [28]:
obj.getPredictions(forecaster)

08:14:30 - cmdstanpy - INFO - Chain [1] start processing
08:14:30 - cmdstanpy - INFO - Chain [1] done processing
08:14:30 - cmdstanpy - INFO - Chain [1] start processing
08:14:31 - cmdstanpy - INFO - Chain [1] done processing
08:14:31 - cmdstanpy - INFO - Chain [1] start processing
08:14:31 - cmdstanpy - INFO - Chain [1] done processing
08:14:32 - cmdstanpy - INFO - Chain [1] start processing
08:14:32 - cmdstanpy - INFO - Chain [1] done processing
08:14:32 - cmdstanpy - INFO - Chain [1] start processing
08:14:33 - cmdstanpy - INFO - Chain [1] done processing
08:14:33 - cmdstanpy - INFO - Chain [1] start processing
08:14:33 - cmdstanpy - INFO - Chain [1] done processing


{'pm2.5': 2024-03-09    152.174033
 2024-03-10    140.279751
 2024-03-11    138.199461
 2024-03-12    142.791633
 2024-03-13    141.759507
 2024-03-14    138.607607
 2024-03-15    138.815134
 2024-03-16    133.514215
 2024-03-17    122.548945
 2024-03-18    121.553955
 2024-03-19    127.377326
 2024-03-20    127.708547
 2024-03-21    126.035375
 2024-03-22    127.817671
 2024-03-23    124.166148
 2024-03-24    114.901981
 2024-03-25    115.635960
 2024-03-26    123.192025
 2024-03-27    125.235836
 2024-03-28    125.232227
 2024-03-29    128.619597
 2024-03-30    126.489169
 2024-03-31    118.645031
 2024-04-01    120.683657
 2024-04-02    129.417705
 2024-04-03    132.504760
 2024-04-04    133.404867
 2024-04-05    137.554892
 2024-04-06    136.047656
 2024-04-07    128.691766
 Name: pm2.5, dtype: float64,
 'pm10': 2024-03-09    125.579727
 2024-03-10    113.684256
 2024-03-11    112.670790
 2024-03-12    125.373499
 2024-03-13    118.849417
 2024-03-14    127.769988
 2024-03-15    11

In [None]:
forecast_df = d.resample(rule='D').sum()    
forecast_df = forecast_df.interpolate(method="time")

df = forecast_df['pm2.5']

mean_value = df.mean()

# Replace zeros with mean value
df = df.replace(0, mean_value)

y_train = df[:-30]
y_test = df.tail(30)

# y_train, y_test = temporal_train_test_split(df, train_size=0.8)

In [33]:
import requests as re

r = re.get('http://127.0.0.1:8000/data/delhi')
print(r)

<Response [200]>


In [37]:
r.content

b'{"pm2.5":{"2024-03-12T00:00:00":167.0,"2024-03-11T00:00:00":164.0,"2024-03-10T00:00:00":134.0,"2024-03-09T00:00:00":145.0,"2024-03-08T00:00:00":165.0,"2024-03-07T00:00:00":148.0,"2024-03-06T00:00:00":136.0,"2024-03-05T00:00:00":124.0,"2024-03-04T00:00:00":125.0,"2024-03-03T00:00:00":102.0,"2024-03-02T00:00:00":181.0,"2024-03-01T00:00:00":150.0,"2024-02-29T00:00:00":145.0,"2024-02-28T00:00:00":149.0,"2024-02-27T00:00:00":161.0,"2024-02-26T00:00:00":155.0,"2024-02-25T00:00:00":159.0,"2024-02-24T00:00:00":138.0,"2024-02-23T00:00:00":142.0,"2024-02-22T00:00:00":187.0,"2024-02-21T00:00:00":200.0,"2024-02-20T00:00:00":179.0,"2024-02-19T00:00:00":188.0,"2024-02-18T00:00:00":211.0,"2024-02-17T00:00:00":203.0,"2024-02-16T00:00:00":218.0,"2024-02-15T00:00:00":263.0,"2024-02-14T00:00:00":316.0,"2024-02-13T00:00:00":238.0,"2024-02-12T00:00:00":254.0,"2024-02-11T00:00:00":284.0,"2024-02-10T00:00:00":175.0,"2024-02-09T00:00:00":160.0,"2024-02-08T00:00:00":163.0,"2024-02-07T00:00:00":161.0,"2024-02

In [49]:
d1 = pd.DataFrame(dict_data, index=0)

TypeError: Index(...) must be called with a collection of some kind, 0 was passed

In [44]:
d1

Unnamed: 0,pm2.5,pm10,o3,no2,so2,co
2024-03-12T00:00:00,167.0,122.0,3.0,16.0,4.0,11.0
2024-03-11T00:00:00,164.0,137.0,15.0,14.0,4.0,11.0
2024-03-10T00:00:00,134.0,128.0,13.0,15.0,4.0,11.0
2024-03-09T00:00:00,145.0,106.0,11.0,13.0,3.0,10.0
2024-03-08T00:00:00,165.0,115.0,10.0,14.0,3.0,10.0
...,...,...,...,...,...,...
2018-01-28T00:00:00,313.0,250.0,5.0,12.0,1.0,11.0
2018-01-27T00:00:00,268.0,330.0,6.0,13.0,2.0,13.0
2018-01-26T00:00:00,260.0,182.0,4.0,9.0,1.0,6.0
2018-01-25T00:00:00,274.0,207.0,2.0,17.0,2.0,8.0


In [42]:
dict_data

{'pm2.5': {'2024-03-12T00:00:00': 167.0,
  '2024-03-11T00:00:00': 164.0,
  '2024-03-10T00:00:00': 134.0,
  '2024-03-09T00:00:00': 145.0,
  '2024-03-08T00:00:00': 165.0,
  '2024-03-07T00:00:00': 148.0,
  '2024-03-06T00:00:00': 136.0,
  '2024-03-05T00:00:00': 124.0,
  '2024-03-04T00:00:00': 125.0,
  '2024-03-03T00:00:00': 102.0,
  '2024-03-02T00:00:00': 181.0,
  '2024-03-01T00:00:00': 150.0,
  '2024-02-29T00:00:00': 145.0,
  '2024-02-28T00:00:00': 149.0,
  '2024-02-27T00:00:00': 161.0,
  '2024-02-26T00:00:00': 155.0,
  '2024-02-25T00:00:00': 159.0,
  '2024-02-24T00:00:00': 138.0,
  '2024-02-23T00:00:00': 142.0,
  '2024-02-22T00:00:00': 187.0,
  '2024-02-21T00:00:00': 200.0,
  '2024-02-20T00:00:00': 179.0,
  '2024-02-19T00:00:00': 188.0,
  '2024-02-18T00:00:00': 211.0,
  '2024-02-17T00:00:00': 203.0,
  '2024-02-16T00:00:00': 218.0,
  '2024-02-15T00:00:00': 263.0,
  '2024-02-14T00:00:00': 316.0,
  '2024-02-13T00:00:00': 238.0,
  '2024-02-12T00:00:00': 254.0,
  '2024-02-11T00:00:00': 284.0,

In [41]:
import json

# Step 1: Decode the bytes into a string
string_data = r.content.decode('utf-8')

# Step 2: Parse the string into a dictionary
dict_data = json.loads(string_data)