In [1]:
import math
import itertools

import pandas as pd
from sqlalchemy.sql import select

from predict_aqi.load_data import (
    AirLocation, get_db_session, load_air_location_data, load_measurement_data,
    load_nearby_location_measurement_data
)

# Pick one air location whose current AQI is above 100 and load its data.
session = get_db_session()
# No ORDER BY is applied, so which matching location comes back first is up to the database.
s = select([AirLocation]).where(AirLocation.aqi > 100)
# scalar() yields the first column of the first result row.
# NOTE(review): presumably that column is the AirLocation id -- confirm against the model's column order.
some_airlocation_id = session.execute(s).scalar()
some_airlocation = load_air_location_data(some_airlocation_id)

# Show which location was picked (short name, English country name).
print("{}, {}".format(some_airlocation.short_name, some_airlocation.en_country_name))

# Measurement history for the chosen location; passed to generate_AQI_inputs_and_outputs in a later cell.
measurements = load_measurement_data(some_airlocation_id)

  (attype, name))


Mudanjiang, China


In [2]:
# Import the regressor and build the model input/output columns
from sklearn.neural_network import MLPRegressor
from predict_aqi.model import generate_AQI_inputs_and_outputs
from predict_aqi import config


# Offsets ahead to predict: 2, 4, 6, ... stopping before config.NUMBER_AHEAD_TO_PREDICT + 2.
indices_ahead_to_predict = range(2, config.NUMBER_AHEAD_TO_PREDICT + 2, 2)
# Offsets behind to offer as inputs: 1 through 249.
indices_behind_to_use = range(1, 250)
# Returns the frame plus the names of its input and output columns.
# NOTE(review): presumably the frame holds one column per offset above -- confirm in predict_aqi.model.
df, input_columns, output_columns = generate_AQI_inputs_and_outputs(
    measurements, indices_behind_to_use, indices_ahead_to_predict
)

In [3]:
# Define functions for generate_predictions method

from predict_aqi.transform_data import generate_time_inputs


def first_step_format_inputs_outputs(all_data):
    """Return ``all_data`` untouched along with the first-step column lists.

    The column lists are not parameters: they are looked up from the
    notebook-level names ``input_columns`` and ``output_columns`` at call
    time, so rebinding those globals changes what this function returns.

    Returns:
        A 3-tuple ``(all_data, input_columns, output_columns)``.
    """
    first_step_inputs = input_columns
    first_step_outputs = output_columns
    return all_data, first_step_inputs, first_step_outputs

 
def first_step_split_function(all_data, input_columns, output_columns):
    """Split ``all_data`` by row position into train and test partitions.

    The first 90% of the rows become the training set and the remaining
    rows the test set, so the test set measures extrapolation past the
    rows the model was fitted on.

    Args:
        all_data: DataFrame containing every input and output column.
        input_columns: Names of the columns used as model inputs.
        output_columns: Names of the columns used as model targets.

    Returns:
        A 4-tuple ``(x_train, y_train, x_test, y_test)`` of DataFrames.
    """
    # count() returns the non-null count per column, indexed by column name.
    # Take the first column's count with .iloc: a plain [0] relies on the
    # deprecated positional fallback for integer keys on a label index.
    row_count = all_data.count().iloc[0]
    split_row = int(round(row_count * 0.90))

    inputs = all_data[input_columns]
    outputs = all_data[output_columns]
    return (inputs[:split_row],
            outputs[:split_row],
            inputs[split_row:],
            outputs[split_row:])


def second_step_format_inputs_outputs(all_data):
    """Prepare the frame and column lists for the second-step model.

    ``generate_time_inputs`` is applied to ``all_data``; the time columns it
    reports, followed by one ``<i>_ahead_first_step_pred`` column name per
    entry of the global ``indices_ahead_to_predict``, form the second-step
    inputs. The targets are the global ``output_columns``.

    Returns:
        A 3-tuple ``(all_data, second_input_columns, output_columns)``.
    """
    all_data, time_columns = generate_time_inputs(all_data)

    # Time columns first, then the first-step predictions, in horizon order.
    second_input_columns = list(time_columns)
    for ahead in indices_ahead_to_predict:
        second_input_columns.append('{}_ahead_first_step_pred'.format(str(ahead)))

    return all_data, second_input_columns, output_columns

# The second step reuses the first step's positional 90/10 train/test split unchanged.
second_step_split_function = first_step_split_function

In [6]:
from predict_aqi.model import generate_predictions
from sklearn.metrics import mean_absolute_error
import copy


# Row at which the data is cut 90/10. count() gives the non-null count per
# column; .iloc[0] takes the first column's count positionally (a plain [0]
# relies on a deprecated positional fallback on a label-indexed Series).
row_count = df.count().iloc[0]
split_row = int(round(row_count * 0.90))

# Keep the full list of input columns so each iteration can take a prefix of
# it and so the global can be restored once the sweep is finished.
largest_input_columns = copy.deepcopy(input_columns)

# errors[number_of_input_columns][step][index_ahead] -> mean absolute error
errors = {}

try:
    for indices_behind_to_use_index in range(12, 250, 12):
        # first_step_format_inputs_outputs reads the global input_columns, so
        # rebinding it here is what restricts the first step to this prefix.
        input_columns = largest_input_columns[:indices_behind_to_use_index]
        first_step_regressor = MLPRegressor()
        second_step_regressor = MLPRegressor()
        # Work on a copy so columns added by one iteration never reach the next.
        this_iteration_df = df.copy(deep=True)
        this_iteration_df = generate_predictions(
            this_iteration_df,
            first_step_format_inputs_outputs,
            first_step_split_function,
            second_step_format_inputs_outputs,
            second_step_split_function,
            first_step_regressor,
            second_step_regressor,
            indices_ahead_to_predict,
        )
        this_iteration_error_dict = {'first_step': {}, 'second_step': {}}
        for i in range(12, 48, 12):
            # NOTE(review): [:split_row] is the slice first_step_split_function
            # returns as the training set, so these look like in-sample errors.
            # Confirm whether [split_row:] (the test slice) was intended.
            actual = this_iteration_df['{}_ahead_AQI'.format(i)][:split_row]
            this_iteration_error_dict['first_step'][i] = mean_absolute_error(
                actual,
                this_iteration_df['{}_ahead_first_step_pred'.format(i)][:split_row]
            )
            this_iteration_error_dict['second_step'][i] = mean_absolute_error(
                actual,
                this_iteration_df['{}_ahead_second_step_pred'.format(i)][:split_row]
            )
        errors[indices_behind_to_use_index] = this_iteration_error_dict
        # Release the per-iteration copy before the next one is made.
        del this_iteration_df
finally:
    # Put the full column list back so re-running this cell (or any later
    # cell that reads input_columns) does not see the last truncated prefix.
    input_columns = largest_input_columns
    

Step 1
Training MLPRegressor...


Done!
Training time (secs): 0.578
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.707
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.586
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.821
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.033
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.545
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.943
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.028
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.675
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.029
Step 2


Training MLPRegressor...


Done!
Training time (secs): 1.065
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.496
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 1.015
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.025
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.477
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 1.130
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.025
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.489
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 1.022
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.572
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.765
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.025
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.489
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.925
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.480
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.823
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.491
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.828
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.484
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.815
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.501
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.807
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.025
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.486
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.820
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.493
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.800
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.025
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.482
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.023
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.705
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.495
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.813
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.577
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.037
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.809
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.507
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.032
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.697
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024
Step 1


Training MLPRegressor...


Done!
Training time (secs): 0.481
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.022
Step 2


Training MLPRegressor...


Done!
Training time (secs): 0.898
Predicting values using MLPRegressor...
Done!
Prediction time (secs): 0.024


In [10]:
# Indices ahead for which errors were recorded; must match the inner
# range(12, 48, 12) used when `errors` was filled in.
error_indices = range(12, 48, 12)

# `errors` is keyed by the number of input columns used, then by step name,
# then by index ahead. Flatten each entry to a list ordered like
# `error_indices` so it lines up with the x axis when plotted.
# (The previous version indexed the `error_indices` range itself instead of
# `errors`, which raises IndexError.)
first_step_errors = {
    window: [errors[window]['first_step'][i] for i in error_indices]
    for window in sorted(errors)
}

second_step_errors = {
    window: [errors[window]['second_step'][i] for i in error_indices]
    for window in sorted(errors)
}

96
{'second_step': {24: 0.059711159459524141, 36: 0.062978334565501864, 12: 0.054065315569793188}, 'first_step': {24: 0.066430080484686585, 36: 0.07423218095167658, 12: 0.056408194986850117}}
144
{'second_step': {24: 0.056212840401468199, 36: 0.058767488372728778, 12: 0.052156765757485297}, 'first_step': {24: 0.066082710382869336, 36: 0.073131436180919676, 12: 0.056225640550654737}}
240
{'second_step': {24: 0.05785388402123659, 36: 0.060659361342043291, 12: 0.051469550124866974}, 'first_step': {24: 0.067461082725609259, 36: 0.074637444343475218, 12: 0.057797879612618355}}
180
{'second_step': {24: 0.056877905490638321, 36: 0.06016619665697804, 12: 0.051606645589273727}, 'first_step': {24: 0.065933290497265679, 36: 0.073083761840362735, 12: 0.056378903259939772}}
132
{'second_step': {24: 0.056511919224299607, 36: 0.058698036440408968, 12: 0.051468268045121543}, 'first_step': {24: 0.06697457944909456, 36: 0.072050860547980031, 12: 0.056336590617269186}}
204
{'second_step': {24: 0.05867132

In [None]:
import matplotlib.pyplot as plt
# x positions: the indices ahead that errors were recorded for. This has to be
# range(12, 48, 12) (three points) to match the three errors stored per entry;
# range(12, 50, 12) yields four points and makes scatter reject the data.
horizons = list(range(12, 48, 12))

for distance in first_step_errors:
    plt.scatter(
        horizons,
        # list() so a dict view or any other iterable is accepted by matplotlib
        y=list(first_step_errors[distance]),
        label="{}hrs input first step error".format(distance // 30)
    )
    plt.scatter(
        horizons,
        y=list(second_step_errors[distance]),
        label="{}hrs input second step error".format(distance // 30)
    )

# The labels above are only visible once a legend is drawn.
plt.xlabel("Indices ahead predicted")
plt.ylabel("Mean absolute error")
plt.legend()
#fig = aqi_graph.figure
#fig.set_size_inches((15, 10), forward=True)
#fig.suptitle("AQI actual and predictions (all of test split)", fontsize=20)
#aqi_graph.axis([0.0, 130000000.0, 0.0, 250.0])