Clone the repository

In [None]:
# Shell escape: clone the pypelines repository from GitHub (git creates a `pypelines` directory).
!git clone https://github.com/Zerve-AI/pypelines.git

Installing the pypelines package

In [2]:
import os

# Directory that contains the cloned `pypelines` checkout. Leave it empty when
# the repository was cloned into the current working directory.
folder = ''

# os.path.join keeps the path relative when `folder` is empty; an f-string of
# the form '{folder}/pypelines' would resolve to '/pypelines' at the
# filesystem root in that case.
os.chdir(os.path.join(folder, 'pypelines'))

In [None]:
!pip install .

LIST OF MODELS

MODELS FOR TIME SERIES REGRESSION PROBLEM

In [None]:
import pypelines.ts_regression_pipeline as pipe
from pypelines import utils


# Query pypelines for the models registered under the 'timeseries_regression'
# model type; as the last expression of the cell, the result is displayed.
utils.list_supported_models(model_type='timeseries_regression')

TIME SERIES REGRESSION

Loading the library

In [52]:
import pypelines.ts_regression_pipeline as pipe
from pypelines import utils
import pandas as pd
from sktime.datasets import load_unit_test

# Load the dataset; the return value is unpacked below as a (features, target) pair
data_tuple = load_unit_test()
X_train, y_train = data_tuple
# NOTE(review): confirm that y_train is numeric - the regression metrics used
# by the generated training code need a numeric target.

# Wrap the features in a DataFrame and the target in a Series named "target"
X_train_df = pd.DataFrame(X_train)
y_train_df = pd.Series(y_train, name="target")

# Concatenate features and target column side by side (axis=1) into `df`,
# the single frame the pipeline cells below consume
df = pd.concat([X_train_df, y_train_df], axis=1)

SINGLE TIME SERIES REGRESSION

Data Load and Model Selection

In [66]:
# Pipeline restricted to one model ('CNN'); `df` is supplied as both the
# training data and the test data.
ts_reg_pypelines_all = pipe.TSRegressionPipeline(
    data=df,
    test_data=df,
    models=['CNN'],
    target_column='target',
    nfolds=3,
)

Default Hyperparameters

In [67]:
# Default hyperparameters for the selected model(s).
# NOTE(review): only the last expression of a cell is displayed; if
# get_hyperparameters() returns its result instead of printing it, the value
# is not shown from this cell - confirm.
ts_reg_pypelines_all.get_hyperparameters()
# Presumably places the generated training code on the clipboard (going by the
# method name); the pasted result is the next code cell.
ts_reg_pypelines_all.code_to_clipboard()

Model training code generation

Training code for single time series regression model

In [None]:

from sktime import *
from sklearn.metrics import accuracy_score

import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# Source frame is `df`: every column except the target is used as a feature.
target = "target"
features = df.columns.drop("target").tolist()

# Training and test sets are both taken from the same frame `df`.
X_train, y_train = df[features], df[target]
X_test, y_test = df[features], df[target]

# Each model pipeline appends its metric table to this list.
model_comparison_list = []

##### End of Data Processing Pipeline #####


##### Model Pipeline for CNN #####
# Tunes a CNNRegressor with a cross-validated grid search, evaluates the best
# estimator on X_test, draws two diagnostic plots and appends the metric table
# to model_comparison_list.

from sktime.regression.deep_learning.cnn import CNNRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# np.arange(100, 2000, 1000) yields the two candidates 100 and 1100.
CNNRegressor_param_grid = {
    "n_epochs": np.arange(100, 2000, 1000),
}

CNNRegressor_model = CNNRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
CNNRegressor_grid_search = GridSearchCV(
    estimator=CNNRegressor_model,
    param_grid=CNNRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
CNNRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
CNNRegressor_best_estimator = CNNRegressor_grid_search.best_estimator_

# Store results as a dataframe
CNNRegressor_search_results = pd.DataFrame(CNNRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
CNNRegressor_predictions = CNNRegressor_best_estimator.predict(X_test)
CNNRegressor_predictions_df = pd.DataFrame(CNNRegressor_predictions)

# Generate Model Metrics
CNNRegressor_r2_score = r2_score(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_mean_squared_error = mean_squared_error(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_explained_variance_score = explained_variance_score(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_performance_metrics = [
    ['CNNRegressor', 'r2_score', CNNRegressor_r2_score],
    ['CNNRegressor', 'mean_squared_error', CNNRegressor_mean_squared_error],
    ['CNNRegressor', 'explained_variance_score', CNNRegressor_explained_variance_score],
]
CNNRegressor_performance_metrics = pd.DataFrame(CNNRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
CNNRegressor_actual_predicted_plot, CNNRegressor_actual_predicted_plot_ax = plt.subplots()
CNNRegressor_actual_predicted_plot = CNNRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=CNNRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
CNNRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
CNNRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
CNNRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
CNNRegressor_actual_predicted_plot_ax.set_title('CNNRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
CNNRegressor_deciles = np.percentile(CNNRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
CNNRegressor_mean_actual = []
CNNRegressor_mean_predicted = []
for i in range(len(CNNRegressor_deciles) - 1):
    mask = CNNRegressor_predictions >= CNNRegressor_deciles[i]
    if i < len(CNNRegressor_deciles) - 2:
        mask &= CNNRegressor_predictions < CNNRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is counted.
        mask &= CNNRegressor_predictions <= CNNRegressor_deciles[i + 1]
    CNNRegressor_mean_actual.append(np.mean(y_test[mask]))
    CNNRegressor_mean_predicted.append(np.mean(CNNRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
CNNRegressor_lift_plot, CNNRegressor_lift_plot_ax = plt.subplots()
CNNRegressor_lift_plot_ax.bar(np.arange(len(CNNRegressor_mean_actual)), CNNRegressor_mean_actual, label='Actual')
CNNRegressor_lift_plot_ax.plot(np.arange(len(CNNRegressor_mean_predicted)), CNNRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
CNNRegressor_lift_plot_ax.set_xlabel('Deciles')
CNNRegressor_lift_plot_ax.set_ylabel('Mean')
CNNRegressor_lift_plot_ax.set_title('CNNRegressor Decile Analysis Chart')
CNNRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(CNNRegressor_performance_metrics)
##### End of Model Pipeline for CNN #####
##### Model Comparison #####
# Rank the models on R^2 only (higher is better): keep the r2_score rows,
# then sort them in descending order so the best model is the first row.
table = pd.concat(model_comparison_list)
table = table[table['metric'] == 'r2_score']
table = table.sort_values(by=['value'], ascending=False)
print(table)
print(f"The best model is {table['model'].iloc[0]} with {table['value'].iloc[0]} as {table['metric'].iloc[0]}")

# Predict test data using the best model
# Each pipeline stores its tuned estimator as "<model>_best_estimator"; it is
# looked up by name in the notebook namespace rather than by eval()-ing a
# constructed string.
test_predictions = globals()[table['model'].iloc[0] + "_best_estimator"].predict(X_test)
print('Predictions from best model are stored in test_predictions')


Multiple Time Series Regression

In [57]:
# Pipeline restricted to two models ('CNN' and 'KNN'); `df` is supplied as
# both the training data and the test data.
ts_reg_pypelines_all = pipe.TSRegressionPipeline(
    data=df,
    test_data=df,
    models=['CNN', 'KNN'],
    target_column='target',
    nfolds=3,
)

Default Hyperparameters

In [None]:
# Default hyperparameters for the selected models ('CNN' and 'KNN').
ts_reg_pypelines_all.get_hyperparameters()

Model training code generation

In [59]:
# Presumably places the generated training code on the clipboard (going by the
# method name); the pasted result is the next code cell.
ts_reg_pypelines_all.code_to_clipboard()

Training code for multiple time series regression model

In [None]:

from sktime import *
from sklearn.metrics import accuracy_score

import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# Source frame is `df`: every column except the target is used as a feature.
target = "target"
features = df.columns.drop("target").tolist()

# Training and test sets are both taken from the same frame `df`.
X_train, y_train = df[features], df[target]
X_test, y_test = df[features], df[target]

# Each model pipeline appends its metric table to this list.
model_comparison_list = []

##### End of Data Processing Pipeline #####


##### Model Pipeline for CNN #####
# Tunes a CNNRegressor with a cross-validated grid search, evaluates the best
# estimator on X_test, draws two diagnostic plots and appends the metric table
# to model_comparison_list.

from sktime.regression.deep_learning.cnn import CNNRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# np.arange(100, 2000, 1000) yields the two candidates 100 and 1100.
CNNRegressor_param_grid = {
    "n_epochs": np.arange(100, 2000, 1000),
}

CNNRegressor_model = CNNRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
CNNRegressor_grid_search = GridSearchCV(
    estimator=CNNRegressor_model,
    param_grid=CNNRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
CNNRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
CNNRegressor_best_estimator = CNNRegressor_grid_search.best_estimator_

# Store results as a dataframe
CNNRegressor_search_results = pd.DataFrame(CNNRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
CNNRegressor_predictions = CNNRegressor_best_estimator.predict(X_test)
CNNRegressor_predictions_df = pd.DataFrame(CNNRegressor_predictions)

# Generate Model Metrics
CNNRegressor_r2_score = r2_score(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_mean_squared_error = mean_squared_error(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_explained_variance_score = explained_variance_score(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_performance_metrics = [
    ['CNNRegressor', 'r2_score', CNNRegressor_r2_score],
    ['CNNRegressor', 'mean_squared_error', CNNRegressor_mean_squared_error],
    ['CNNRegressor', 'explained_variance_score', CNNRegressor_explained_variance_score],
]
CNNRegressor_performance_metrics = pd.DataFrame(CNNRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
CNNRegressor_actual_predicted_plot, CNNRegressor_actual_predicted_plot_ax = plt.subplots()
CNNRegressor_actual_predicted_plot = CNNRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=CNNRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
CNNRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
CNNRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
CNNRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
CNNRegressor_actual_predicted_plot_ax.set_title('CNNRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
CNNRegressor_deciles = np.percentile(CNNRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
CNNRegressor_mean_actual = []
CNNRegressor_mean_predicted = []
for i in range(len(CNNRegressor_deciles) - 1):
    mask = CNNRegressor_predictions >= CNNRegressor_deciles[i]
    if i < len(CNNRegressor_deciles) - 2:
        mask &= CNNRegressor_predictions < CNNRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is counted.
        mask &= CNNRegressor_predictions <= CNNRegressor_deciles[i + 1]
    CNNRegressor_mean_actual.append(np.mean(y_test[mask]))
    CNNRegressor_mean_predicted.append(np.mean(CNNRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
CNNRegressor_lift_plot, CNNRegressor_lift_plot_ax = plt.subplots()
CNNRegressor_lift_plot_ax.bar(np.arange(len(CNNRegressor_mean_actual)), CNNRegressor_mean_actual, label='Actual')
CNNRegressor_lift_plot_ax.plot(np.arange(len(CNNRegressor_mean_predicted)), CNNRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
CNNRegressor_lift_plot_ax.set_xlabel('Deciles')
CNNRegressor_lift_plot_ax.set_ylabel('Mean')
CNNRegressor_lift_plot_ax.set_title('CNNRegressor Decile Analysis Chart')
CNNRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(CNNRegressor_performance_metrics)
##### End of Model Pipeline for CNN #####
##### Model Pipeline for KNN #####
# Tunes a KNeighborsTimeSeriesRegressor with a cross-validated grid search,
# evaluates the best estimator on X_test, draws two diagnostic plots and
# appends the metric table to model_comparison_list.

from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# Only n_neighbors varies (1, 3, 5, 7, 9); the other settings have one value each.
KNeighborsTimeSeriesRegressor_param_grid = {
    "n_neighbors": np.arange(1, 10, 2),
    "distance": ['euclidean'],
    "weights": ['uniform'],
    "algorithm": ['brute'],
}

KNeighborsTimeSeriesRegressor_model = KNeighborsTimeSeriesRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
KNeighborsTimeSeriesRegressor_grid_search = GridSearchCV(
    estimator=KNeighborsTimeSeriesRegressor_model,
    param_grid=KNeighborsTimeSeriesRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
KNeighborsTimeSeriesRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
KNeighborsTimeSeriesRegressor_best_estimator = KNeighborsTimeSeriesRegressor_grid_search.best_estimator_

# Store results as a dataframe
KNeighborsTimeSeriesRegressor_search_results = pd.DataFrame(KNeighborsTimeSeriesRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
KNeighborsTimeSeriesRegressor_predictions = KNeighborsTimeSeriesRegressor_best_estimator.predict(X_test)
KNeighborsTimeSeriesRegressor_predictions_df = pd.DataFrame(KNeighborsTimeSeriesRegressor_predictions)

# Generate Model Metrics
KNeighborsTimeSeriesRegressor_r2_score = r2_score(y_test, KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0])
KNeighborsTimeSeriesRegressor_mean_squared_error = mean_squared_error(y_test, KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0])
KNeighborsTimeSeriesRegressor_explained_variance_score = explained_variance_score(y_test, KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0])
KNeighborsTimeSeriesRegressor_performance_metrics = [
    ['KNeighborsTimeSeriesRegressor', 'r2_score', KNeighborsTimeSeriesRegressor_r2_score],
    ['KNeighborsTimeSeriesRegressor', 'mean_squared_error', KNeighborsTimeSeriesRegressor_mean_squared_error],
    ['KNeighborsTimeSeriesRegressor', 'explained_variance_score', KNeighborsTimeSeriesRegressor_explained_variance_score],
]
KNeighborsTimeSeriesRegressor_performance_metrics = pd.DataFrame(KNeighborsTimeSeriesRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
KNeighborsTimeSeriesRegressor_actual_predicted_plot, KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax = plt.subplots()
KNeighborsTimeSeriesRegressor_actual_predicted_plot = KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.set_title('KNeighborsTimeSeriesRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
KNeighborsTimeSeriesRegressor_deciles = np.percentile(KNeighborsTimeSeriesRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
KNeighborsTimeSeriesRegressor_mean_actual = []
KNeighborsTimeSeriesRegressor_mean_predicted = []
for i in range(len(KNeighborsTimeSeriesRegressor_deciles) - 1):
    mask = KNeighborsTimeSeriesRegressor_predictions >= KNeighborsTimeSeriesRegressor_deciles[i]
    if i < len(KNeighborsTimeSeriesRegressor_deciles) - 2:
        mask &= KNeighborsTimeSeriesRegressor_predictions < KNeighborsTimeSeriesRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is counted.
        mask &= KNeighborsTimeSeriesRegressor_predictions <= KNeighborsTimeSeriesRegressor_deciles[i + 1]
    KNeighborsTimeSeriesRegressor_mean_actual.append(np.mean(y_test[mask]))
    KNeighborsTimeSeriesRegressor_mean_predicted.append(np.mean(KNeighborsTimeSeriesRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
KNeighborsTimeSeriesRegressor_lift_plot, KNeighborsTimeSeriesRegressor_lift_plot_ax = plt.subplots()
KNeighborsTimeSeriesRegressor_lift_plot_ax.bar(np.arange(len(KNeighborsTimeSeriesRegressor_mean_actual)), KNeighborsTimeSeriesRegressor_mean_actual, label='Actual')
KNeighborsTimeSeriesRegressor_lift_plot_ax.plot(np.arange(len(KNeighborsTimeSeriesRegressor_mean_predicted)), KNeighborsTimeSeriesRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
KNeighborsTimeSeriesRegressor_lift_plot_ax.set_xlabel('Deciles')
KNeighborsTimeSeriesRegressor_lift_plot_ax.set_ylabel('Mean')
KNeighborsTimeSeriesRegressor_lift_plot_ax.set_title('KNeighborsTimeSeriesRegressor Decile Analysis Chart')
KNeighborsTimeSeriesRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(KNeighborsTimeSeriesRegressor_performance_metrics)
##### End of Model Pipeline for KNN #####
##### Model Comparison #####
# Rank the models on R^2 only (higher is better): keep the r2_score rows,
# then sort them in descending order so the best model is the first row.
table = pd.concat(model_comparison_list)
table = table[table['metric'] == 'r2_score']
table = table.sort_values(by=['value'], ascending=False)
print(table)
print(f"The best model is {table['model'].iloc[0]} with {table['value'].iloc[0]} as {table['metric'].iloc[0]}")

# Predict test data using the best model
# Each pipeline stores its tuned estimator as "<model>_best_estimator"; it is
# looked up by name in the notebook namespace rather than by eval()-ing a
# constructed string.
test_predictions = globals()[table['model'].iloc[0] + "_best_estimator"].predict(X_test)
print('Predictions from best model are stored in test_predictions')


TIME SERIES REGRESSION MODEL - DEFAULT RUN

In [61]:
# No `models` argument is given here, so the pipeline falls back to its own
# model selection; `df` is supplied as both the training and the test data.
ts_reg_pypelines_all = pipe.TSRegressionPipeline(
    data=df,
    test_data=df,
    target_column='target',
    nfolds=3,
)

Default Hyperparameters

In [62]:
# Default hyperparameters for the pipeline's models.
# NOTE(review): only the last expression of a cell is displayed; if
# get_hyperparameters() returns its result instead of printing it, the value
# is not shown from this cell - confirm.
ts_reg_pypelines_all.get_hyperparameters()
# Models used by this pipeline; the recorded output lists CNN, KNN, TAPNET and DUMMY.
ts_reg_pypelines_all.model_list()

['CNN', 'KNN', 'TAPNET', 'DUMMY']


Model training code generation for time series regression default run

In [64]:
# Presumably places the generated training code on the clipboard (going by the
# method name); the pasted result is the next code cell.
ts_reg_pypelines_all.code_to_clipboard()

In [None]:

from sktime import *
from sklearn.metrics import accuracy_score

import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# Source frame is `df`: every column except the target is used as a feature.
target = "target"
features = df.columns.drop("target").tolist()

# Training and test sets are both taken from the same frame `df`.
X_train, y_train = df[features], df[target]
X_test, y_test = df[features], df[target]

# Each model pipeline appends its metric table to this list.
model_comparison_list = []

##### End of Data Processing Pipeline #####


##### Model Pipeline for CNN #####
# Tunes a CNNRegressor with a cross-validated grid search, evaluates the best
# estimator on X_test, draws two diagnostic plots and appends the metric table
# to model_comparison_list.

from sktime.regression.deep_learning.cnn import CNNRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# np.arange(100, 2000, 1000) yields the two candidates 100 and 1100.
CNNRegressor_param_grid = {
    "n_epochs": np.arange(100, 2000, 1000),
}

CNNRegressor_model = CNNRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
CNNRegressor_grid_search = GridSearchCV(
    estimator=CNNRegressor_model,
    param_grid=CNNRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
CNNRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
CNNRegressor_best_estimator = CNNRegressor_grid_search.best_estimator_

# Store results as a dataframe
CNNRegressor_search_results = pd.DataFrame(CNNRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
CNNRegressor_predictions = CNNRegressor_best_estimator.predict(X_test)
CNNRegressor_predictions_df = pd.DataFrame(CNNRegressor_predictions)

# Generate Model Metrics
CNNRegressor_r2_score = r2_score(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_mean_squared_error = mean_squared_error(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_explained_variance_score = explained_variance_score(y_test, CNNRegressor_predictions_df.iloc[:,0])
CNNRegressor_performance_metrics = [
    ['CNNRegressor', 'r2_score', CNNRegressor_r2_score],
    ['CNNRegressor', 'mean_squared_error', CNNRegressor_mean_squared_error],
    ['CNNRegressor', 'explained_variance_score', CNNRegressor_explained_variance_score],
]
CNNRegressor_performance_metrics = pd.DataFrame(CNNRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
CNNRegressor_actual_predicted_plot, CNNRegressor_actual_predicted_plot_ax = plt.subplots()
CNNRegressor_actual_predicted_plot = CNNRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=CNNRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
CNNRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
CNNRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
CNNRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
CNNRegressor_actual_predicted_plot_ax.set_title('CNNRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
CNNRegressor_deciles = np.percentile(CNNRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
CNNRegressor_mean_actual = []
CNNRegressor_mean_predicted = []
for i in range(len(CNNRegressor_deciles) - 1):
    mask = CNNRegressor_predictions >= CNNRegressor_deciles[i]
    if i < len(CNNRegressor_deciles) - 2:
        mask &= CNNRegressor_predictions < CNNRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is counted.
        mask &= CNNRegressor_predictions <= CNNRegressor_deciles[i + 1]
    CNNRegressor_mean_actual.append(np.mean(y_test[mask]))
    CNNRegressor_mean_predicted.append(np.mean(CNNRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
CNNRegressor_lift_plot, CNNRegressor_lift_plot_ax = plt.subplots()
CNNRegressor_lift_plot_ax.bar(np.arange(len(CNNRegressor_mean_actual)), CNNRegressor_mean_actual, label='Actual')
CNNRegressor_lift_plot_ax.plot(np.arange(len(CNNRegressor_mean_predicted)), CNNRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
CNNRegressor_lift_plot_ax.set_xlabel('Deciles')
CNNRegressor_lift_plot_ax.set_ylabel('Mean')
CNNRegressor_lift_plot_ax.set_title('CNNRegressor Decile Analysis Chart')
CNNRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(CNNRegressor_performance_metrics)
##### End of Model Pipeline for CNN #####
##### Model Pipeline for KNN #####
# Tunes a KNeighborsTimeSeriesRegressor with a cross-validated grid search,
# evaluates the best estimator on X_test, draws two diagnostic plots and
# appends the metric table to model_comparison_list.

from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# Only n_neighbors varies (1, 3, 5, 7, 9); the other settings have one value each.
KNeighborsTimeSeriesRegressor_param_grid = {
    "n_neighbors": np.arange(1, 10, 2),
    "distance": ['euclidean'],
    "weights": ['uniform'],
    "algorithm": ['brute'],
}

KNeighborsTimeSeriesRegressor_model = KNeighborsTimeSeriesRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
KNeighborsTimeSeriesRegressor_grid_search = GridSearchCV(
    estimator=KNeighborsTimeSeriesRegressor_model,
    param_grid=KNeighborsTimeSeriesRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
KNeighborsTimeSeriesRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
KNeighborsTimeSeriesRegressor_best_estimator = KNeighborsTimeSeriesRegressor_grid_search.best_estimator_

# Store results as a dataframe
KNeighborsTimeSeriesRegressor_search_results = pd.DataFrame(KNeighborsTimeSeriesRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
KNeighborsTimeSeriesRegressor_predictions = KNeighborsTimeSeriesRegressor_best_estimator.predict(X_test)
KNeighborsTimeSeriesRegressor_predictions_df = pd.DataFrame(KNeighborsTimeSeriesRegressor_predictions)

# Generate Model Metrics
KNeighborsTimeSeriesRegressor_r2_score = r2_score(y_test, KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0])
KNeighborsTimeSeriesRegressor_mean_squared_error = mean_squared_error(y_test, KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0])
KNeighborsTimeSeriesRegressor_explained_variance_score = explained_variance_score(y_test, KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0])
KNeighborsTimeSeriesRegressor_performance_metrics = [
    ['KNeighborsTimeSeriesRegressor', 'r2_score', KNeighborsTimeSeriesRegressor_r2_score],
    ['KNeighborsTimeSeriesRegressor', 'mean_squared_error', KNeighborsTimeSeriesRegressor_mean_squared_error],
    ['KNeighborsTimeSeriesRegressor', 'explained_variance_score', KNeighborsTimeSeriesRegressor_explained_variance_score],
]
KNeighborsTimeSeriesRegressor_performance_metrics = pd.DataFrame(KNeighborsTimeSeriesRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
KNeighborsTimeSeriesRegressor_actual_predicted_plot, KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax = plt.subplots()
KNeighborsTimeSeriesRegressor_actual_predicted_plot = KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=KNeighborsTimeSeriesRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
KNeighborsTimeSeriesRegressor_actual_predicted_plot_ax.set_title('KNeighborsTimeSeriesRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
KNeighborsTimeSeriesRegressor_deciles = np.percentile(KNeighborsTimeSeriesRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
KNeighborsTimeSeriesRegressor_mean_actual = []
KNeighborsTimeSeriesRegressor_mean_predicted = []
for i in range(len(KNeighborsTimeSeriesRegressor_deciles) - 1):
    mask = KNeighborsTimeSeriesRegressor_predictions >= KNeighborsTimeSeriesRegressor_deciles[i]
    if i < len(KNeighborsTimeSeriesRegressor_deciles) - 2:
        mask &= KNeighborsTimeSeriesRegressor_predictions < KNeighborsTimeSeriesRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is counted.
        mask &= KNeighborsTimeSeriesRegressor_predictions <= KNeighborsTimeSeriesRegressor_deciles[i + 1]
    KNeighborsTimeSeriesRegressor_mean_actual.append(np.mean(y_test[mask]))
    KNeighborsTimeSeriesRegressor_mean_predicted.append(np.mean(KNeighborsTimeSeriesRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
KNeighborsTimeSeriesRegressor_lift_plot, KNeighborsTimeSeriesRegressor_lift_plot_ax = plt.subplots()
KNeighborsTimeSeriesRegressor_lift_plot_ax.bar(np.arange(len(KNeighborsTimeSeriesRegressor_mean_actual)), KNeighborsTimeSeriesRegressor_mean_actual, label='Actual')
KNeighborsTimeSeriesRegressor_lift_plot_ax.plot(np.arange(len(KNeighborsTimeSeriesRegressor_mean_predicted)), KNeighborsTimeSeriesRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
KNeighborsTimeSeriesRegressor_lift_plot_ax.set_xlabel('Deciles')
KNeighborsTimeSeriesRegressor_lift_plot_ax.set_ylabel('Mean')
KNeighborsTimeSeriesRegressor_lift_plot_ax.set_title('KNeighborsTimeSeriesRegressor Decile Analysis Chart')
KNeighborsTimeSeriesRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(KNeighborsTimeSeriesRegressor_performance_metrics)
##### End of Model Pipeline for KNN #####
##### Model Pipeline for TAPNET #####
# Tunes a TapNetRegressor with a cross-validated grid search, evaluates the
# best estimator on X_test, draws two diagnostic plots and appends the metric
# table to model_comparison_list.

from sktime.regression.deep_learning.tapnet import TapNetRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# np.arange(100, 2000, 1000) yields the two candidates 100 and 1100.
# The use_* switches are real booleans: a string such as 'True' only behaves
# like True because any non-empty string is truthy ('False' would be too).
TapNetRegressor_param_grid = {
    "n_epochs": np.arange(100, 2000, 1000),
    "use_lstm": [True],
    "use_cnn": [True],
    "use_rp": [True],
    "use_att": [True],
}

TapNetRegressor_model = TapNetRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
TapNetRegressor_grid_search = GridSearchCV(
    estimator=TapNetRegressor_model,
    param_grid=TapNetRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
TapNetRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
TapNetRegressor_best_estimator = TapNetRegressor_grid_search.best_estimator_

# Store results as a dataframe
TapNetRegressor_search_results = pd.DataFrame(TapNetRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
TapNetRegressor_predictions = TapNetRegressor_best_estimator.predict(X_test)
TapNetRegressor_predictions_df = pd.DataFrame(TapNetRegressor_predictions)

# Generate Model Metrics
TapNetRegressor_r2_score = r2_score(y_test, TapNetRegressor_predictions_df.iloc[:,0])
TapNetRegressor_mean_squared_error = mean_squared_error(y_test, TapNetRegressor_predictions_df.iloc[:,0])
TapNetRegressor_explained_variance_score = explained_variance_score(y_test, TapNetRegressor_predictions_df.iloc[:,0])
TapNetRegressor_performance_metrics = [
    ['TapNetRegressor', 'r2_score', TapNetRegressor_r2_score],
    ['TapNetRegressor', 'mean_squared_error', TapNetRegressor_mean_squared_error],
    ['TapNetRegressor', 'explained_variance_score', TapNetRegressor_explained_variance_score],
]
TapNetRegressor_performance_metrics = pd.DataFrame(TapNetRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
TapNetRegressor_actual_predicted_plot, TapNetRegressor_actual_predicted_plot_ax = plt.subplots()
TapNetRegressor_actual_predicted_plot = TapNetRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=TapNetRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
TapNetRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
TapNetRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
TapNetRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
TapNetRegressor_actual_predicted_plot_ax.set_title('TapNetRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
TapNetRegressor_deciles = np.percentile(TapNetRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
TapNetRegressor_mean_actual = []
TapNetRegressor_mean_predicted = []
for i in range(len(TapNetRegressor_deciles) - 1):
    mask = TapNetRegressor_predictions >= TapNetRegressor_deciles[i]
    if i < len(TapNetRegressor_deciles) - 2:
        mask &= TapNetRegressor_predictions < TapNetRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is counted.
        mask &= TapNetRegressor_predictions <= TapNetRegressor_deciles[i + 1]
    TapNetRegressor_mean_actual.append(np.mean(y_test[mask]))
    TapNetRegressor_mean_predicted.append(np.mean(TapNetRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
TapNetRegressor_lift_plot, TapNetRegressor_lift_plot_ax = plt.subplots()
TapNetRegressor_lift_plot_ax.bar(np.arange(len(TapNetRegressor_mean_actual)), TapNetRegressor_mean_actual, label='Actual')
TapNetRegressor_lift_plot_ax.plot(np.arange(len(TapNetRegressor_mean_predicted)), TapNetRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
TapNetRegressor_lift_plot_ax.set_xlabel('Deciles')
TapNetRegressor_lift_plot_ax.set_ylabel('Mean')
TapNetRegressor_lift_plot_ax.set_title('TapNetRegressor Decile Analysis Chart')
TapNetRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(TapNetRegressor_performance_metrics)
##### End of Model Pipeline for TAPNET #####
##### Model Pipeline for DUMMY #####
# Fits a DummyRegressor baseline through the same grid-search scaffold as the
# other models, evaluates it on X_test, draws two diagnostic plots and appends
# the metric table to model_comparison_list.

from sktime.regression.dummy import DummyRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error,make_scorer,r2_score,explained_variance_score
import matplotlib.pyplot as plt

# 'prior' is a dummy *classifier* strategy; the regressor accepts
# mean / median / quantile / constant, so the mean baseline is used here.
DummyRegressor_param_grid = {
    "strategy": ['mean'],
}

DummyRegressor_model = DummyRegressor()

# Create the grid search.
# Candidates are ranked by R^2: accuracy_score is a classification metric and
# is not meaningful for continuous predictions, and R^2 is also the metric the
# model comparison step ranks on.
DummyRegressor_grid_search = GridSearchCV(
    estimator=DummyRegressor_model,
    param_grid=DummyRegressor_param_grid,
    cv=3,
    scoring=make_scorer(r2_score),
    verbose=3,
)
DummyRegressor_grid_search.fit(X_train, y_train)

# Get the estimator with the best hyperparameters
DummyRegressor_best_estimator = DummyRegressor_grid_search.best_estimator_

# Store results as a dataframe
DummyRegressor_search_results = pd.DataFrame(DummyRegressor_grid_search.cv_results_)

# Generate Predictions (predict once; the dataframe wraps the same array)
DummyRegressor_predictions = DummyRegressor_best_estimator.predict(X_test)
DummyRegressor_predictions_df = pd.DataFrame(DummyRegressor_predictions)

# Generate Model Metrics
DummyRegressor_r2_score = r2_score(y_test, DummyRegressor_predictions_df.iloc[:,0])
DummyRegressor_mean_squared_error = mean_squared_error(y_test, DummyRegressor_predictions_df.iloc[:,0])
DummyRegressor_explained_variance_score = explained_variance_score(y_test, DummyRegressor_predictions_df.iloc[:,0])
DummyRegressor_performance_metrics = [
    ['DummyRegressor', 'r2_score', DummyRegressor_r2_score],
    ['DummyRegressor', 'mean_squared_error', DummyRegressor_mean_squared_error],
    ['DummyRegressor', 'explained_variance_score', DummyRegressor_explained_variance_score],
]
DummyRegressor_performance_metrics = pd.DataFrame(DummyRegressor_performance_metrics, columns=['model','metric', 'value'])

# Generate Actual vs Predicted Plot
DummyRegressor_actual_predicted_plot, DummyRegressor_actual_predicted_plot_ax = plt.subplots()
DummyRegressor_actual_predicted_plot = DummyRegressor_actual_predicted_plot_ax.scatter(x=y_test, y=DummyRegressor_predictions_df.iloc[:,0], alpha=0.5)
# Add diagonal line (perfect predictions fall on it)
DummyRegressor_actual_predicted_plot_ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', alpha=0.5)
# Set axis labels and title
DummyRegressor_actual_predicted_plot_ax.set_xlabel('Actual')
DummyRegressor_actual_predicted_plot_ax.set_ylabel('Predicted')
DummyRegressor_actual_predicted_plot_ax.set_title('DummyRegressor Actual vs. Predicted')
plt.show(block=False)

# Generate Decile Lift Chart
# Bin edges are percentiles of the predictions. np.arange(0, 101, 10) includes
# the 100th percentile, so the 11 edges delimit all ten deciles.
DummyRegressor_deciles = np.percentile(DummyRegressor_predictions, np.arange(0, 101, 10))
# Calculate the mean actual and predicted values for each decile
DummyRegressor_mean_actual = []
DummyRegressor_mean_predicted = []
for i in range(len(DummyRegressor_deciles) - 1):
    mask = DummyRegressor_predictions >= DummyRegressor_deciles[i]
    if i < len(DummyRegressor_deciles) - 2:
        mask &= DummyRegressor_predictions < DummyRegressor_deciles[i + 1]
    else:
        # The last bin is closed on the right so the largest prediction is
        # counted; when all predictions are equal, every row lands in this bin.
        mask &= DummyRegressor_predictions <= DummyRegressor_deciles[i + 1]
    DummyRegressor_mean_actual.append(np.mean(y_test[mask]))
    DummyRegressor_mean_predicted.append(np.mean(DummyRegressor_predictions[mask]))

# Create a bar chart of the mean actual and predicted values for each decile
DummyRegressor_lift_plot, DummyRegressor_lift_plot_ax = plt.subplots()
DummyRegressor_lift_plot_ax.bar(np.arange(len(DummyRegressor_mean_actual)), DummyRegressor_mean_actual, label='Actual')
DummyRegressor_lift_plot_ax.plot(np.arange(len(DummyRegressor_mean_predicted)), DummyRegressor_mean_predicted, color='red', linewidth=2, label='Predicted')
DummyRegressor_lift_plot_ax.set_xlabel('Deciles')
DummyRegressor_lift_plot_ax.set_ylabel('Mean')
DummyRegressor_lift_plot_ax.set_title('DummyRegressor Decile Analysis Chart')
DummyRegressor_lift_plot_ax.legend()
plt.show(block=False)

# Register this model's metrics for the comparison step
model_comparison_list.append(DummyRegressor_performance_metrics)
##### End of Model Pipeline for DUMMY #####
##### Model Comparison #####
# Rank the models on R^2 only (higher is better): keep the r2_score rows,
# then sort them in descending order so the best model is the first row.
table = pd.concat(model_comparison_list)
table = table[table['metric'] == 'r2_score']
table = table.sort_values(by=['value'], ascending=False)
print(table)
print(f"The best model is {table['model'].iloc[0]} with {table['value'].iloc[0]} as {table['metric'].iloc[0]}")

# Predict test data using the best model
# Each pipeline stores its tuned estimator as "<model>_best_estimator"; it is
# looked up by name in the notebook namespace rather than by eval()-ing a
# constructed string.
test_predictions = globals()[table['model'].iloc[0] + "_best_estimator"].predict(X_test)
print('Predictions from best model are stored in test_predictions')
