In [1]:
import pandas as pd

In [2]:
from sktime.classification.deep_learning.cnn import CNNClassifier
from sktime.datasets import load_unit_test
df_train, df_train_y= load_unit_test(split="train")
df_test, df_test_y = load_unit_test(split="test")


In [3]:
df_train['class']=df_train_y
df_test['class']=df_test_y

In [4]:
from pypelines import ts_classification_pipeline as pipe

In [5]:
tsc = pipe.TSClassificationPipeline(data=df_train,
                                    target_column='class',
                                    models=['ProximityForest'],
                                    test_data=df_test,positive_class='2')


In [6]:
tsc.code_to_clipboard()

In [7]:

from sktime import *
from sklearn.metrics import accuracy_score


import pandas as pd
import numpy as np


# target dataframe: df_train
target = "class"
features = list(df_train.columns.drop("class"))

# train test split
X_train = df_train[features]
y_train = df_train[target]

X_test = df_test[features]
y_test = df_test[target]

model_comparison_list = []

##### End of Data Processing Pipeline #####


##### Model Pipeline for ProximityForest #####

from sktime.classification.distance_based import ProximityForest
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score,make_scorer,f1_score,precision_score,recall_score,roc_auc_score,roc_curve,auc
import matplotlib.pyplot as plt
ProximityForest_param_grid = {
"n_estimators": np.arange(10, 100, 100),
"max_depth": np.arange(10, 20, 10),
}

ProximityForest_model = ProximityForest()

# Create the grid search
ProximityForest_grid_search = GridSearchCV(estimator=ProximityForest_model, param_grid=ProximityForest_param_grid, cv=3, scoring=make_scorer(accuracy_score), verbose=3)
ProximityForest_grid_search.fit(X_train, y_train)

# Get the best hyperparameters
ProximityForest_best_estimator = ProximityForest_grid_search.best_estimator_

# Store results as a dataframe  
ProximityForest_search_results = pd.DataFrame(ProximityForest_grid_search.cv_results_)

# Generate Predictions
ProximityForest_predictions = pd.DataFrame(ProximityForest_best_estimator.predict(X_test))
ProximityForest_predictions_prob = ProximityForest_best_estimator.predict_proba(X_test)
ProximityForest_predictions_prob_df = pd.DataFrame()
ProximityForest_predictions_prob_df[ProximityForest_grid_search.classes_[0]] = ProximityForest_predictions_prob[:,0]
ProximityForest_predictions_prob_df[ProximityForest_grid_search.classes_[1]] = ProximityForest_predictions_prob[:,1] 


# Generate Model Metrics
ProximityForest_accuracy = accuracy_score(y_test, ProximityForest_predictions.iloc[:,0])
ProximityForest_f1_score = f1_score(y_test, ProximityForest_predictions.iloc[:,0],pos_label='2')
ProximityForest_precision = precision_score(y_test, ProximityForest_predictions.iloc[:,0],pos_label='2')
ProximityForest_recall = recall_score(y_test, ProximityForest_predictions.iloc[:,0],pos_label='2')
ProximityForest_roc_auc_score = roc_auc_score(y_test, ProximityForest_predictions_prob_df[ProximityForest_grid_search.classes_[1]])
ProximityForest_performance_metrics = [['ProximityForest','accuracy',ProximityForest_accuracy], 
                                  ['ProximityForest','f1_score',ProximityForest_f1_score],
                                  ['ProximityForest','precision', ProximityForest_precision],
                                  ['ProximityForest','recall', ProximityForest_recall],
                                  ['ProximityForest','roc_auc_score', ProximityForest_roc_auc_score]]
ProximityForest_performance_metrics = pd.DataFrame(ProximityForest_performance_metrics, columns=['model','metric', 'value'])
fpr, tpr, thresholds = roc_curve(y_test, ProximityForest_predictions_prob_df[ProximityForest_grid_search.classes_[1]],pos_label='2')
roc_auc = auc(fpr, tpr)

# ROC Curve plot
ProximityForest_roc_auc_plot, ProximityForest_roc_auc_plot_ax = plt.subplots()
ProximityForest_roc_auc_plot_ax.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.4f})')
ProximityForest_roc_auc_plot_ax.plot([0, 1], [0, 1], 'r--', label='Random guess')
# Set axis labels and title
ProximityForest_roc_auc_plot_ax.set_xlabel('False Positive Rate')
ProximityForest_roc_auc_plot_ax.set_ylabel('True Positive Rate')
ProximityForest_roc_auc_plot_ax.set_title(f'ProximityForest ROC Curve')
# Add legend
ProximityForest_roc_auc_plot_ax.legend()


print(ProximityForest_performance_metrics[ProximityForest_performance_metrics['metric'] == 'roc_auc_score'])
model_comparison_list.append(ProximityForest_performance_metrics)##### End of Model Pipeline for ProximityForest #####
##### Model Comparison #####
table = pd.concat(model_comparison_list)
table = table.sort_values(by=['value'], ascending=False)
table = table[table['metric'] == 'roc_auc_score']
print(table)
print(f"The best model is {table['model'].iloc[0]} with {table['value'].iloc[0]} as {table['metric'].iloc[0]}")

# Predict test data using the best model
test_predictions = eval(table['model'].iloc[0]+"_best_estimator").predict(X_test)
print('Predictions from best model are stored in test_predictions')


Fitting 3 folds for each of 1 candidates, totalling 3 fits


Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 113, in _check_targets
    unique_values = np.union1d(y_true, y_pred)
  File "<__array_function__ internals>", line 180, in union1d
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 781, in union1d
    return unique(np.concatenate((ar1, ar2), axis=None))
  File "<__array_function__ internals>", line 180, in unique
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 274, in unique
    ret = _unique1d(ar, return_index, return_inverse, return_counts,
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 336, in _unique1d
    ar.sort()
TypeError: '<' not supported between instances of 'int' and 'str'

The above exception was the direct

[CV 1/3] END .......max_depth=10, n_estimators=10;, score=nan total time=  35.0s


Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 113, in _check_targets
    unique_values = np.union1d(y_true, y_pred)
  File "<__array_function__ internals>", line 180, in union1d
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 781, in union1d
    return unique(np.concatenate((ar1, ar2), axis=None))
  File "<__array_function__ internals>", line 180, in unique
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 274, in unique
    ret = _unique1d(ar, return_index, return_inverse, return_counts,
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 336, in _unique1d
    ar.sort()
TypeError: '<' not supported between instances of 'int' and 'str'

The above exception was the direct

[CV 2/3] END .......max_depth=10, n_estimators=10;, score=nan total time=  42.2s


Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 113, in _check_targets
    unique_values = np.union1d(y_true, y_pred)
  File "<__array_function__ internals>", line 180, in union1d
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 781, in union1d
    return unique(np.concatenate((ar1, ar2), axis=None))
  File "<__array_function__ internals>", line 180, in unique
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 274, in unique
    ret = _unique1d(ar, return_index, return_inverse, return_counts,
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/lib/arraysetops.py", line 336, in _unique1d
    ar.sort()
TypeError: '<' not supported between instances of 'int' and 'str'

The above exception was the direct

[CV 3/3] END .......max_depth=10, n_estimators=10;, score=nan total time= 1.2min


TypeError: Labels in y_true and y_pred should be of the same type. Got y_true=['1' '2'] and y_pred=[0 1]. Make sure that the predictions provided by the classifier coincides with the true labels.