In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, balanced_accuracy_score
from sklearn.utils import all_estimators

In [3]:
#set this to the root directory of the project
#path_root_dir="./"
data = pd.read_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/processed/all_city_data_with_pop.csv")

In [4]:
data.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,geometry,parking,edges,EV_stations,parking_space,civic,restaurant,park,...,cinema,library,commercial,retail,townhall,government,residential,city,population,Berlin_data_onlycenter_
0,0,0,"POLYGON ((8.4727605 50.099822499999995, 8.4730...",0,0,0,0,0,0,0,...,0,0,0,0,0,0.0,0,Frankfurt,9.014051,
1,1,1,"POLYGON ((8.4775730092433 50.10302720327834, 8...",0,0,0,0,0,0,0,...,0,0,0,0,0,0.0,0,Frankfurt,0.0,
2,2,2,"POLYGON ((8.479750879173663 50.09863320231676,...",0,0,0,0,0,0,0,...,0,0,0,0,0,0.0,0,Frankfurt,9.014051,
3,3,3,"POLYGON ((8.479688060978736 50.10443297769501,...",0,0,0,0,0,0,0,...,0,0,0,0,0,0.0,0,Frankfurt,9.014051,
4,4,4,"POLYGON ((8.47965547981383 50.107440331063444,...",0,0,0,0,0,0,0,...,0,0,0,0,0,0.0,0,Frankfurt,0.0,


In [5]:
#filtering out columsn to be used for modeling
data = data[['geometry','city','EV_stations', 'parking', 'edges',
        'parking_space', 'civic', 'restaurant', 'park', 'school',
       'node', 'Community_centre', 'place_of_worship', 'university', 'cinema',
       'library', 'commercial', 'retail', 'townhall', 'government',
       'residential', 'population']]
print("data size:" , data.shape)
data = data.dropna()
print("data size after dropping na:" , data.shape)
data.head()

data size: (10824, 22)
data size after dropping na: (10129, 22)


Unnamed: 0,geometry,city,EV_stations,parking,edges,parking_space,civic,restaurant,park,school,...,place_of_worship,university,cinema,library,commercial,retail,townhall,government,residential,population
0,"POLYGON ((8.4727605 50.099822499999995, 8.4730...",Frankfurt,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,9.014051
1,"POLYGON ((8.4775730092433 50.10302720327834, 8...",Frankfurt,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,0.0
2,"POLYGON ((8.479750879173663 50.09863320231676,...",Frankfurt,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,9.014051
3,"POLYGON ((8.479688060978736 50.10443297769501,...",Frankfurt,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,9.014051
4,"POLYGON ((8.47965547981383 50.107440331063444,...",Frankfurt,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,0.0


In [6]:
def data_splitter(data, train_cities=None, test_cities=None, test_size=0.2, random_state=42):

    if train_cities is not None:
        train = data[data['city'].isin(train_cities)]
        test = data[data['city'].isin(test_cities)]


        X_train = train.drop(['city','geometry', 'EV_stations'], axis=1)
        y_train = train['EV_stations'].astype(int)
        y_train = y_train.apply(lambda x: 1 if x > 0 else 0)

        X_test = test.drop(['city','geometry', 'EV_stations'], axis=1)
        y_test = test['EV_stations'].astype(int)
        y_test = y_test.apply(lambda x: 1 if x > 0 else 0)
    else:
        X = data.drop(['city','geometry', "EV_stations"], axis=1)
        y = data['EV_stations']
        y = y.apply(lambda x: 1 if x > 0 else 0)
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=test_size, random_state=random_state)

    return X_train, X_test, y_train, y_test

In [7]:
X_train, X_test, y_train, y_test = data_splitter(data)

In [8]:
# logistic regression
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
print("Logistic Regression Test Accuracy: ", logreg.score(X_test, y_test))
# classification report
y_pred = logreg.predict(X_test)
print(classification_report(y_test, y_pred))

Logistic Regression Test Accuracy:  0.8973346495557749
              precision    recall  f1-score   support

           0       0.91      0.98      0.94      1786
           1       0.64      0.31      0.42       240

    accuracy                           0.90      2026
   macro avg       0.77      0.64      0.68      2026
weighted avg       0.88      0.90      0.88      2026



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [9]:
# Get all classification model classes
classifiers = all_estimators(type_filter='classifier')

# Initialize result table
results = []
models = {}
# Run models and collect results
for name, ClassifierClass in tqdm(classifiers):
    try:
        # Initialize model
        model = ClassifierClass()
        model.fit(X_train, y_train)
        models[name] = model
        y_pred = model.predict(X_test)
        
        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='macro')
        recall = recall_score(y_test, y_pred, average='macro')
        f1 = f1_score(y_test, y_pred, average='macro')
        auc = roc_auc_score(y_test, y_pred)
        balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
        
        # Append results
        results.append([name, accuracy, precision, recall, f1, auc, balanced_accuracy])
    except Exception as e:
        print(f"Error occurred for {name}: {str(e)}")

# Create a DataFrame from results
results_df = pd.DataFrame(results, columns=["Model", "Accuracy", "Precision", "Recall", "F1-score", "AUC", "Balanced Accuracy"])
results_df = results_df.sort_values(by=['F1-score', 'AUC'], ascending=False)
print(results_df)


  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 7 is out of bounds for axis 1 with size 7
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([   0,    1,    2, ..., 2022, 2023, 2025], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
Error occurred for StackingClassifier: __init__() missing 1 required positional argument: 'estimators'
Error occurred for VotingClassifier: __init__() missing 1 required positional argument: 'estimators'
                             Model  Accuracy  Precision    Recall  F1-scor

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [10]:
def run_experiment(X_train, X_test, y_train, y_test):
    # Get all classification model classes
    classifiers = all_estimators(type_filter='classifier')

    # Initialize result table
    results = []
    models = {}
    # Run models and collect results
    for name, ClassifierClass in tqdm(classifiers):
        try:
            # Initialize model
            model = ClassifierClass()
            model.fit(X_train, y_train)
            models[name] = model
            y_pred = model.predict(X_test)
            
            # Calculate metrics
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='macro')
            recall = recall_score(y_test, y_pred, average='macro')
            f1 = f1_score(y_test, y_pred, average='macro')
            auc = roc_auc_score(y_test, y_pred)
            balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
            
            # Append results
            results.append([name, accuracy, precision, recall, f1, auc, balanced_accuracy])
        except Exception as e:
            print(f"Error occurred for {name}: {str(e)}")

    # Create a DataFrame from results
    results_df = pd.DataFrame(results, columns=["Model", "Accuracy", "Precision", "Recall", "F1-score", "AUC", "Balanced Accuracy"])
    results_df = results_df.sort_values(by=['F1-score', 'AUC'], ascending=False)
    return results_df, models


In [17]:
result_df, models = run_experiment(X_train, X_test, y_train, y_test)

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 47 is out of bounds for axis 1 with size 33
Error occurred for ClassifierChain: _BaseChain.__init__() missing 1 required positional argument: 'base_estimator'


  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Pleas

Error occurred for MultiOutputClassifier: MultiOutputClassifier.__init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: OneVsOneClassifier.__init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: OneVsRestClassifier.__init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: OutputCodeClassifier.__init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  0,   2,   3,   8,  11,  12,  14,  16,  17,  19,  21,  22,  24,
        26,  27,  28,  29,  30,  33,  35,  36,  37,  38,  39,  40,  43,
        44,  45,  46,  47,  48,  49,  50,  54,  56,  57,  58,  59,  60,
        61,  62,  63,  64,  65,  66,  67,  68,  72,  73,  74,  75,  77,
        78,  79,  80,  81,  82,  83,  84,  86,  87,  89,  90,  91,  92,
        9

  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
results_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score,AUC,Balanced Accuracy
12,HistGradientBoostingClassifier,0.900296,0.767657,0.705403,0.73058,0.705403,0.705403
0,AdaBoostClassifier,0.900296,0.768031,0.7036,0.729434,0.7036,0.7036
11,GradientBoostingClassifier,0.898815,0.764618,0.693743,0.721253,0.693743,0.693743
26,RandomForestClassifier,0.903258,0.785986,0.685442,0.720676,0.685442,0.685442
8,ExtraTreesClassifier,0.903258,0.786779,0.683639,0.719382,0.683639,0.683639
1,BaggingClassifier,0.896841,0.758818,0.68541,0.713322,0.68541,0.68541
16,LinearDiscriminantAnalysis,0.894373,0.750447,0.680403,0.707106,0.680403,0.680403
9,GaussianNB,0.866239,0.687214,0.707727,0.696568,0.707727,0.707727
21,MultinomialNB,0.872162,0.693752,0.693052,0.693401,0.693052,0.693052
4,ComplementNB,0.855874,0.674136,0.712668,0.689998,0.712668,0.712668


In [14]:
results_df.to_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/all_cities_random_shuffle.csv", index=False)

In [15]:
"""
Berlin, Munich, Stuttgart, Frankfurt: Big CITY EXP-1
Kalsruhe, trier, saarbrucken, mainz: EXP-2
"""

# EXP-1
big_cities = ['Berlin', 'Munich', 'Stuttgart', 'Frankfurt']
small_cities = ['Karlsruhe', 'Trier', 'Saarbrücken', 'Mainz']


# make a table in the end to summarise the results of all experiments

# big cities splited in trian and test where only one big city is test and all possible combinations for this
for city in tqdm(big_cities):
    test_cities = [city]
    train_cities = [x for x in big_cities if x != city]
    X_train, X_test, y_train, y_test = data_splitter(data, train_cities=train_cities, test_cities=test_cities)
    results_df, models = run_experiment(X_train, X_test, y_train, y_test)
    results_df.to_csv(f"C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/big_cities_test_city_{city}_.csv", index=False)
    



# small cities splited in trian and test where only one small city is test and all possible combinations for this
for city in tqdm(small_cities):
    test_cities = [city]
    train_cities = [x for x in small_cities if x != city]
    X_train, X_test, y_train, y_test = data_splitter(data, train_cities=train_cities, test_cities=test_cities)
    results_df, models = run_experiment(X_train, X_test, y_train, y_test)
    results_df.to_csv(f"C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/small_cities_test_city_{city}_.csv", index=False)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 51 is out of bounds for axis 1 with size 51
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([   2,    9,   17, ..., 3877, 3878, 3879], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
Error occurred for StackingClassifier: __init__() missing 1 required positional argument: 'estimators'
Error occurred for VotingClassifier: __init__() missing 1 required positional argument: 'estimators'


  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 53 is out of bounds for axis 1 with size 48
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  15,   17,   22, ..., 1409, 1410, 1420], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
Error occurred for StackingClassifier: __init__() missing 1 required positional argument: 'estimators'
Error occurred for VotingClassifier: __init__() missing 1 required positional argument: 'estimators'


  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 13 is out of bounds for axis 1 with size 10
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_i

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([ 11,  20,  21,  23,  27,  30,  31,  32,  39,  40,  41,  42,  43,
        44,  45,  46,  49,  50,  51,  52,  54,  55,  56,  57,  58,  59,
        64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  77,
        79,  80,  81,  82,  84,  85,  86,  95,  98,  99, 100, 101, 102,
       103, 104, 106, 107, 108, 109, 110, 111, 112, 119, 122, 123, 124,
       125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
       138, 139, 14

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 12 is out of bounds for axis 1 with size 11
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([   6,   12,   13,   19,   20,   21,   27,   28,   29,   30,   31,
         32,   33,   41,   43,   44,   45,   46,   55,   56,   57,   58,
         59,   60,   61,   66,   67,   68,   69,   70,   71,   79,   80,
         81,   82,   83,   84,   86,   88,   89,   90,   91,   92,   93,
         94,   95,   96,   97,  101,  102,  103,  104,  105,  106,  107,
        108,  109,  110,  115,  116,  117,  118,  120,  121,  122,  123,
        129, 

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Error occurred for AdaBoostClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BaggingClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BernoulliNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for CalibratedClassifierCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


Error occurred for CategoricalNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'
Error occurred for ComplementNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DecisionTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DummyClassifier: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Error occurred for ExtraTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ExtraTreesClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianProcessClassifier: Found array with 0 sample(s) (shape=(0, 19

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for LogisticRegressionCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.




Error occurred for MLPClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for MultinomialNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NearestCentroid: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for PassiveAggressiveClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for Perceptron: Found array with 0 sample(s) (shap

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 87 is out of bounds for axis 1 with size 46
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_i

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  7,  11,  15,  21,  25,  27,  28,  29,  39,  40,  41,  42,  43,
        47,  51,  52,  53,  54,  55,  58,  59,  63,  64,  65,  66,  70,
        71,  72,  73,  74,  75,  76,  82,  83,  84,  85,  86,  87,  88,
        90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 112, 115, 116, 120, 121, 122, 123,
       124, 125, 126, 127, 128, 129, 130, 131, 133, 135, 137, 138, 139,
       140, 141, 14

  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/41 [00:00<?, ?it/s]

Error occurred for AdaBoostClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BaggingClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BernoulliNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for CalibratedClassifierCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


Error occurred for CategoricalNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'
Error occurred for ComplementNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DecisionTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DummyClassifier: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Error occurred for ExtraTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ExtraTreesClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianProcessClassifier: Found array with 0 sample(s) (shape=(0, 19

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for LogisticRegressionCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for MLPClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for MultinomialNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NearestCentroid: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for PassiveAggressiveClassifier: Found array with 0 sampl

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 47 is out of bounds for axis 1 with size 33
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_i

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  0,   2,   3,   8,  11,  12,  14,  16,  17,  19,  21,  22,  24,
        26,  27,  28,  29,  30,  33,  35,  36,  37,  38,  39,  40,  43,
        44,  45,  46,  47,  48,  49,  50,  54,  56,  57,  58,  59,  60,
        61,  62,  63,  64,  65,  66,  67,  68,  72,  73,  74,  75,  77,
        78,  79,  80,  81,  82,  83,  84,  86,  87,  89,  90,  91,  92,
        93,  94,  95,  97,  98,  99, 101, 102, 104, 105, 106, 107, 108,
       110, 111, 11

  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
results_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score,AUC,Balanced Accuracy
23,PassiveAggressiveClassifier,0.918699,0.692404,0.692404,0.692404,0.692404,0.692404
18,LogisticRegression,0.916667,0.679212,0.664927,0.671674,0.664927,0.664927
2,BernoulliNB,0.847561,0.633567,0.81241,0.665443,0.81241,0.81241
5,DecisionTreeClassifier,0.918699,0.677897,0.639637,0.655727,0.639637,0.639637
11,GradientBoostingClassifier,0.922764,0.690739,0.628634,0.651895,0.628634,0.628634
21,MultinomialNB,0.894309,0.626664,0.652892,0.637969,0.652892,0.652892
26,RandomForestClassifier,0.934959,0.789076,0.595624,0.635116,0.595624,0.595624
1,BaggingClassifier,0.918699,0.667934,0.613254,0.633574,0.613254,0.613254
4,ComplementNB,0.880081,0.605113,0.645233,0.620127,0.645233,0.645233
8,ExtraTreesClassifier,0.934959,0.803313,0.582432,0.619342,0.582432,0.582432


In [17]:
import pandas as pd
import glob

# Get a list of all result files from different experiments
result_files = glob.glob("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/*.csv")

# Create a dictionary to store the total AUC and count for each model
auc_sum_per_model = {}
count_per_model = {}

# Iterate over each result file
for file in result_files:
    print(file)
    # Load the results for each experiment
    results = pd.read_csv(file)
    
    # Iterate over each row in the results
    for _, row in results.iterrows():
        model = row['Model']
        auc = row['AUC']
        
        # Update the total AUC and count for the model
        if model in auc_sum_per_model:
            auc_sum_per_model[model] += auc
            count_per_model[model] += 1
        else:
            auc_sum_per_model[model] = auc
            count_per_model[model] = 1

# Calculate the average AUC for each model
average_auc_per_model = {model: auc_sum_per_model[model] / count_per_model[model] for model in auc_sum_per_model}

# Create a DataFrame from the average AUC dictionary
average_auc_df = pd.DataFrame(list(average_auc_per_model.items()), columns=['Model', 'Average AUC'])

# Sort the DataFrame by Average AUC in descending order
sorted_models = average_auc_df.sort_values(by='Average AUC', ascending=False)

# Select the top 5 models
top_5_models = sorted_models.head(5)

# Display the best models
print(top_5_models)


C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results\all_cities_random_shuffle.csv
C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results\big_cities_test_city_Berlin_.csv
C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results\big_cities_test_city_Frankfurt_.csv
C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results\big_cities_test_city_Munich_.csv
C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results\big_cities_test_city_Stuttgart_.csv
C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results\small_cities_test_city_Karlsruhe_.csv
C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-place

In [18]:
top_5_models

Unnamed: 0,Model,Average AUC
11,BernoulliNB,0.779203
14,NearestCentroid,0.766557
9,ComplementNB,0.671147
8,MultinomialNB,0.662987
2,GradientBoostingClassifier,0.652306


In [21]:
import pandas as pd
import glob

# Get a list of all result files from different experiments
result_files = glob.glob("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/*.csv")

# Create an empty DataFrame to store the combined results
combined_results = pd.DataFrame()

# Create an empty DataFrame to store the summary
summary_results = pd.DataFrame(columns=['type_city'])

# Iterate over each result file
for type_city in ['big', 'small', 'all']:
    # Reset combined_results for each type_city iteration
    combined_results = pd.DataFrame()

    # Iterate over each result file
    for file in result_files:
        # Load the results for each experiment
        if type_city in file:
            results = pd.read_csv(file)
            
            # Append the results to the combined DataFrame
            combined_results = combined_results.append(results)

    # Calculate the average AUC for each model
    average_auc_per_model = combined_results.groupby('Model')['AUC'].mean()
    
    # Sort the models by average AUC in descending order
    sorted_models = average_auc_per_model.sort_values(ascending=False)
    
    # Filter the results to include only the rows corresponding to the top 5 models
    filtered_results = combined_results[combined_results['Model'].isin(top_5_models)]

    # Calculate the average AUC for each model
    average_auc_by_model = filtered_results.groupby('Model')['AUC'].mean()
    
    # Create a row with type_city and average AUC values for each model
    row = {'type_city': type_city}
    row.update(average_auc_by_model)
    
    # Append the row to the summary_results DataFrame
    summary_results = summary_results.append(row, ignore_index=True)

# Display the summary_results DataFrame
print(summary_results)


  type_city
0       big
1     small
2       all


  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  summary_results = summary_results.append(row, ignore_index=True)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  summary_results = summary_results.append(row, ignore_index=True)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  summary_results = summary_results.append(row, ignore_index=True)


In [23]:
summary_results

Unnamed: 0,type_city
0,big
1,small
2,all


In [25]:
import pandas as pd
import glob

# Get a list of all result files from different experiments
result_files = glob.glob("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/*.csv")

# Create an empty DataFrame to store the combined results
combined_results = pd.DataFrame()

# Create an empty DataFrame to store the summary
summary_results = pd.DataFrame(columns=['type_city', 'AUC', 'Accuracy', 'Precision', 'Recall'])

# Iterate over each result file
for type_city in ['big', 'small', 'all']:
    # Reset combined_results for each type_city iteration
    combined_results = pd.DataFrame()

    # Iterate over each result file
    for file in result_files:
        # Load the results for each experiment
        if type_city in file:
            results = pd.read_csv(file)
            
            # Append the results to the combined DataFrame
            combined_results = combined_results.append(results)

    # Filter the results to include only the rows corresponding to the top 5 models
    filtered_results = combined_results[combined_results['Model'].isin(top_5_models)]

    # Calculate the average values for each metric
    average_metrics_per_model = filtered_results.groupby('Model')['AUC', 'Accuracy', 'Precision', 'Recall'].mean()

    # Calculate the average values for each metric
    average_values = average_metrics_per_model.mean()

    # Create a row with type_city, average values for each metric
    row = {'type_city': type_city}
    for metric in ['AUC', 'Accuracy', 'Precision', 'Recall']:
        row[metric] = average_values[metric]

    # Append the row to the summary_results DataFrame
    summary_results = summary_results.append(row, ignore_index=True)

# Display the summary_results DataFrame
summary_results


  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  average_metrics_per_model = filtered_results.groupby('Model')['AUC', 'Accuracy', 'Precision', 'Recall'].mean()
  summary_results = summary_results.append(row, ignore_index=True)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  average_metrics_per_model = filtered_results.groupby('Model')['AUC', 'Accuracy', 'Precision', 'Recall'].mean()
  summary_results = summary_results.append(row, ignore_index=True)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(results)
  combined_results = combined_results.append(r

Unnamed: 0,type_city,AUC,Accuracy,Precision,Recall
0,big,,,,
1,small,,,,
2,all,,,,


In [26]:
top_5_models

Unnamed: 0,Model,Average AUC
11,BernoulliNB,0.779203
14,NearestCentroid,0.766557
9,ComplementNB,0.671147
8,MultinomialNB,0.662987
2,GradientBoostingClassifier,0.652306


In [27]:
summary_results.to_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/results/summary_results.csv", index=False)