In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, balanced_accuracy_score
from sklearn.utils import all_estimators
import glob

In [2]:
# Function to split data into train and test sets
def data_splitter(data, train_cities=None, test_cities=None, test_size=0.2, random_state=42):
    if train_cities is not None:
        train = data[data['city'].isin(train_cities)]
        test = data[data['city'].isin(test_cities)]

        X_train = train.drop(['city','geometry', 'EV_stations'], axis=1)
        y_train = train['EV_stations'].astype(int)
        y_train = y_train.apply(lambda x: 1 if x > 0 else 0)

        X_test = test.drop(['city','geometry', 'EV_stations'], axis=1)
        y_test = test['EV_stations'].astype(int)
        y_test = y_test.apply(lambda x: 1 if x > 0 else 0)
    else:
        X = data.drop(['city','geometry', "EV_stations"], axis=1)
        y = data['EV_stations']
        y = y.apply(lambda x: 1 if x > 0 else 0)
        X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=test_size, random_state=random_state)

    return X_train, X_test, y_train, y_test

In [3]:
# Function to run the experiment for a given city
def run_experiment(X_train, X_test, y_train, y_test):
    # Get all classification model classes
    classifiers = all_estimators(type_filter='classifier')

    # Initialize result table
    results = []
    models = {}
    classification_reports = {}

    # Run models and collect results
    for name, ClassifierClass in tqdm(classifiers):
        try:
            # Initialize model
            model = ClassifierClass()
            model.fit(X_train, y_train)
            models[name] = model
            y_pred = model.predict(X_test)

            # Calculate metrics
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='macro')
            recall = recall_score(y_test, y_pred, average='macro')
            f1 = f1_score(y_test, y_pred, average='macro')
            auc = roc_auc_score(y_test, y_pred)
            balanced_accuracy = balanced_accuracy_score(y_test, y_pred)

            # Append results
            results.append([name, accuracy, precision, recall, f1, auc, balanced_accuracy])

            # Save classification report
            report_dict = classification_report(y_test, y_pred, output_dict=True)
            classification_reports[name] = report_dict

        except Exception as e:
            print(f"Error occurred for {name}: {str(e)}")

    # Create a DataFrame from results
    results_df = pd.DataFrame(results, columns=["Model", "Accuracy", "Precision", "Recall", "F1-score", "AUC", "Balanced Accuracy"])
    results_df = results_df.sort_values(by=['F1-score', 'AUC'], ascending=False)
    results_df.to_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/all_cities_random_shuffle.csv", index=False)

    # Save classification reports to CSV
    classification_reports_df = pd.DataFrame.from_dict(classification_reports, orient='index')
    classification_reports_df.to_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/classification_reports.csv")

    return results_df, models

In [4]:
data = pd.read_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/processed/all_city_data_with_pop.csv")

In [5]:
# Filtering columns to be used for modeling
data = data[['geometry', 'city', 'EV_stations', 'parking', 'edges',
             'parking_space', 'civic', 'restaurant', 'park', 'school',
             'node', 'Community_centre', 'place_of_worship', 'university', 'cinema',
             'library', 'commercial', 'retail', 'townhall', 'government',
             'residential', 'population']]
print("data size:", data.shape)
data = data.dropna()
print("data size after dropping na:", data.shape)


data size: (10824, 22)
data size after dropping na: (10129, 22)


In [6]:
# Define big and small cities
big_cities = ['Berlin', 'Munich', 'Stuttgart', 'Frankfurt']
small_cities = ['Karlsruhe', 'Trier', 'Saarbrücken', 'Mainz']

In [7]:
# Run experiments for big cities
for city in tqdm(big_cities):
    test_cities = [city]
    train_cities = [x for x in big_cities if x != city]
    X_train, X_test, y_train, y_test = data_splitter(data, train_cities=train_cities, test_cities=test_cities)
    results_df, models = run_experiment(X_train, X_test, y_train, y_test)
    results_df.to_csv(f"C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/big_cities_test_city_{city}.csv", index=False)

# Run experiments for small cities
for city in tqdm(small_cities):
    test_cities = [city]
    train_cities = [x for x in small_cities if x != city]
    X_train, X_test, y_train, y_test = data_splitter(data, train_cities=train_cities, test_cities=test_cities)
    results_df, models = run_experiment(X_train, X_test, y_train, y_test)
    results_df.to_csv(f"C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/small_cities_test_city_{city}.csv", index=False)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 51 is out of bounds for axis 1 with size 51
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([   2,    9,   17, ..., 3877, 3878, 3879], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
Error occurred for StackingClassifier: __init__() missing 1 required positional argument: 'estimators'
Error occurred for VotingClassifier: __init__() missing 1 required positional argument: 'estimators'


  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 53 is out of bounds for axis 1 with size 48
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  15,   17,   22, ..., 1409, 1410, 1420], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
Error occurred for StackingClassifier: __init__() missing 1 required positional argument: 'estimators'
Error occurred for VotingClassifier: __init__() missing 1 required positional argument: 'estimators'


  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 13 is out of bounds for axis 1 with size 10
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modu

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([ 11,  20,  21,  23,  27,  30,  31,  32,  39,  40,  41,  42,  43,
        44,  45,  46,  49,  50,  51,  52,  54,  55,  56,  57,  58,  59,
        64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  77,
        79,  80,  81,  82,  84,  85,  86,  95,  98,  99, 100, 101, 102,
       103, 104, 106, 107, 108, 109, 110, 111, 112, 119, 122, 123, 124,
       125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
       138, 139, 14

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 12 is out of bounds for axis 1 with size 11
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([   6,   12,   13,   19,   20,   21,   27,   28,   29,   30,   31,
         32,   33,   41,   43,   44,   45,   46,   55,   56,   57,   58,
         59,   60,   61,   66,   67,   68,   69,   70,   71,   79,   80,
         81,   82,   83,   84,   86,   88,   89,   90,   91,   92,   93,
         94,   95,   96,   97,  101,  102,  103,  104,  105,  106,  107,
        108,  109,  110,  115,  116,  117,  118,  120,  121,  122,  123,
        129, 

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/41 [00:00<?, ?it/s]

Error occurred for AdaBoostClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BaggingClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BernoulliNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for CalibratedClassifierCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


Error occurred for CategoricalNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'
Error occurred for ComplementNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DecisionTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DummyClassifier: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.
Error occurred for ExtraTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ExtraTreesClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianProcessClassifier: Found array with 0 sample(s) (shape=(0, 19

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for LogisticRegressionCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for MLPClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for MultinomialNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NearestCentroid: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for PassiveAggressiveClassifier: Found array with 0 sampl

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 87 is out of bounds for axis 1 with size 46
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modu

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  7,  11,  15,  21,  25,  27,  28,  29,  39,  40,  41,  42,  43,
        47,  51,  52,  53,  54,  55,  58,  59,  63,  64,  65,  66,  70,
        71,  72,  73,  74,  75,  76,  82,  83,  84,  85,  86,  87,  88,
        90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 112, 115, 116, 120, 121, 122, 123,
       124, 125, 126, 127, 128, 129, 130, 131, 133, 135, 137, 138, 139,
       140, 141, 14

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  0%|          | 0/41 [00:00<?, ?it/s]

Error occurred for AdaBoostClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BaggingClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for BernoulliNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for CalibratedClassifierCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for CategoricalNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'
Error occurred for ComplementNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DecisionTreeClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for DummyClassifier: Found array with 0 sample(s) (shape=(0,)) whi

  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


Error occurred for ExtraTreesClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GaussianProcessClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for GradientBoostingClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for HistGradientBoostingClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for KNeighborsClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for LabelPropagation: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for LabelSpreading: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for LinearDiscriminantAn

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for LogisticRegressionCV: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.




Error occurred for MLPClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for MultinomialNB: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NearestCentroid: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for PassiveAggressiveClassifier: Found array with 0 sample(s) (shape=(0, 19)) while a minimum of 1 is required.
Error occurred for Perceptron: Found array with 0 sample(s) (shap

  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 47 is out of bounds for axis 1 with size 33
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modu

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([  0,   2,   3,   8,  11,  12,  14,  16,  17,  19,  21,  22,  24,
        26,  27,  28,  29,  30,  33,  35,  36,  37,  38,  39,  40,  43,
        44,  45,  46,  47,  48,  49,  50,  54,  56,  57,  58,  59,  60,
        61,  62,  63,  64,  65,  66,  67,  68,  72,  73,  74,  75,  77,
        78,  79,  80,  81,  82,  83,  84,  86,  87,  89,  90,  91,  92,
        93,  94,  95,  97,  98,  99, 101, 102, 104, 105, 106, 107, 108,
       110, 111, 11

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
# Run experiment for all cities
X_train, X_test, y_train, y_test = data_splitter(data)
results_df, models = run_experiment(X_train, X_test, y_train, y_test)
results_df.to_csv("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/all_cities_random_shuffle.csv", index=False)

# Combine results for summary
result_files = glob.glob("C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/*.csv")
combined_results = pd.concat([pd.read_csv(file) for file in result_files], ignore_index=True)


  0%|          | 0/41 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Error occurred for CategoricalNB: index 7 is out of bounds for axis 1 with size 7
Error occurred for ClassifierChain: __init__() missing 1 required positional argument: 'base_estimator'


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
  probabilities /= normalizer
  probabilities /= normalizer
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Error occurred for MultiOutputClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for NuSVC: specified nu is infeasible
Error occurred for OneVsOneClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OneVsRestClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for OutputCodeClassifier: __init__() missing 1 required positional argument: 'estimator'
Error occurred for RadiusNeighborsClassifier: No neighbors found for test samples array([   0,    1,    2, ..., 2022, 2023, 2025], dtype=int64), you can try using larger radius, giving a label for outliers, or considering removing them from your dataset.
Error occurred for StackingClassifier: __init__() missing 1 required positional argument: 'estimators'
Error occurred for VotingClassifier: __init__() missing 1 required positional argument: 'estimators'


In [11]:
results_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score,AUC,Balanced Accuracy
12,HistGradientBoostingClassifier,0.900296,0.767657,0.705403,0.73058,0.705403,0.705403
0,AdaBoostClassifier,0.900296,0.768031,0.7036,0.729434,0.7036,0.7036
11,GradientBoostingClassifier,0.899309,0.766435,0.694023,0.722009,0.694023,0.694023
8,ExtraTreesClassifier,0.900296,0.773813,0.681959,0.714751,0.681959,0.681959
26,RandomForestClassifier,0.898322,0.765717,0.680839,0.711726,0.680839,0.680839
16,LinearDiscriminantAnalysis,0.894373,0.750447,0.680403,0.707106,0.680403,0.680403
17,LinearSVC,0.895854,0.756853,0.675833,0.705392,0.675833,0.675833
1,BaggingClassifier,0.89388,0.74941,0.674713,0.702476,0.674713,0.674713
9,GaussianNB,0.866239,0.687214,0.707727,0.696568,0.707727,0.707727
20,MLPClassifier,0.901283,0.792555,0.653665,0.693599,0.653665,0.653665


In [18]:
# Calculate average AUC per model
average_auc_per_model = combined_results.groupby('Model')['AUC'].mean()

# Sort the models by average AUC in descending order
sorted_models = average_auc_per_model.sort_values(ascending=False)

# Select the top 5 models
top_5_models = sorted_models.head(5)

# Create a DataFrame from the average AUC dictionary
average_auc_df = pd.DataFrame(list(average_auc_per_model.items()), columns=['Model', 'Average AUC'])

# Sort the DataFrame by Average AUC in descending order
sorted_models = average_auc_df.sort_values(by='Average AUC', ascending=False)

# Select the top 5 models
top_5_models = sorted_models.head(5)


In [13]:
top_5_models

Unnamed: 0,Model,Average AUC
2,BernoulliNB,0.779203
22,NearestCentroid,0.766557
4,ComplementNB,0.671147
21,MultinomialNB,0.662987
11,GradientBoostingClassifier,0.655604


In [10]:
# Save the top 5 models' predictions for each city separately
for city in tqdm(data['city'].unique()):
    X_city = data[data['city'] == city].copy()  # Include 'geometry' in X_city

    for model_name in top_5_models['Model']:
        model = models[model_name]
        predictions = model.predict(X_city.drop(['city', 'geometry', 'EV_stations'], axis=1))
        
        # Create a DataFrame with the specified format
        predictions_df = X_city.copy()  # Include all columns from X_city
        predictions_df['predicted_EV_stations'] = predictions

        # Save the DataFrame to CSV
        file_path = f"C:/Users/ASUS/OneDrive/DataScience/FINAL-PROJECT/data-science-Optimal-EV-station-placement-main/main_raw/data/raw_results/predictions/predictions_{model_name}_{city}.csv"
        predictions_df.to_csv(file_path, index=False)


  0%|          | 0/8 [00:00<?, ?it/s]