In [42]:
import pandas as pd


In [43]:
df = pd.read_csv("../Data_120294_2023-03-29_cleaned.csv")


In [44]:
mean_wl_time = df['wl_time'].mean()
print(f"Mean waiting list time: {mean_wl_time}")

df['wl_time'] = df['wl_time'].apply(lambda x: 0 if x < mean_wl_time else 1)
print(df['wl_time'].value_counts(normalize=True))

one_hot_cols = [col for col in df.columns if df[col].nunique() == 2]


Mean waiting list time: 192.52839426430694
0    0.716807
1    0.283193
Name: wl_time, dtype: float64


In [45]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

X = df.drop(['wl_time'], axis=1)
y = df['wl_time']

columns_to_scale = [col for col in X.columns if col not in one_hot_cols]
# scale all columns except the one-hot encoded ones
scaler = StandardScaler()
X[columns_to_scale] = scaler.fit_transform(X[columns_to_scale])

log = LogisticRegression(max_iter=1000)
f1_scores = cross_val_score(log, X, y, cv=5, scoring='f1').mean()
f1_macro_scores = cross_val_score(log, X, y, cv=5, scoring='f1_macro').mean()

print(f"F1 score: {f1_scores}")
print(f"F1 macro score: {f1_macro_scores}")


F1 score: 0.4207234479936403
F1 macro score: 0.6316114297647631


In [46]:
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import RFECV
import numpy as np

# Perform k-fold cross-validation and compute the initial F1 score
scores = cross_val_score(log, X, y, cv=5, scoring='f1')
avg_score = np.mean(scores)
print("Initial F1 score: {:.4f}".format(avg_score))

selector = RFECV(estimator=log, cv=5, scoring='f1', verbose=1)
print(f"Num of features: {X.shape[1]}")

selector.fit(X, y)

print(f"Optimal number of features: {selector.n_features_}")
print(f"The mask of the selected features: {selector.support_}")
print(f"The ranking of the features: {selector.ranking_}")

Initial F1 score: 0.4207
Num of features: 82
Fitting estimator with 82 features.
Fitting estimator with 81 features.



KeyboardInterrupt

