In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN, SpectralClustering
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score

In [None]:
# Location of the raw drivers table, resolved relative to the notebook's
# working directory.
file_path = "F1Drivers_Dataset.csv"

# Read the CSV into a DataFrame; later cells derive everything from this frame.
f1_drivers_dataset = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Names of the float64/int64 columns; only these are used for clustering.
numerical_columns = f1_drivers_dataset.select_dtypes(include=['float64', 'int64']).columns

# Impute missing values with each column's mean.
# A new frame is built instead of calling fillna(..., inplace=True) on a
# selection taken from the raw frame: the in-place form triggers pandas'
# SettingWithCopyWarning, whereas this form leaves `f1_drivers_dataset`
# untouched and makes the cell safe to re-run.
numeric_features = f1_drivers_dataset[numerical_columns]
data_for_clustering = numeric_features.fillna(numeric_features.mean())

In [None]:
# Standardise every feature before PCA: learn the per-column statistics
# first, then apply them to the same frame.
scaler = StandardScaler().fit(data_for_clustering)
scaled_data = scaler.transform(data_for_clustering)

In [None]:
# Shared settings for this cell.
# N_CLUSTERS: cluster count assumed for the algorithms that need one up front.
# RANDOM_STATE: fixed seed so that the stochastic estimators (K-Means
# initialisation, spectral embedding / label assignment) give the same labels
# on every Restart & Run All; without it the evaluation table changes per run.
N_CLUSTERS = 3
RANDOM_STATE = 42

# Reduce to 2 dimensions for visualization purposes.
pca = PCA(n_components=2, random_state=RANDOM_STATE)
pca_data = pca.fit_transform(scaled_data)

# Applying K-Means Clustering.
# n_init is set explicitly because its default differs between scikit-learn
# releases; pinning it keeps results comparable across environments.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
kmeans_labels = kmeans.fit_predict(pca_data)

# Applying Hierarchical Clustering (agglomerative with default settings,
# so no seed is needed).
hierarchical = AgglomerativeClustering(n_clusters=N_CLUSTERS)
hierarchical_labels = hierarchical.fit_predict(pca_data)

# Applying DBSCAN. eps / min_samples are starting values that can be tuned.
# DBSCAN chooses the number of clusters itself and labels noise points -1.
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(pca_data)

# Applying Spectral Clustering on a nearest-neighbours affinity graph.
spectral = SpectralClustering(
    n_clusters=N_CLUSTERS,
    affinity='nearest_neighbors',
    random_state=RANDOM_STATE,
)
spectral_labels = spectral.fit_predict(pca_data)

In [None]:
def _score_clustering(data, labels, noise_label=-1):
    """Return (silhouette, Calinski-Harabasz, Davies-Bouldin) for one labelling.

    Parameters
    ----------
    data : numpy.ndarray
        Sample coordinates the labels were computed on, one row per sample.
    labels : numpy.ndarray
        Cluster label per row of ``data``.
    noise_label : int, default -1
        Label marking noise points (DBSCAN uses -1). Rows with this label are
        excluded before scoring so that noise is not evaluated as if it were
        a real cluster.

    Returns
    -------
    tuple of float
        The three scores, or three NaNs when fewer than two clusters remain
        (or every remaining sample is its own cluster). The scikit-learn
        metric functions raise ValueError in that situation, which would
        otherwise abort the whole evaluation cell.
    """
    keep = labels != noise_label
    data, labels = data[keep], labels[keep]

    n_samples = len(labels)
    n_clusters = len(set(labels.tolist()))
    # The metrics are only defined for 2 <= n_clusters <= n_samples - 1.
    if not 2 <= n_clusters < n_samples:
        undefined = float("nan")
        return undefined, undefined, undefined

    return (
        silhouette_score(data, labels),
        calinski_harabasz_score(data, labels),
        davies_bouldin_score(data, labels),
    )


# For K-Means Clustering
silhouette_kmeans, calinski_kmeans, davies_kmeans = _score_clustering(pca_data, kmeans_labels)

# For Hierarchical Clustering
silhouette_hierarchical, calinski_hierarchical, davies_hierarchical = _score_clustering(
    pca_data, hierarchical_labels
)

# For DBSCAN (noise points are dropped; scores are NaN if < 2 clusters were found)
silhouette_dbscan, calinski_dbscan, davies_dbscan = _score_clustering(pca_data, dbscan_labels)

# For Spectral Clustering
silhouette_spectral, calinski_spectral, davies_spectral = _score_clustering(pca_data, spectral_labels)

# One row per algorithm, one column per metric.
evaluation_metrics = {
    "Algorithm": ["K-Means", "Hierarchical", "DBSCAN", "Spectral"],
    "Silhouette Score": [silhouette_kmeans, silhouette_hierarchical, silhouette_dbscan, silhouette_spectral],
    "Calinski-Harabasz Score": [calinski_kmeans, calinski_hierarchical, calinski_dbscan, calinski_spectral],
    "Davies-Bouldin Score": [davies_kmeans, davies_hierarchical, davies_dbscan, davies_spectral]
}

evaluation_results = pd.DataFrame(evaluation_metrics)
print(evaluation_results)