In [None]:
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN, SpectralClustering
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, jaccard_score

In [None]:
# Load the crop recommendation dataset from the notebook's working location
# and show it as the cell's output.
csv_path = '/content/Crop_recommendation.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [None]:
# Features used as clustering input: temperature, humidity and rainfall.
feature_names = ['temperature', 'humidity', 'rainfall']
columns = data[feature_names]

In [None]:
# Standardize the selected features so each contributes on a comparable scale.
# The scaler is fitted first and then applied to the same columns.
scaler = StandardScaler()
scaler.fit(columns)
scaled_data = scaler.transform(columns)

In [None]:
# KMeans: partition the scaled features into 3 clusters (seeded for repeatability)
# and store each row's cluster id on the frame.
kmeans = KMeans(n_clusters=3, random_state=66)
kmeans.fit(scaled_data)
data['cc_kmeans'] = kmeans.labels_



In [None]:
# DBSCAN: density-based clustering on the scaled features.
# Rows that DBSCAN considers noise receive the label -1.
dbscan = DBSCAN(eps=0.5, min_samples=3)
dbscan.fit(scaled_data)
data['cc_dbscan'] = dbscan.labels_

In [None]:
# GMM clustering
# GaussianMixture's initialization is stochastic, so without a fixed seed the
# cluster assignments (and the silhouette score derived from them) change from
# run to run. Use the same seed as the KMeans and Spectral cells so that a
# Restart & Run All reproduces the reported numbers.
gmm = GaussianMixture(n_components=3, random_state=66)
data['cc_gaussian'] = gmm.fit_predict(scaled_data)

In [None]:
# Spectral clustering into 3 clusters (seeded for repeatability);
# the fitted labels are stored on the frame.
spectral = SpectralClustering(n_clusters=3, random_state=66)
spectral.fit(scaled_data)
data['cc_spectral'] = spectral.labels_

In [None]:
# evaluate using silhouette score (ranges from -1 to 1; higher is better)
s_kmeans = silhouette_score(scaled_data, data['cc_kmeans'])
s_gaussian = silhouette_score(scaled_data, data['cc_gaussian'])
s_spectral = silhouette_score(scaled_data, data['cc_spectral'])

# DBSCAN labels noise points as -1. Passing those labels straight to
# silhouette_score would treat all noise as one additional "cluster" and
# distort the result, so the DBSCAN score is computed on non-noise points only.
# NOTE: this means the DBSCAN score covers a subset of the rows, unlike the
# other three methods.
dbscan_assigned = (data['cc_dbscan'] != -1).to_numpy()
dbscan_labels = data.loc[dbscan_assigned, 'cc_dbscan']
if dbscan_labels.nunique() >= 2:
    s_dbscan = silhouette_score(scaled_data[dbscan_assigned], dbscan_labels)
else:
    # silhouette_score needs at least two clusters; report "undefined" instead
    # of raising when DBSCAN found fewer than two.
    s_dbscan = float('nan')


In [None]:
# Assemble the summary line by line, then emit it in a single print call.
report_lines = [
    "Clustering methods: KMeans, DBSCAN, Gaussian Mixture, Spectral",
    "Evaluated using Silhouette Score",
    f"KMeans: {s_kmeans}",
    f"DBSCAN: {s_dbscan}",
    f"GMM: {s_gaussian}",
    f"Spectral: {s_spectral}",
]
print("\n".join(report_lines))

Clustering methods: KMeans, DBSCAN, Gaussian Mixture, Spectral
Evaluated using Silhouette Score
KMeans: 0.3375809171939447
DBSCAN: 0.17894064302275964
GMM: 0.33690817573484577
Spectral: 0.3644087090951758
