In [71]:
from bs4 import BeautifulSoup
import requests
import re
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.mixture import GaussianMixture
from sklearn.cluster import DBSCAN
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.metrics import calinski_harabasz_score
import matplotlib.pyplot as plt


In [72]:
#Raw Data
data = pd.read_csv('https://raw.githubusercontent.com/FelAmore/Data-Science/main/tsunami.csv')

In [73]:
#Selected Data that wants to be Extracted
selected_columns = data[['Longitude', 'Latitude', 'Magnitude']]

In [74]:
#Scaled Data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(selected_columns)

In [75]:
#Initialize the K-means Clustering
kmeans = KMeans(n_clusters=3, random_state = 42)
data['Kmeans_Clustering']= kmeans.fit_predict(scaled_data)



In [87]:
#Initialize the GMM Clustering
gmm = GaussianMixture(n_components=3)
data['GMM_Clustering']= gmm.fit_predict(scaled_data)

In [88]:
#Initialize the DBSCAN Clustering
dbscan = DBSCAN(eps =0.5, min_samples= 3)
data['DBSCAN_Clustering']= dbscan.fit_predict(scaled_data)

In [89]:
# Initialize the Hierarchial Clustering
agg_cluster = AgglomerativeClustering(n_clusters=3)
data['Cluster_Hierarchical'] = agg_cluster.fit_predict(scaled_data)

In [96]:
#Evaluate the Clustering using Calinski-Harabasz Score
calinski_kmeans = calinski_harabasz_score(scaled_data, data['Kmeans_Clustering'])
calinski_dbscan = calinski_harabasz_score(scaled_data, data['DBSCAN_Clustering'])
calinski_hierarchical = calinski_harabasz_score(scaled_data, data['Cluster_Hierarchical'])
calinski_gmm = calinski_harabasz_score(scaled_data, data['GMM_Clustering'])

In [98]:
#Printing the Outputs
print(f"The Clustering Methods used is: K-means, DBSCAN, Hierarchial, GMM")
print(f"The Evaluation Methods used is: Calinski-Harabasz")

print(f" ")
print(f"K means     : {calinski_kmeans} ")
print(f"DBSCAN      : {calinski_dbscan} ")
print(f"Hierarchial : {calinski_hierarchical} ")
print(f"GMM         : {calinski_gmm} ")

The Clustering Methods used is: K-means, DBSCAN, Hierarchial, GMM
The Evaluation Methods used is: Calinski-Harabasz
 
K means     : 24.69038169431235 
DBSCAN      : 9.52082519960284 
Hierarchial : 23.509005747983036 
GMM         : 17.02123330660256 
