In [None]:
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [None]:
# Loading the ufo data
# The path is relative, so it resolves against the notebook's working directory.
file_path = Path("../Resources/ufo_data_cleaned4.csv")
df_ufo = pd.read_csv(file_path)
# Preview the first rows to confirm the file was parsed as expected
df_ufo.head()

In [None]:
# Standardize data with StandardScaler (each column is centred and scaled to unit variance).
# The whole frame is passed in, so this assumes every column of df_ufo is
# numeric — TODO confirm against the CSV.
ufo_scaled = StandardScaler().fit_transform(df_ufo)
# Print the first five scaled rows as a quick sanity check
print(ufo_scaled[0:5])

In [None]:
# Applying PCA to reduce the scaled data to 2 dimensions
# (the original note says the input has 4 features — confirm with ufo_scaled.shape)

# Initialize PCA model, keeping two principal components
pca = PCA(n_components=2)

# Fit PCA on the scaled data and project it onto the two principal components
ufo_pca = pca.fit_transform(ufo_scaled)

In [None]:
# Shape of the scaled data (rows, features) before dimensionality reduction
ufo_scaled.shape

In [None]:
# Shape after PCA; the second dimension should be 2 because n_components=2
ufo_pca.shape

In [None]:
# Wrap the PCA output array in a DataFrame, one named column per component
df_ufo_pca = pd.DataFrame(
    ufo_pca,
    columns=[
        "principal component 1",
        "principal component 2",
    ],
)
df_ufo_pca.head()

In [None]:
# Explained variance: fraction of the total variance captured by each of the
# two principal components kept by the fitted PCA model
pca.explained_variance_ratio_

In [None]:
# Finding the best value for k with an elbow curve (inertia vs. number of clusters)
inertia = []
k = list(range(1, 11))

# Cluster on the two principal-component columns only. Another cell in this
# notebook adds a "class" label column to df_ufo_pca; selecting the columns
# explicitly keeps those labels out of the features when this cell is re-run
# after that one.
pca_features = df_ufo_pca[["principal component 1", "principal component 2"]]

# Calculate the inertia for the range of k values
for i in k:
    # n_init is pinned because its default differs between scikit-learn
    # releases (10 vs. "auto"); an explicit value keeps the curve comparable
    # across environments and avoids the FutureWarning on 1.2/1.3.
    km = KMeans(n_clusters=i, n_init=10, random_state=0)
    km.fit(pca_features)
    inertia.append(km.inertia_)

# Creating the Elbow Curve
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)

plt.plot(df_elbow['k'], df_elbow['inertia'])
# Put ticks exactly on the evaluated k values instead of a hard-coded 0-12 range
plt.xticks(k)
plt.title('Elbow Curve')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

In [None]:
# Predicting clusters with k=3

# Use only the principal-component columns as features. This cell adds a
# "class" column to df_ufo_pca below, so fitting on the whole frame would feed
# the previous labels back in as a feature whenever the cell is re-run.
pca_features = df_ufo_pca[["principal component 1", "principal component 2"]]

# Initialize the k-means model.
# n_init is pinned because its default differs between scikit-learn releases
# (10 vs. "auto"); an explicit value keeps the clustering reproducible.
model = KMeans(n_clusters=3, n_init=10, random_state=0)

# Fit the model
model.fit(pca_features)

# Predict clusters for the same rows the model was fitted on
predictions = model.predict(pca_features)

# Add the predicted class column to the dataframe
df_ufo_pca["class"] = model.labels_
df_ufo_pca.head()

In [None]:
# Scatter the two principal components against each other, colouring each
# point by its assigned cluster label
plt.scatter(
    df_ufo_pca['principal component 1'],
    df_ufo_pca['principal component 2'],
    c=df_ufo_pca['class'],
)
plt.title('UFO clusters')
plt.xlabel('Principal component 1')
plt.ylabel('Principal component 2')
plt.show()