In [40]:
# Import the modules
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt
%matplotlib inline

---

In [41]:
# Read the CSV file into a Pandas DataFrame (relative path, so it is resolved
# against the current working directory)
# NOTE(review): an earlier alternative input, CTG3.csv, used to be selectable here.
file_path = Path("data_normal_path.csv")
df = pd.read_csv(file_path)

# Review the DataFrame
df.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,MLTV,NSP
0,120,0,0,0,0,0,0,73,0.5,43,2.4,1
1,132,4,0,4,2,0,0,17,2.1,0,10.4,0
2,133,2,0,5,2,0,0,16,2.1,0,13.4,0
3,134,2,0,6,2,0,0,16,2.4,0,23.0,0
4,132,4,0,5,0,0,0,16,2.4,0,19.9,0


In [42]:
# Candidate cluster counts to evaluate: 1 through 5
k = list(range(1, 6))

# Fit one K-means model per candidate count on the whole DataFrame and keep
# each fitted model's inertia_ for the elbow curve.
# NOTE(review): df is passed as-is, so the NSP column (used later to colour the
# comparison plots) is also a clustering input, and no scaling is applied even
# though StandardScaler is imported -- confirm this is intended.
inertia = []
for i in k:
    k_model = KMeans(n_clusters=i, random_state=0).fit(df)
    inertia.append(k_model.inertia_)



In [43]:
# Pair every tested k with its inertia so the elbow curve can be plotted
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(data=elbow_data)

# Show the first rows of the elbow table
df_elbow.head()

Unnamed: 0,k,inertia
0,1,4608216.0
1,2,2331054.0
2,3,1618039.0
3,4,1261790.0
4,5,958012.3


In [44]:
# Plot inertia against k as a line chart, with one x-axis tick per tested k;
# the bend in the curve is what the elbow method reads off.
df_elbow.hvplot.line(
    x="k", 
    y="inertia", 
    title="Elbow Curve", 
    xticks=k
)

In [45]:
# Create and initialise the K-means model instance for 3 clusters
# (random_state is fixed so the centroid initialisation is repeatable).
# NOTE(review): the elbow loop earlier in the notebook uses random_state=0,
# this model uses random_state=1 -- confirm the difference is intentional.
model = KMeans(n_clusters=3, random_state=1)

# Display the model
model

In [46]:
# Fit the K-means model on the whole DataFrame (every column of df is an input).
# NOTE(review): this includes the NSP column that later cells plot against the
# cluster assignments, and the features are not scaled -- confirm this is intended.
model.fit(df)



In [47]:
# Assign every row of df to one of the fitted model's clusters
predictions = model.predict(df)

In [48]:
# Build a new DataFrame from df with the K-means cluster assignment of each row
# stored in a 'prediction' column; df itself is left unchanged.
predictions_df = df.assign(prediction=predictions)

# Show the first rows, including the new column
predictions_df.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,MLTV,NSP,prediction
0,120,0,0,0,0,0,0,73,0.5,43,2.4,1,2
1,132,4,0,4,2,0,0,17,2.1,0,10.4,0,0
2,133,2,0,5,2,0,0,16,2.1,0,13.4,0,0
3,134,2,0,6,2,0,0,16,2.4,0,23.0,0,0
4,132,4,0,5,0,0,0,16,2.4,0,19.9,0,0


In [49]:
# Plot LB against MSTV, grouped by the NSP column that came with the data.
# The title makes this figure distinguishable from the cluster-coloured
# version of the same axes.
predictions_df.hvplot.scatter(
    x="LB",
    y="MSTV",
    by="NSP",
    title="LB vs MSTV, grouped by NSP",
)

In [50]:
# Plot LB against MSTV, grouped by the K-means cluster assigned to each row.
# The title makes this figure distinguishable from the NSP-coloured version
# of the same axes.
predictions_df.hvplot.scatter(
    x="LB",
    y="MSTV",
    by="prediction",
    title="LB vs MSTV, grouped by K-means cluster",
)

In [51]:
# Plot ASTV against MSTV, grouped by the NSP column that came with the data
predictions_df.hvplot.scatter(
    x="ASTV",
    y="MSTV",
    by="NSP",
    title="ASTV vs MSTV, grouped by NSP",
)

In [52]:
# Plot ASTV against MSTV, grouped by the K-means cluster assigned to each row
predictions_df.hvplot.scatter(
    x="ASTV",
    y="MSTV",
    by="prediction",
    title="ASTV vs MSTV, grouped by K-means cluster",
)