In [None]:
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the dataset from the working directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='netflix.csv')
df.head(n=5)

In [None]:
# Select the numeric columns to cluster on, leaving out row-identifier columns.
numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
ignore_cols = ['id', 'ID', 'index', 'Unnamed: 0']

# Match an ignored name only when it stands alone inside the column name
# (not flanked by letters or digits), case-insensitively. A plain substring
# test would also throw away real features such as 'width' or 'paid_amount'
# because they contain "id", while missing spellings such as 'Id'.
ignore_pattern = re.compile(
    r'(?<![A-Za-z0-9])(?:'
    + '|'.join(re.escape(name) for name in ignore_cols)
    + r')(?![A-Za-z0-9])',
    flags=re.IGNORECASE,
)
# str(c) keeps the filter working when column labels are not strings.
feature_cols = [c for c in numeric_cols if not ignore_pattern.search(str(c))]

print("Feature columns:", feature_cols)
if not feature_cols:
    raise ValueError("No numeric feature columns available for clustering.")

# Keep only rows with a value for every feature; K-Means cannot handle NaN.
X = df[feature_cols].dropna()
if X.empty:
    raise ValueError("No complete rows remain after dropping missing values.")

# Scaling: standardise each feature so no single column dominates the distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Elbow method: fit K-Means for each candidate k and record the
# within-cluster sum of squares (the model's inertia).
wcss = []
max_k = 10
# KMeans raises ValueError when n_clusters exceeds the number of samples,
# so cap the sweep for small datasets instead of crashing part-way through.
k_values = range(1, min(max_k, X_scaled.shape[0]) + 1)
for i in k_values:
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42, n_init=10)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(list(k_values), wcss, marker='o')
ax.set_xticks(list(k_values))  # k is an integer; avoid fractional tick labels
ax.set_title('Elbow Method')
ax.set_xlabel('Number of Clusters (k)')
ax.set_ylabel('WCSS')
plt.show()

In [None]:
# Number of clusters, chosen by reading the elbow plot.
k = 3
kmeans = KMeans(n_clusters=k, init='k-means++', random_state=42, n_init=10)
# Fit the model and take the label assigned to each scaled row.
clusters = kmeans.fit(X_scaled).labels_

# Attach the labels to the original rows that survived dropna(); X.index
# identifies exactly those rows.
df_clustered = df.loc[X.index].assign(Cluster=clusters)
df_clustered.head(n=5)

In [None]:
# Scatter the first two feature columns against each other, coloured by
# cluster label. Skipped when fewer than two features are available.
if len(feature_cols) >= 2:
    x_axis, y_axis = feature_cols[:2]

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=df_clustered,
        x=x_axis,
        y=y_axis,
        hue='Cluster',
        palette='viridis',
        s=50,
        ax=ax,
    )
    ax.set_title(f"Clusters: {x_axis} vs {y_axis}")
    plt.show()

In [None]:
# Cluster profiles: the mean of every feature column within each cluster.
rows_by_cluster = df_clustered.groupby('Cluster')
cluster_means = rows_by_cluster[feature_cols].agg('mean')
cluster_means