In [None]:
import pandas as pd

# Read the market data CSV into a DataFrame
crypto_df = pd.read_csv(filepath_or_buffer='crypto_market_data.csv')

# Show summary statistics for the loaded data
crypto_df.describe()


In [None]:
from sklearn.preprocessing import StandardScaler

# Every column except the 'coin_id' identifier is used as a feature
features = crypto_df.drop("coin_id", axis="columns")

# Fit the scaler on the features, then apply it to the same data
scaler = StandardScaler().fit(features)
scaled_data = scaler.transform(features)

# Wrap the scaled array in a DataFrame: one row per coin (indexed by 'coin_id'),
# one column per original feature
scaled_df = pd.DataFrame(
    data=scaled_data,
    columns=features.columns,
    index=crypto_df["coin_id"],
)

# Preview the first 5 rows of the scaled data
scaled_df.head()


In [None]:
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Elbow method to find the best k: fit KMeans for k = 1..11 and record the
# inertia of each fit.
#
# Cluster on the feature columns only. A later cell adds a 'Cluster' label
# column to scaled_df in place; dropping it here (a no-op when it is absent)
# keeps this cell's result the same no matter when it is re-run.
elbow_features = scaled_df.drop(columns=['Cluster'], errors='ignore')

inertia = []
k_values = list(range(1, 12))

for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=0)
    kmeans.fit(elbow_features)
    inertia.append(kmeans.inertia_)

# Plot the elbow curve: inertia against the number of clusters
plt.plot(k_values, inertia, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
plt.show()


In [None]:
# Assume the best k found was 4 (adjust according to the elbow plot).
# This is the only place to change it: the model and the plot title follow k.
k = 4

# Fit on the feature columns only. 'Cluster' is written into scaled_df below,
# so without this guard a second run of the cell would feed the previous
# labels back into KMeans as an extra feature.
cluster_features = scaled_df.drop(columns=['Cluster'], errors='ignore')
kmeans = KMeans(n_clusters=k, random_state=0)
clusters = kmeans.fit_predict(cluster_features)

# Add the cluster label of each row to the scaled DataFrame
scaled_df['Cluster'] = clusters

# Visualize clusters (importing hvplot.pandas provides the .hvplot accessor)
import hvplot.pandas

# 24h vs 7d price change, one colour per cluster
scaled_df.hvplot.scatter(x='price_change_percentage_24h',
                         y='price_change_percentage_7d',
                         by='Cluster',
                         hover_cols=['coin_id'],
                         title=f'Cryptocurrency Clusters (k={k})')


In [None]:
from sklearn.decomposition import PCA

# Reduce to 3 principal components.
# PCA is fitted on the scaled features only: the 'Cluster' label column is
# removed if present, and errors="ignore" lets this cell run even when that
# column has not been added to scaled_df.
pca = PCA(n_components=3)
pca_data = pca.fit_transform(scaled_df.drop(columns=["Cluster"], errors="ignore"))

# Explained variance: per-component ratios and their total over the 3 components
explained_variance = pca.explained_variance_ratio_
total_explained_variance = sum(explained_variance)

print(f'Total explained variance: {total_explained_variance}')

# Create DataFrame with PCA data, keeping the same row index as scaled_df
pca_df = pd.DataFrame(pca_data, index=scaled_df.index, columns=["PC1", "PC2", "PC3"])

# Display the first 5 rows
pca_df.head()


In [None]:
# Elbow method for PCA data.
#
# Work on the component columns only. 'Cluster' is appended to pca_df at the
# end of this cell, so dropping it here (a no-op on the first run) stops a
# re-run from clustering on its own previous labels.
pca_features = pca_df.drop(columns=['Cluster'], errors='ignore')

inertia_pca = []

# The loop variable is deliberately not called `k`, so the k selected for the
# non-PCA clustering is not overwritten by this sweep.
for n_clusters in k_values:
    kmeans_pca = KMeans(n_clusters=n_clusters, random_state=0)
    kmeans_pca.fit(pca_features)
    inertia_pca.append(kmeans_pca.inertia_)

# Plot elbow curve for PCA data
plt.plot(k_values, inertia_pca, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k (PCA Data)')
plt.show()

# Assuming the best k is found (e.g., 3); the plot title below follows k_pca
k_pca = 3
kmeans_pca = KMeans(n_clusters=k_pca, random_state=0)
clusters_pca = kmeans_pca.fit_predict(pca_features)

# Add the cluster label of each row to the PCA DataFrame
pca_df['Cluster'] = clusters_pca

# Plot clusters using hvPlot: PC1 vs PC2, one colour per cluster
pca_df.hvplot.scatter(x='PC1',
                      y='PC2',
                      by='Cluster',
                      hover_cols=['coin_id'],
                      title=f'Cryptocurrency Clusters with PCA (k={k_pca})')
