In [168]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
from scipy import stats
import numpy as np

In [169]:
crypto_df = pd.read_csv('crypto_data.csv')

In [170]:
crypto_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1252 entries, 0 to 1251
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       1252 non-null   object 
 1   CoinName         1252 non-null   object 
 2   Algorithm        1252 non-null   object 
 3   IsTrading        1252 non-null   bool   
 4   ProofType        1252 non-null   object 
 5   TotalCoinsMined  744 non-null    float64
 6   TotalCoinSupply  1252 non-null   object 
dtypes: bool(1), float64(1), object(5)
memory usage: 60.0+ KB


In [171]:
crypto_data = crypto_df.dropna()

In [172]:
crypto_data.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
4,808,808,SHA-256,True,PoW/PoS,0.0,0
5,1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000


In [173]:
crypto_data = crypto_data[crypto_data['IsTrading'] == 1]
crypto_data = crypto_data[crypto_data['TotalCoinsMined'] != 0]

In [174]:
crypto_data = crypto_data.drop(columns=['IsTrading','Unnamed: 0'])

In [175]:
coins_name = pd.DataFrame(crypto_data['CoinName'].values, index=crypto_data.index, columns=['Coin Name'])

In [176]:
X = crypto_data.drop(columns = ['CoinName'])
X = pd.get_dummies(X)

In [177]:
X.head(2)

Unnamed: 0,TotalCoinsMined,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,...,TotalCoinSupply_91388946,TotalCoinSupply_92000000000,TotalCoinSupply_9354000,TotalCoinSupply_9507271,TotalCoinSupply_9736000,TotalCoinSupply_98000000,TotalCoinSupply_98100000000,TotalCoinSupply_990000000000,TotalCoinSupply_999481516,TotalCoinSupply_9999999
0,41.99995,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1055185000.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [178]:
X_scaled = StandardScaler().fit_transform(X)

In [179]:
pca = PCA(n_components=3)

In [180]:
X_pca = pca.fit_transform(X_scaled)

In [181]:
df_X_pca = pd.DataFrame(data=X_pca, columns = ['pc 1', 'pc 2', 'pc 3'])
df_X_pca.index = coins_name['Coin Name']

In [182]:
# df_X_pca = df_X_pca[(np.abs(stats.zscore(df_X_pca)) < .25).all(axis=1)]

In [183]:
df_X_pca

Unnamed: 0_level_0,pc 1,pc 2,pc 3
Coin Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42 Coin,-0.169665,-0.114264,0.109692
404Coin,-0.151557,-0.109747,0.106406
EliteCoin,0.412157,0.028964,0.190020
Bitcoin,-0.201170,-0.145979,-0.171750
Ethereum,-0.218133,-0.222076,-0.089425
...,...,...,...
ZEPHYR,2.638324,-0.136887,0.299035
Gapcoin,-0.219046,-0.146799,0.035315
Beldex,-0.058894,-0.185918,-0.425497
Horizen,-0.274060,-0.139641,-0.185581


In [184]:
pca.explained_variance_ratio_

array([0.0083082 , 0.00792908, 0.00789197])

In [185]:
# Find the best value for K
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of K values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(df_X_pca)
    inertia.append(km.inertia_)

# Create the elbow curve
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")



In [188]:
# Initialize the K-means model
model = KMeans(n_clusters=8, random_state=0)

# Fit the model
model.fit(df_X_pca)

# Predict clusters
predictions = model.predict(df_X_pca)

# Add the predicted class columns
df_X_pca["class"] = model.labels_
df_X_pca.head()

Unnamed: 0_level_0,pc 1,pc 2,pc 3,class
Coin Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,-0.169665,-0.114264,0.109692,7
404Coin,-0.151557,-0.109747,0.106406,7
EliteCoin,0.412157,0.028964,0.19002,7
Bitcoin,-0.20117,-0.145979,-0.17175,0
Ethereum,-0.218133,-0.222076,-0.089425,0


In [189]:
df_X_pca.hvplot.scatter(
            x="pc 1",
            y="pc 2",
            hover_cols=["Coin Name"],
            by="class",
)