# Clustering Crypto

In [1]:
!pip install -U altair



In [2]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import altair
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Fetching Cryptocurrency Data

In [3]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [4]:
import json

# create a get enviroment 
response = requests.get(url)
# pulling the content 
content = response.content
data = json.loads(content)

# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
df = pd.DataFrame(data['Data'])
df = df.transpose()
df.columns

Index(['Id', 'Url', 'ImageUrl', 'ContentCreatedOn', 'Name', 'Symbol',
       'CoinName', 'FullName', 'Description', 'AssetTokenStatus', 'Algorithm',
       'ProofType', 'SortOrder', 'Sponsored', 'Taxonomy', 'Rating',
       'IsTrading', 'TotalCoinsMined', 'BlockNumber', 'NetHashesPerSecond',
       'BlockReward', 'BlockTime', 'AssetLaunchDate', 'MaxSupply',
       'MktCapPenalty', 'PlatformType', 'BuiltOn', 'SmartContractAddress',
       'DecimalPoints', 'Difficulty', 'IsUsedInDefi', 'IsUsedInNft'],
      dtype='object')

### Data Preprocessing

In [5]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
df1 = df[['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','MaxSupply']]
df1

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
300,300 token,,True,,300,300
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
433,433 Token,,False,,,
...,...,...,...,...,...,...
DORA,Dora Factory,,True,,10000000,-1
XSGD,XSGD,,True,,1.04076e+07,-1
TARA,Taraxa,,True,,10000000000,-1
WOA,Wrapped Origin Axie,,False,,248,-1


In [6]:
# Keep only cryptocurrencies that are trading

df1=df1[df1['IsTrading']==True]
df1

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
300,300 token,,True,,300,300
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
611,SixEleven,SHA-256,True,PoW,0,0
...,...,...,...,...,...,...
STR,Stater,,True,,100000000,-1
XYM,Symbol,,True,,,
DORA,Dora Factory,,True,,10000000,-1
XSGD,XSGD,,True,,1.04076e+07,-1


In [7]:
# Keep only cryptocurrencies with a working algorithm
df1 = df1[df1['Algorithm']!= 'N/A']
df1

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
611,SixEleven,SHA-256,True,PoW,0,0
808,808,SHA-256,True,PoW/PoS,0,0
...,...,...,...,...,...,...
BNB,Binance Coin,BEP-2,True,PoSA,1.70534e+08,1.70534e+08
AION,Aion,"Equihash210,9",True,PoW/PoS,492427074,-1
ACT,Achain,DPoS,True,DPoS,1000000000,0
RVN,Ravencoin,KAWPOW,True,,8.56498e+09,21000000000


In [8]:
# Remove the "IsTrading" column
df1.drop(columns=['IsTrading'])

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,0,0
365,365Coin,X11,PoW/PoS,0,0
404,404Coin,Scrypt,PoW/PoS,0,0
611,SixEleven,SHA-256,PoW,0,0
808,808,SHA-256,PoW/PoS,0,0
...,...,...,...,...,...
BNB,Binance Coin,BEP-2,PoSA,1.70534e+08,1.70534e+08
AION,Aion,"Equihash210,9",PoW/PoS,492427074,-1
ACT,Achain,DPoS,DPoS,1000000000,0
RVN,Ravencoin,KAWPOW,,8.56498e+09,21000000000


In [9]:
# Remove rows with at least 1 null value
df1 = df1.dropna()
df1

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
611,SixEleven,SHA-256,True,PoW,0,0
808,808,SHA-256,True,PoW/PoS,0,0
...,...,...,...,...,...,...
BNB,Binance Coin,BEP-2,True,PoSA,1.70534e+08,1.70534e+08
AION,Aion,"Equihash210,9",True,PoW/PoS,492427074,-1
ACT,Achain,DPoS,True,DPoS,1000000000,0
RVN,Ravencoin,KAWPOW,True,,8.56498e+09,21000000000


In [10]:
# Remove rows with cryptocurrencies having no coins mined
df1 = df1[df['TotalCoinsMined']!= 0]
df1


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
NVC,NovaCoin,Scrypt,True,PoW/PoS,3.18855e+06,-1
XCP,CounterParty,SHA-256,True,PoW,2.61511e+06,-1
NSR,NuShares,PoS,True,PoS,5.85537e+09,0
MONA,MonaCoin,Scrypt,True,PoW,8.10883e+07,-1
TRI,Triangles Coin,X13,True,PoW/PoS,175041,0
...,...,...,...,...,...,...
BNB,Binance Coin,BEP-2,True,PoSA,1.70534e+08,1.70534e+08
AION,Aion,"Equihash210,9",True,PoW/PoS,492427074,-1
ACT,Achain,DPoS,True,DPoS,1000000000,0
RVN,Ravencoin,KAWPOW,True,,8.56498e+09,21000000000


In [11]:
# Drop rows where there are 'N/A' text values
df1 = df1[df1 != 'N/A']
df1.dropna(inplace = True)
df1

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
NVC,NovaCoin,Scrypt,True,PoW/PoS,3.18855e+06,-1
XCP,CounterParty,SHA-256,True,PoW,2.61511e+06,-1
NSR,NuShares,PoS,True,PoS,5.85537e+09,0
MONA,MonaCoin,Scrypt,True,PoW,8.10883e+07,-1
TRI,Triangles Coin,X13,True,PoW/PoS,175041,0
...,...,...,...,...,...,...
BTC,Bitcoin,SHA-256,True,PoW,18663637,2.1e+07
BNB,Binance Coin,BEP-2,True,PoSA,1.70534e+08,1.70534e+08
AION,Aion,"Equihash210,9",True,PoW/PoS,492427074,-1
ACT,Achain,DPoS,True,DPoS,1000000000,0


In [12]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coin_df = pd.DataFrame(
    df1['CoinName'])
coin_df 

Unnamed: 0,CoinName
NVC,NovaCoin
XCP,CounterParty
NSR,NuShares
MONA,MonaCoin
TRI,Triangles Coin
...,...
BTC,Bitcoin
BNB,Binance Coin
AION,Aion
ACT,Achain


In [13]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
df1.drop(columns = 'CoinName', inplace =True)
df1

Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
NVC,Scrypt,True,PoW/PoS,3.18855e+06,-1
XCP,SHA-256,True,PoW,2.61511e+06,-1
NSR,PoS,True,PoS,5.85537e+09,0
MONA,Scrypt,True,PoW,8.10883e+07,-1
TRI,X13,True,PoW/PoS,175041,0
...,...,...,...,...,...
BTC,SHA-256,True,PoW,18663637,2.1e+07
BNB,BEP-2,True,PoSA,1.70534e+08,1.70534e+08
AION,"Equihash210,9",True,PoW/PoS,492427074,-1
ACT,DPoS,True,DPoS,1000000000,0


In [14]:
# removing the Istrading column 
df1.drop(columns='IsTrading',inplace=True)

In [15]:
# finding out df1 index and saving it as a variable 
df1_index=df1.index
df1_index

Index(['NVC', 'XCP', 'NSR', 'MONA', 'TRI', 'EMC', 'SAFEX', 'CMTC', 'XSN',
       'CHAT',
       ...
       'BTS', 'BSV', 'BTG', 'BCD', 'BCH', 'BTC', 'BNB', 'AION', 'ACT', 'ETH'],
      dtype='object', length=117)

In [16]:
# Create dummy variables for text features
X = pd.get_dummies(data=df1,columns=['Algorithm','ProofType'])
X

Unnamed: 0,TotalCoinsMined,MaxSupply,Algorithm_BEP-2,Algorithm_BLAKE256,Algorithm_BMW512 / Echo512,Algorithm_Blake2B + SHA3,Algorithm_Blake2b,Algorithm_C31,Algorithm_CryptoNight,Algorithm_CryptoNight-Heavy,...,ProofType_PoW,ProofType_PoW/PoS,ProofType_PoW/PoSe,ProofType_PoW/nPoS,ProofType_Proof of Authority,ProofType_SPoS,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW,ProofType_mPoW
NVC,3.18855e+06,-1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
XCP,2.61511e+06,-1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
NSR,5.85537e+09,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MONA,8.10883e+07,-1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
TRI,175041,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BTC,18663637,2.1e+07,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BNB,1.70534e+08,1.70534e+08,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AION,492427074,-1,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
ACT,1000000000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Standardize data
from sklearn.preprocessing import StandardScaler
# initiating scaler & fitting the data 
scaler = StandardScaler().fit(X)
X=scaler.transform(X)
X = pd.DataFrame(X)
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,62,63,64,65,66,67,68,69,70,71
0,-0.176615,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,-1.008584,2.021622,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
1,-0.176620,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,0.991489,-0.494652,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
2,-0.131697,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,-1.008584,-0.494652,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
3,-0.176017,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,0.991489,-0.494652,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
4,-0.176639,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,-1.008584,2.021622,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,-0.176497,-0.136783,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,0.991489,-0.494652,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
113,-0.175331,-0.135190,10.770330,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,-1.008584,-0.494652,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
114,-0.172860,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,-1.008584,2.021622,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848
115,-0.168964,-0.137006,-0.092848,-0.131876,-0.092848,-0.092848,-0.131876,-0.131876,-0.131876,-0.092848,...,-1.008584,-0.494652,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848,-0.092848


### Reducing Dimensions Using PCA

In [18]:
# Use PCA to reduce dimensions to 3 principal components
pca = PCA(n_components = 3)
X_pca = pca.fit_transform(X)

In [19]:
# Create a DataFrame with the principal components data
pcs_df = pd.DataFrame(X_pca, columns =['PC 1','PC 2','PC 3'], index = df1_index)
pcs_df


Unnamed: 0,PC 1,PC 2,PC 3
NVC,-1.291200,-0.640821,-0.581734
XCP,1.674126,-0.218264,0.001178
NSR,-1.285683,0.068096,-0.498681
MONA,0.710928,-0.698143,-0.247388
TRI,-2.033220,-0.529232,-0.714372
...,...,...,...
BTC,1.674202,-0.218139,0.001193
BNB,-1.909194,-0.173635,2.341386
AION,-1.995914,-0.586731,-0.690056
ACT,-1.372312,3.299169,-1.891329


### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [20]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)



Running K-Means with `k=<your best value for k here>`

In [21]:
# Initialize the K-Means model
model =KMeans(n_clusters=6,random_state =1)

# Fit the model
model.fit(pcs_df)

# Predict clusters
predictions = model.predict(pcs_df)


In [22]:
# Create a new DataFrame including predicted clusters and cryptocurrencies features
clustered_df = pd.concat([df1,pcs_df],axis =1)
clustered_df['Class']=predictions
clustered_df['CoinName']= coin_df
clustered_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply,PC 1,PC 2,PC 3,Class,CoinName
NVC,Scrypt,PoW/PoS,3.18855e+06,-1,-1.291200,-0.640821,-0.581734,3,NovaCoin
XCP,SHA-256,PoW,2.61511e+06,-1,1.674126,-0.218264,0.001178,1,CounterParty
NSR,PoS,PoS,5.85537e+09,0,-1.285683,0.068096,-0.498681,3,NuShares
MONA,Scrypt,PoW,8.10883e+07,-1,0.710928,-0.698143,-0.247388,1,MonaCoin
TRI,X13,PoW/PoS,175041,0,-2.033220,-0.529232,-0.714372,3,Triangles Coin
...,...,...,...,...,...,...,...,...,...
BTC,SHA-256,PoW,18663637,2.1e+07,1.674202,-0.218139,0.001193,1,Bitcoin
BNB,BEP-2,PoSA,1.70534e+08,1.70534e+08,-1.909194,-0.173635,2.341386,2,Binance Coin
AION,"Equihash210,9",PoW/PoS,492427074,-1,-1.995914,-0.586731,-0.690056,3,Aion
ACT,DPoS,DPoS,1000000000,0,-1.372312,3.299169,-1.891329,5,Achain


# Visualizing Results

#### 3D-Scatter with Clusters

In [23]:
altair.Chart(clustered_df).mark_circle(size=90).encode(
    x='PC 1',
    y='PC 2',
    color='CoinName',
    tooltip=["CoinName", "Algorithm", "TotalCoinsMined", "MaxSupply"]
).interactive()

#### Table of Tradable Cryptocurrencies

In [24]:
altair.Chart(clustered_df).mark_circle(size=90).encode(
    x='TotalCoinsMined',
    y='MaxSupply',
    color='Class',
    tooltip=["CoinName", "Algorithm", "TotalCoinsMined", "MaxSupply"]
).interactive()

In [25]:
# Print the total number of tradable cryptocurrencies
from pprint import pprint
pprint(clustered_df['MaxSupply'].to_dict())


{'AAC': -1,
 'ACT': 0,
 'ADA': 45000000000,
 'ADK': 0,
 'AEON': -1,
 'AION': -1,
 'AMB': -1,
 'AR': 66000000,
 'ARK': -1,
 'AUR': 21000000,
 'BCD': 210000000,
 'BCH': 20999999.9769,
 'BCHA': 20999999.9769,
 'BCN': 184470000000,
 'BDX': -1,
 'BEAM': 262800000,
 'BLK': 100000000,
 'BLOCK': -1,
 'BNB': 170533651.9,
 'BSV': 20999999.9769,
 'BTC': 20999999.9769,
 'BTCP': 22873588,
 'BTCVT': 21000000,
 'BTCZ': 21000000000,
 'BTG': 21000000,
 'BTH': 0,
 'BTM': 210000000,
 'BTS': 3600570502,
 'BTT': -1,
 'BURST': 2158812800,
 'CHAT': -1,
 'CKB': -1,
 'CLO': 6500000000,
 'CLOAK': -1,
 'CMTC': 0,
 'COVAL': -1,
 'DAPS': 70000000000,
 'DASH': 18900000,
 'DCR': 21000000,
 'DGB': 21000000000,
 'DOGE': -1,
 'EMC': -1,
 'EMC2': -1,
 'EOS': -1,
 'ETC': 210700000,
 'ETH': -1,
 'ETZ': -1,
 'EXP': 100000000,
 'FIRO': 21400000,
 'FTC': 336000000,
 'GRIN': -1,
 'GRS': 105000000,
 'HC': 84000000,
 'HNS': 2040000000,
 'ICX': -1,
 'INT': 1000000000,
 'IOC': 22000000,
 'KCASH': -1,
 'KMD': 200000000,
 'LSK': -1

In [26]:
#this is the total tradable coins
clustered_df['CoinName'].count()

117

#### Scatter Plot with Tradable Cryptocurrencies

In [27]:
# Scale data to create the scatter plot
clustered_df['MaxSupply'] = clustered_df['MaxSupply'].astype(float)/100000000
clustered_df['TotalCoinsMined'] = clustered_df['TotalCoinsMined'].astype(float)/100000000
display(clustered_df)

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply,PC 1,PC 2,PC 3,Class,CoinName
NVC,Scrypt,PoW/PoS,0.031886,-1.000000e-08,-1.291200,-0.640821,-0.581734,3,NovaCoin
XCP,SHA-256,PoW,0.026151,-1.000000e-08,1.674126,-0.218264,0.001178,1,CounterParty
NSR,PoS,PoS,58.553658,0.000000e+00,-1.285683,0.068096,-0.498681,3,NuShares
MONA,Scrypt,PoW,0.810883,-1.000000e-08,0.710928,-0.698143,-0.247388,1,MonaCoin
TRI,X13,PoW/PoS,0.001750,0.000000e+00,-2.033220,-0.529232,-0.714372,3,Triangles Coin
...,...,...,...,...,...,...,...,...,...
BTC,SHA-256,PoW,0.186636,2.100000e-01,1.674202,-0.218139,0.001193,1,Bitcoin
BNB,BEP-2,PoSA,1.705337,1.705337e+00,-1.909194,-0.173635,2.341386,2,Binance Coin
AION,"Equihash210,9",PoW/PoS,4.924271,-1.000000e-08,-1.995914,-0.586731,-0.690056,3,Aion
ACT,DPoS,DPoS,10.000000,0.000000e+00,-1.372312,3.299169,-1.891329,5,Achain


In [28]:
altair.Chart(clustered_df).mark_circle(size=90).encode(
    x='TotalCoinsMined',
    y='MaxSupply',
    color='Class',
    tooltip=["CoinName", "Algorithm", "TotalCoinsMined", "MaxSupply"]
).interactive()