# Clustering Crypto

In [54]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Fetching Cryptocurrency Data

In [55]:
# Fetch the full coin list (JSON) from the CryptoCompare endpoint below
url = "https://min-api.cryptocompare.com/data/all/coinlist"
# A timeout keeps a stalled connection from hanging the whole notebook run
r = requests.get(url, timeout=30)
# Fail here with a clear HTTP error instead of a KeyError on 'Data' below
r.raise_for_status()
# The coin records live under the top-level 'Data' key of the response
crypto_data = r.json()['Data']

In [56]:
# Build the coin table; transposing turns each key of crypto_data into a row
crypto_df = pd.DataFrame(data=crypto_data).transpose()
crypto_df.head()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42.0,0.0,0.0,0.0,blockchain,scrypt,0.000244,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300.0,0.0,0.0,0.0,token,,,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1.0,0.0,0.0,0.0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1.0,0.0,0.0,0.0,blockchain,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,


### Data Preprocessing

In [57]:
# Keep only the columns used in the rest of the analysis:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','MaxSupply'
# (the supply column selected here is 'MaxSupply', not 'TotalCoinSupply')
crypto_df = crypto_df[['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','MaxSupply']]

In [58]:
# Restrict the table to coins whose IsTrading flag equals True
crypto_df = crypto_df.loc[crypto_df['IsTrading'] == True]  # noqa: E712 (element-wise comparison)
crypto_df.tail(10)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
ELY,Elysian,,True,,260000000.0,-1
EM,Eminer,,True,,2100000000.0,-1
ENCRYPG,EncrypGen,,True,,70938084.472832,-1
TSL,Energo,,True,,1000000000.0,-1
ENG,Enigma,,True,,150000000.0,-1
ENJ,Enjin Coin,,True,,1000000000.0,-1
ESS,Essentia,,True,,1755313373.0,-1
ETH,Ethereum,Ethash,True,PoW,120203789.0615,-1
FUEL,Etherparty,,True,,1000000000.0,-1
ARCONA,Arcona,,True,,15181707.013085,-1


In [59]:
# Filter out coins whose Algorithm is the placeholder text 'N/A'
crypto_df = crypto_df.loc[crypto_df['Algorithm'].ne('N/A')]
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.999952,42
365,365Coin,X11,True,PoW/PoS,0,-1
404,404Coin,Scrypt,True,PoW/PoS,0,-1
611,SixEleven,SHA-256,True,PoW,0,0
808,808,SHA-256,True,PoW/PoS,0,0
...,...,...,...,...,...,...
DCR,Decred,BLAKE256,True,PoW/PoS,13897411.564559,21000000
DBC,DeepBrain Chain,NEP-5,True,,10000000000,-1
DERO,Dero,CryptoNight,True,PoW,18400000,-1
DOGE,Dogecoin,Scrypt,True,PoW,133837866383.705231,-1


In [60]:
# Remove the "IsTrading" column.
# Reassign rather than drop in place: crypto_df was produced by filtering
# another frame, and in-place edits on such a frame can raise
# SettingWithCopyWarning and mutate state shared with earlier cells.
crypto_df = crypto_df.drop(columns=['IsTrading'])
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
365,365Coin,X11,PoW/PoS,0,-1
404,404Coin,Scrypt,PoW/PoS,0,-1
611,SixEleven,SHA-256,PoW,0,0
808,808,SHA-256,PoW/PoS,0,0
...,...,...,...,...,...
DCR,Decred,BLAKE256,PoW/PoS,13897411.564559,21000000
DBC,DeepBrain Chain,NEP-5,,10000000000,-1
DERO,Dero,CryptoNight,PoW,18400000,-1
DOGE,Dogecoin,Scrypt,PoW,133837866383.705231,-1


In [61]:
# Remove rows with at least 1 null value.
# Reassign instead of using inplace=True so the frame is never mutated in
# place (avoids SettingWithCopyWarning on a filtered frame).
crypto_df = crypto_df.dropna()

In [62]:
# Discard coins whose TotalCoinsMined is exactly 0
crypto_df = crypto_df.loc[crypto_df['TotalCoinsMined'].ne(0)]
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
NSR,NuShares,PoS,PoS,6175367197.9692,0
TRI,Triangles Coin,X13,PoW/PoS,191623.903871,0
CMTC,CometCoin,Scrypt,PoW,872830,0
CHAT,OpenChat,Scrypt,PoW/PoS,1000000000,-1
...,...,...,...,...,...
DCR,Decred,BLAKE256,PoW/PoS,13897411.564559,21000000
DBC,DeepBrain Chain,NEP-5,,10000000000,-1
DERO,Dero,CryptoNight,PoW,18400000,-1
DOGE,Dogecoin,Scrypt,PoW,133837866383.705231,-1


In [63]:
# Discard rows whose ProofType is the placeholder text 'N/A'
crypto_df = crypto_df.loc[crypto_df['ProofType'].ne('N/A')]
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
NSR,NuShares,PoS,PoS,6175367197.9692,0
TRI,Triangles Coin,X13,PoW/PoS,191623.903871,0
CMTC,CometCoin,Scrypt,PoW,872830,0
CHAT,OpenChat,Scrypt,PoW/PoS,1000000000,-1
...,...,...,...,...,...
DASH,Dash,X11,PoW/PoSe,10661223.863353,18900000
DCR,Decred,BLAKE256,PoW/PoS,13897411.564559,21000000
DERO,Dero,CryptoNight,PoW,18400000,-1
DOGE,Dogecoin,Scrypt,PoW,133837866383.705231,-1


In [64]:
# Store the 'CoinName' column on its own (a Series sharing crypto_df's index)
# prior to dropping it from crypto_df. Select by label rather than by
# position so a change in column order cannot silently pick another column.
coinname_df = crypto_df['CoinName']
coinname_df

42             42 Coin
NSR           NuShares
TRI     Triangles Coin
CMTC         CometCoin
CHAT          OpenChat
             ...      
DASH              Dash
DCR             Decred
DERO              Dero
DOGE          Dogecoin
ETH           Ethereum
Name: CoinName, Length: 136, dtype: object

In [65]:
# The coin name is only a label, not a clustering feature, so leave it out
# of the feature table (crypto_df itself is not modified)
clean_crypto_df = crypto_df.drop(labels=['CoinName'], axis='columns')
clean_crypto_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,Scrypt,PoW/PoS,41.999952,42
NSR,PoS,PoS,6175367197.9692,0
TRI,X13,PoW/PoS,191623.903871,0
CMTC,Scrypt,PoW,872830,0
CHAT,Scrypt,PoW/PoS,1000000000,-1
...,...,...,...,...
DASH,X11,PoW/PoSe,10661223.863353,18900000
DCR,BLAKE256,PoW/PoS,13897411.564559,21000000
DERO,CryptoNight,PoW,18400000,-1
DOGE,Scrypt,PoW,133837866383.705231,-1


In [73]:
# One-hot encode the two text features; the remaining columns pass through unchanged
X = pd.get_dummies(
    data=clean_crypto_df,
    columns=['Algorithm', 'ProofType'],
)
X.head()

Unnamed: 0,TotalCoinsMined,MaxSupply,Algorithm_Autolykos,Algorithm_BEP-2,Algorithm_BEP-20 Token,Algorithm_BLAKE256,Algorithm_BMW512 / Echo512,Algorithm_Blake2B + SHA3,Algorithm_Blake2b,Algorithm_C31,...,ProofType_PoW/PoSe,ProofType_PoW/nPoS,ProofType_ProgPoW/PoS,ProofType_Proof of Authority,ProofType_Proof-of-Work,ProofType_SPoS,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW,ProofType_dPoW/PoW
42,41.999952,42,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NSR,6175367197.9692,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TRI,191623.903871,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CMTC,872830.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CHAT,1000000000.0,-1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [71]:
# Standardize every feature column; fit_transform learns the scaling
# parameters from X and applies them in a single call
data_scaler = StandardScaler()
X_scaled = data_scaler.fit_transform(X)
# Peek at the first scaled row only
X_scaled[:1]

array([[-0.08786885, -0.09220965, -0.0860663 , -0.0860663 , -0.0860663 ,
        -0.12216944, -0.0860663 , -0.0860663 , -0.12216944, -0.12216944,
        -0.15018785, -0.0860663 , -0.0860663 , -0.0860663 , -0.23294541,
        -0.12216944, -0.0860663 , -0.0860663 , -0.0860663 , -0.29664794,
        -0.0860663 , -0.0860663 , -0.23294541, -0.0860663 , -0.0860663 ,
        -0.12216944, -0.0860663 , -0.0860663 , -0.0860663 , -0.0860663 ,
        -0.0860663 , -0.0860663 , -0.15018785, -0.0860663 , -0.0860663 ,
        -0.12216944, -0.19536617, -0.0860663 , -0.0860663 , -0.15018785,
        -0.12216944, -0.29664794, -0.12216944, -0.0860663 , -0.0860663 ,
        -0.0860663 ,  2.1602469 , -0.0860663 , -0.0860663 , -0.0860663 ,
        -0.0860663 , -0.19536617, -0.0860663 , -0.19536617, -0.12216944,
        -0.0860663 , -0.0860663 , -0.0860663 , -0.0860663 , -0.0860663 ,
        -0.25      , -0.0860663 , -0.0860663 , -0.12216944, -0.12216944,
        -0.0860663 , -0.32510161, -0.0860663 , -0.0

### Reducing Dimensions Using PCA

In [None]:
# Use PCA to reduce dimensions to 3 principal components


In [None]:
# Create a DataFrame with the principal components data


### Clustering Cryptocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [None]:
# Accumulator for the inertia values, and the candidate cluster counts 1..10
inertia = list()
k = [*range(1, 11)]

# Calculate the inertia for the range of k values


# Create the Elbow Curve using hvPlot


Running K-Means with `k=<your best value for k here>`

In [None]:
# Initialize the K-Means model

# Fit the model

# Predict clusters

# Create a new DataFrame including predicted clusters and cryptocurrencies features


### Visualizing Results

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scale data to create the scatter plot


In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="MaxSupply"


#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos


In [None]:
# Print the total number of tradable cryptocurrencies
