# Clustering Crypto

In [273]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Fetching Cryptocurrency Data

In [274]:
import json
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"
res = requests.get(url)
data = res.json()

In [275]:
data.keys()



In [276]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
crypto_df = pd.DataFrame(data['Data'])
crypto_df = crypto_df.transpose()
crypto_df

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42,0,0,0,blockchain,scrypt,0.520037,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300,0,0,0,token,,,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1,0,0,0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1,0,0,0,blockchain,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MTL,179164,/coins/mtl/overview,/media/37746820/mtl.png,1499860407,MTL,MTL,Metal,Metal (MTL),Metal Pay is a blockchain-based payment proces...,Finished,...,-1,0,0,0,token,,,ETH,0xF433089366899D83a9f26A773D59ec7eCF30355e,8
MET,916769,/coins/met/overview,/media/39500929/met.png,1531142904,MET,MET,Metronome,Metronome (MET),"Metronome released in December 2017, is a cros...",,...,-1,0,1,0,token,,,ETH,0xa3d58c4e56fedcae3a7c43a725aee9a71f0ece4e,18
MEX,932013,/coins/mex/overview,/media/35651488/mex.png,1568366527,MEX,MEX,MEX,MEX (MEX),MEX (MEX) is an Ethereum-based (ERC-20) crypto...,,...,-1,0,0,0,token,,,ETH,0x2ba6b1e4424e19816382d15937739959f7da5fd8,18
MINA,940786,/coins/mina/overview,/media/37747204/mina.png,1618907186,MINA,MINA,Mina Protocol,Mina Protocol (MINA),"Mina is a light blockchain, powered by partici...",,...,-1,0,0,0,blockchain,,,,,


In [277]:
# Alternatively, use the provided csv file:
from pathlib import Path
file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame
control_df = pd.read_csv(file_path)
control_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


### Data Preprocessing

In [278]:
crypto_df.columns

Index(['Id', 'Url', 'ImageUrl', 'ContentCreatedOn', 'Name', 'Symbol',
       'CoinName', 'FullName', 'Description', 'AssetTokenStatus', 'Algorithm',
       'ProofType', 'SortOrder', 'Sponsored', 'Taxonomy', 'Rating',
       'IsTrading', 'TotalCoinsMined', 'CirculatingSupply', 'BlockNumber',
       'NetHashesPerSecond', 'BlockReward', 'BlockTime', 'AssetLaunchDate',
       'AssetWhitepaperUrl', 'AssetWebsiteUrl', 'MaxSupply', 'MktCapPenalty',
       'IsUsedInDefi', 'IsUsedInNft', 'PlatformType', 'AlgorithmType',
       'Difficulty', 'BuiltOn', 'SmartContractAddress', 'DecimalPoints'],
      dtype='object')

In [279]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
columns = ['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','MaxSupply']
crypto_df = crypto_df[columns]
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.999952,42
300,300 token,,True,,300,300
365,365Coin,X11,True,PoW/PoS,0,-1
404,404Coin,Scrypt,True,PoW/PoS,0,-1
433,433 Token,,False,,,
...,...,...,...,...,...,...
MTL,Metal,,True,PoPP,66588888,-1
MET,Metronome,,True,,14288649.244933,-1
MEX,MEX,,True,,1939999970,-1
MINA,Mina Protocol,,True,,938019772.840039,-1


In [280]:
# Keep only cryptocurrencies that are trading
crypto_df = crypto_df[crypto_df.IsTrading==True]
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.999952,42
300,300 token,,True,,300,300
365,365Coin,X11,True,PoW/PoS,0,-1
404,404Coin,Scrypt,True,PoW/PoS,0,-1
611,SixEleven,SHA-256,True,PoW,0,0
...,...,...,...,...,...,...
MTL,Metal,,True,PoPP,66588888,-1
MET,Metronome,,True,,14288649.244933,-1
MEX,MEX,,True,,1939999970,-1
MINA,Mina Protocol,,True,,938019772.840039,-1


In [281]:
# Keep only cryptocurrencies with a working algorithm
crypto_df = crypto_df[crypto_df.Algorithm!='N/A']
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.999952,42
365,365Coin,X11,True,PoW/PoS,0,-1
404,404Coin,Scrypt,True,PoW/PoS,0,-1
611,SixEleven,SHA-256,True,PoW,0,0
808,808,SHA-256,True,PoW/PoS,0,0
...,...,...,...,...,...,...
IQC,IQ.cash,NeoScrypt,True,,0,0
KCASH,Kcash,SHA-512,True,Zero-Knowledge Proof,1000000000,-1
KMD,Komodo,Equihash,True,dPoW/PoW,134762589.770923,200000000
LSK,Lisk,DPoS,True,DPoS,144818773,-1


In [282]:
# Remove the "IsTrading" column
crypto_df = crypto_df.drop('IsTrading', axis=1)
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
365,365Coin,X11,PoW/PoS,0.0,-1
404,404Coin,Scrypt,PoW/PoS,0.0,-1
611,SixEleven,SHA-256,PoW,0.0,0
808,808,SHA-256,PoW/PoS,0.0,0


In [283]:
# Remove rows with at least 1 null value
crypto_df = crypto_df.dropna()
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
365,365Coin,X11,PoW/PoS,0,-1
404,404Coin,Scrypt,PoW/PoS,0,-1
611,SixEleven,SHA-256,PoW,0,0
808,808,SHA-256,PoW/PoS,0,0
...,...,...,...,...,...
IQC,IQ.cash,NeoScrypt,,0,0
KCASH,Kcash,SHA-512,Zero-Knowledge Proof,1000000000,-1
KMD,Komodo,Equihash,dPoW/PoW,134762589.770923,200000000
LSK,Lisk,DPoS,DPoS,144818773,-1


In [284]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df = crypto_df[crypto_df.TotalCoinsMined!=0]
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
XMR,Monero,RandomX,PoW,18151189.374214,-1
AURORAC,Auroracoin,Scrypt,PoW/PoS,99865603.452017,-1
BLK,BlackCoin,Scrypt,PoS,61500330.346278,100000000
QRK,QuarkCoin,Quark,PoW/PoS,280716942.976099,-1
...,...,...,...,...,...
IOC,IOCoin,X11,PoW/PoS,19710977.974346,22000000
KCASH,Kcash,SHA-512,Zero-Knowledge Proof,1000000000,-1
KMD,Komodo,Equihash,dPoW/PoW,134762589.770923,200000000
LSK,Lisk,DPoS,DPoS,144818773,-1


In [286]:
crypto_df = crypto_df.astype({'TotalCoinsMined':'float', 'MaxSupply':'float'})
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42.0
XMR,Monero,RandomX,PoW,1.815119e+07,-1.0
AURORAC,Auroracoin,Scrypt,PoW/PoS,9.986560e+07,-1.0
BLK,BlackCoin,Scrypt,PoS,6.150033e+07,100000000.0
QRK,QuarkCoin,Quark,PoW/PoS,2.807169e+08,-1.0
...,...,...,...,...,...
IOC,IOCoin,X11,PoW/PoS,1.971098e+07,22000000.0
KCASH,Kcash,SHA-512,Zero-Knowledge Proof,1.000000e+09,-1.0
KMD,Komodo,Equihash,dPoW/PoW,1.347626e+08,200000000.0
LSK,Lisk,DPoS,DPoS,1.448188e+08,-1.0


In [287]:
# Drop rows where there are 'N/A' text values
crypto_df = crypto_df[crypto_df.ProofType!='N/A']
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42.0
XMR,Monero,RandomX,PoW,1.815119e+07,-1.0
AURORAC,Auroracoin,Scrypt,PoW/PoS,9.986560e+07,-1.0
BLK,BlackCoin,Scrypt,PoS,6.150033e+07,100000000.0
QRK,QuarkCoin,Quark,PoW/PoS,2.807169e+08,-1.0
...,...,...,...,...,...
IOC,IOCoin,X11,PoW/PoS,1.971098e+07,22000000.0
KCASH,Kcash,SHA-512,Zero-Knowledge Proof,1.000000e+09,-1.0
KMD,Komodo,Equihash,dPoW/PoW,1.347626e+08,200000000.0
LSK,Lisk,DPoS,DPoS,1.448188e+08,-1.0


In [288]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coinname_df = crypto_df[['CoinName']]
coinname_df.head()

Unnamed: 0,CoinName
42,42 Coin
XMR,Monero
AURORAC,Auroracoin
BLK,BlackCoin
QRK,QuarkCoin


In [289]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
crypto_df = crypto_df.drop('CoinName', axis=1)
crypto_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,Scrypt,PoW/PoS,41.99995,42.0
XMR,RandomX,PoW,18151190.0,-1.0
AURORAC,Scrypt,PoW/PoS,99865600.0,-1.0
BLK,Scrypt,PoS,61500330.0,100000000.0
QRK,Quark,PoW/PoS,280716900.0,-1.0


In [290]:
# Create dummy variables for text features
from sklearn.preprocessing import OneHotEncoder

In [291]:
enc=OneHotEncoder()

In [292]:
algo_values = crypto_df['Algorithm'].values.reshape(-1, 1)
enc.fit(algo_values)

OneHotEncoder()

In [293]:
algo_columns = enc.categories_
algo_columns

[array(['Autolykos', 'BEP-2', 'BEP-20 Token', 'BLAKE256',
        'BMW512 / Echo512', 'Blake2B + SHA3', 'Blake2b', 'C31',
        'CryptoNight', 'CryptoNight-Heavy', 'CryptoNight-Lite',
        'Cryptonight Haven', 'DPoS', 'Dagger', 'Dagger-Hashimoto',
        'ECC 256K1', 'Eaglesong', 'Equihash', 'Equihash210,9', 'EtcHash',
        'Ethash', 'Groestl', 'HEX', 'IMesh', 'KECCAK', 'Leased POS',
        'Loopchain', 'Lyra2REv2', 'Lyra2Z', 'MTP', 'Multiple', 'NeoScrypt',
        'Ouroboros', 'POS 3.0', 'PoS', 'ProgPowZ', 'Proof-of-Authority',
        'Quark', 'RandomX', 'SHA-256', 'SHA-512', 'SHA3', 'SPL Token',
        'SPoS', 'Scrypt', 'Scrypt-n', 'Shabal256', 'TRC-10', 'VBFT',
        'VeChainThor Authority', 'X11', 'X11GOST', 'X13', 'X16R', 'X16RT'],
       dtype=object)]

In [294]:
algo_encoded = enc.transform(algo_values).toarray()

In [295]:
algo_encoded_df = pd.DataFrame(
    algo_encoded, columns = algo_columns, index=crypto_df.index
)
algo_encoded_df

Unnamed: 0,Autolykos,BEP-2,BEP-20 Token,BLAKE256,BMW512 / Echo512,Blake2B + SHA3,Blake2b,C31,CryptoNight,CryptoNight-Heavy,...,Scrypt-n,Shabal256,TRC-10,VBFT,VeChainThor Authority,X11,X11GOST,X13,X16R,X16RT
42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
XMR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AURORAC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BLK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
QRK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IOC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
KCASH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KMD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
LSK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [296]:
# Adding a function to ingest multiple text columns and return 
# a dummy-encoded dataframe.

def encode_text_col(df, text_col):
    enc=OneHotEncoder()
    df_index = df.reset_index()
    df_index = df_index[['index']]
    df = df.reset_index(drop=True)
    for column in text_col:
        values = df[column].values.reshape(-1, 1)
        enc.fit(values)
        enc_columns = enc.categories_
        encoded = enc.transform(values).toarray()
        encoded_df = pd.DataFrame(
            encoded, columns = enc_columns
        )
        df = pd.concat([df, encoded_df], axis=1)
    df = df.drop(text_col, axis=1)
    df = pd.concat([df_index, df], axis=1)
    df = df.set_index(['index'])
    return df

In [297]:
# Define the text columns as a list and run the function
# to create the dummy-encoded dataframe
text_col = ['Algorithm', 'ProofType']
encode_text_col(crypto_df, text_col)

Unnamed: 0_level_0,TotalCoinsMined,MaxSupply,"(Autolykos,)","(BEP-2,)","(BEP-20 Token,)","(BLAKE256,)","(BMW512 / Echo512,)","(Blake2B + SHA3,)","(Blake2b,)","(C31,)",...,"(PoW/PoSe,)","(PoW/nPoS,)","(ProgPoW/PoS,)","(Proof of Authority,)","(Proof-of-Work,)","(SPoS,)","(TPoS,)","(Zero-Knowledge Proof,)","(dPoW,)","(dPoW/PoW,)"
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42,4.199995e+01,42.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
XMR,1.815119e+07,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AURORAC,9.986560e+07,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BLK,6.150033e+07,100000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
QRK,2.807169e+08,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IOC,1.971098e+07,22000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
KCASH,1.000000e+09,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
KMD,1.347626e+08,200000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
LSK,1.448188e+08,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [300]:
encoded_df = encode_text_col(crypto_df, text_col)
data_to_scale = encoded_df.iloc[:, :2]
data_to_scale

Unnamed: 0_level_0,TotalCoinsMined,MaxSupply
index,Unnamed: 1_level_1,Unnamed: 2_level_1
42,4.199995e+01,42.0
XMR,1.815119e+07,-1.0
AURORAC,9.986560e+07,-1.0
BLK,6.150033e+07,100000000.0
QRK,2.807169e+08,-1.0
...,...,...
IOC,1.971098e+07,22000000.0
KCASH,1.000000e+09,-1.0
KMD,1.347626e+08,200000000.0
LSK,1.448188e+08,-1.0


In [301]:
# Standardize data
scaler = StandardScaler()
scaler.fit(data_to_scale)
scaled_data = scaler.transform(data_to_scale)
features_scaled_data = pd.DataFrame(scaled_data, columns=encoded_df.iloc[:, :2].columns, index=crypto_df.index)
features_scaled_data.head()

Unnamed: 0,TotalCoinsMined,MaxSupply
42,-0.086922,-0.091201
XMR,-0.086922,-0.091201
AURORAC,-0.086921,-0.091201
BLK,-0.086921,-0.091145
QRK,-0.086919,-0.091201


In [302]:
# Join the dataframes
df = encoded_df.drop(['TotalCoinsMined', 'MaxSupply'], axis=1)
df = pd.concat([df, features_scaled_data], axis=1)
df

Unnamed: 0,"(Autolykos,)","(BEP-2,)","(BEP-20 Token,)","(BLAKE256,)","(BMW512 / Echo512,)","(Blake2B + SHA3,)","(Blake2b,)","(C31,)","(CryptoNight,)","(CryptoNight-Heavy,)",...,"(ProgPoW/PoS,)","(Proof of Authority,)","(Proof-of-Work,)","(SPoS,)","(TPoS,)","(Zero-Knowledge Proof,)","(dPoW,)","(dPoW/PoW,)",TotalCoinsMined,MaxSupply
42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086922,-0.091201
XMR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086922,-0.091201
AURORAC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086921,-0.091201
BLK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086921,-0.091145
QRK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086919,-0.091201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IOC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086922,-0.091189
KCASH,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,-0.086910,-0.091201
KMD,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-0.086920,-0.091089
LSK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.086920,-0.091201


### Reducing Dimensions Using PCA

In [303]:
# Use PCA to reduce dimensions to 3 principal components
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
principalComponents = pca.fit_transform(df)

In [304]:
# Create a DataFrame with the principal components data
principal_df = pd.DataFrame(data=principalComponents, columns=['PC1', 'PC2', 'PC3'], index=crypto_df.index)
principal_df

Unnamed: 0,PC1,PC2,PC3
42,-0.173442,-0.079144,0.841399
XMR,-0.119567,0.029594,-0.579919
AURORAC,-0.173441,-0.079144,0.841399
BLK,-0.155871,-0.070996,0.484699
QRK,-0.150354,-0.075655,0.804276
...,...,...,...
IOC,-0.151779,-0.075735,0.779244
KCASH,-0.119553,-0.061528,0.291780
KMD,-0.128385,-0.059757,0.182545
LSK,-0.058496,-0.125170,0.414914


### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [305]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    k_model = KMeans(n_clusters=i, random_state=1)
    k_model.fit(final_df)
    inertia.append(k_model.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)

df_elbow.hvplot.line(
    x="k", 
    y="inertia", 
    title="Elbow Curve", 
    xticks=k
)

Running K-Means with `k=<your best value for k here>`

In [306]:
# Initialize the K-Means model
model = KMeans(n_clusters=4, random_state=1)

# Fit the model
model.fit(principal_df)

# Predict clusters
k_4 = model.predict(principal_df)

# Create a new DataFrame including predicted clusters and cryptocurrencies features
k4_principal_df = principal_df.copy()
k4_principal_df['Class'] = k_4
k4_principal_df

Unnamed: 0,PC1,PC2,PC3,Class
42,-0.173442,-0.079144,0.841399,0
XMR,-0.119567,0.029594,-0.579919,3
AURORAC,-0.173441,-0.079144,0.841399,0
BLK,-0.155871,-0.070996,0.484699,0
QRK,-0.150354,-0.075655,0.804276,0
...,...,...,...,...
IOC,-0.151779,-0.075735,0.779244,0
KCASH,-0.119553,-0.061528,0.291780,0
KMD,-0.128385,-0.059757,0.182545,0
LSK,-0.058496,-0.125170,0.414914,0


In [309]:
# Create a new DataFrame named clustered_df, that includes the following columns 
# "Algorithm", "ProofType", "TotalCoinsMined", "TotalCoinSupply", "PC 1", "PC 2", "PC 3", "CoinName", "Class".
crypto_concat = crypto_df[["Algorithm", "ProofType", "TotalCoinsMined",	"MaxSupply"]]
features_scaled_data.columns = ["TotalCoinsMinedScaled", "MaxSupplyScaled"]
clustered_df = pd.concat([crypto_concat, k4_principal_df, features_scaled_data, coinname_df], axis=1)
clustered_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply,PC1,PC2,PC3,Class,TotalCoinsMinedScaled,MaxSupplyScaled,CoinName
42,Scrypt,PoW/PoS,4.199995e+01,42.0,-0.173442,-0.079144,0.841399,0,-0.086922,-0.091201,42 Coin
XMR,RandomX,PoW,1.815119e+07,-1.0,-0.119567,0.029594,-0.579919,3,-0.086922,-0.091201,Monero
AURORAC,Scrypt,PoW/PoS,9.986560e+07,-1.0,-0.173441,-0.079144,0.841399,0,-0.086921,-0.091201,Auroracoin
BLK,Scrypt,PoS,6.150033e+07,100000000.0,-0.155871,-0.070996,0.484699,0,-0.086921,-0.091145,BlackCoin
QRK,Quark,PoW/PoS,2.807169e+08,-1.0,-0.150354,-0.075655,0.804276,0,-0.086919,-0.091201,QuarkCoin
...,...,...,...,...,...,...,...,...,...,...,...
IOC,X11,PoW/PoS,1.971098e+07,22000000.0,-0.151779,-0.075735,0.779244,0,-0.086922,-0.091189,IOCoin
KCASH,SHA-512,Zero-Knowledge Proof,1.000000e+09,-1.0,-0.119553,-0.061528,0.291780,0,-0.086910,-0.091201,Kcash
KMD,Equihash,dPoW/PoW,1.347626e+08,200000000.0,-0.128385,-0.059757,0.182545,0,-0.086920,-0.091089,Komodo
LSK,DPoS,DPoS,1.448188e+08,-1.0,-0.058496,-0.125170,0.414914,0,-0.086920,-0.091201,Lisk


### Visualizing Results

#### Scatter Plot with Tradable Cryptocurrencies

In [312]:
# Function to take in part of a dataframe, scale it, and join it 
# back up with the original dataframe.

def scale_data(df, columns_to_scale):
    data_to_scale = df[columns_to_scale]
    df = df.drop(columns_to_scale, axis=1)
    scaler = StandardScaler()
    scaler.fit(data_to_scale)
    scaled_data = scaler.transform(data_to_scale)
    scaled_df = pd.DataFrame(scaled_data, columns=columns_to_scale, index=df.index)
    scaled_df = pd.concat([df, scaled_df], axis=1)
    return scaled_df

In [313]:
# Scale data to create the scatter plot
columns_to_scale = ["TotalCoinsMined", "MaxSupply"]
scale_cluster_df = scale_data(clustered_df, columns_to_scale)
scale_cluster_df

Unnamed: 0,Algorithm,ProofType,PC1,PC2,PC3,Class,TotalCoinsMinedScaled,MaxSupplyScaled,CoinName,TotalCoinsMined,MaxSupply
42,Scrypt,PoW/PoS,-0.173442,-0.079144,0.841399,0,-0.086922,-0.091201,42 Coin,-0.086922,-0.091201
XMR,RandomX,PoW,-0.119567,0.029594,-0.579919,3,-0.086922,-0.091201,Monero,-0.086922,-0.091201
AURORAC,Scrypt,PoW/PoS,-0.173441,-0.079144,0.841399,0,-0.086921,-0.091201,Auroracoin,-0.086921,-0.091201
BLK,Scrypt,PoS,-0.155871,-0.070996,0.484699,0,-0.086921,-0.091145,BlackCoin,-0.086921,-0.091145
QRK,Quark,PoW/PoS,-0.150354,-0.075655,0.804276,0,-0.086919,-0.091201,QuarkCoin,-0.086919,-0.091201
...,...,...,...,...,...,...,...,...,...,...,...
IOC,X11,PoW/PoS,-0.151779,-0.075735,0.779244,0,-0.086922,-0.091189,IOCoin,-0.086922,-0.091189
KCASH,SHA-512,Zero-Knowledge Proof,-0.119553,-0.061528,0.291780,0,-0.086910,-0.091201,Kcash,-0.086910,-0.091201
KMD,Equihash,dPoW/PoW,-0.128385,-0.059757,0.182545,0,-0.086920,-0.091089,Komodo,-0.086920,-0.091089
LSK,DPoS,DPoS,-0.058496,-0.125170,0.414914,0,-0.086920,-0.091201,Lisk,-0.086920,-0.091201


In [315]:
sorted = scale_cluster_df.sort_values(by=['MaxSupply'])
sorted

Unnamed: 0,Algorithm,ProofType,PC1,PC2,PC3,Class,TotalCoinsMinedScaled,MaxSupplyScaled,CoinName,TotalCoinsMined,MaxSupply
BCMC1,Ethash,PoW/PoS,-0.154333,-0.074438,0.710728,0,-0.086820,-0.091201,BeforeCoinMarketCap,-0.086820,-0.091201
PURA,X11,PoW,-0.122486,0.027454,-0.525350,3,-0.086920,-0.091201,Pura,-0.086920,-0.091201
TAU,DPoS,DPoS,-0.058495,-0.125171,0.414914,0,-0.086919,-0.091201,Lamden Tau,-0.086919,-0.091201
SHIFT,DPoS,DPoS,-0.058497,-0.125169,0.414914,0,-0.086922,-0.091201,Shift,-0.086922,-0.091201
SC,Blake2b,PoW,-0.119076,0.029226,-0.579946,3,-0.086306,-0.091201,Siacoin,-0.086306,-0.091201
...,...,...,...,...,...,...,...,...,...,...,...
DAPS,Dagger,PoW/PoS/PoA,-0.095519,-0.029886,0.294045,0,-0.086177,-0.051786,DAPS Coin,-0.086177,-0.051786
VET,VeChainThor Authority,Proof of Authority,-0.088721,-0.022050,0.287970,0,-0.085894,-0.042375,VeChain,-0.085894,-0.042375
BCN,CryptoNight,PoW,-0.056143,0.110698,-0.561856,3,-0.084717,0.012670,ByteCoin,-0.084717,0.012670
WBBC,SHA-256,PoW,0.276254,0.544824,-0.614522,3,-0.074970,0.471888,Wibcoin,-0.074970,0.471888


In [311]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
clustered_df.hvplot.scatter(
    x="TotalCoinsMinedScaled",
    y="MaxSupplyScaled",
    by="Class"
)

#### Table of Tradable Cryptocurrencies

In [317]:
# Table with tradable cryptos
table = clustered_df.hvplot.table(columns=["CoinName", "Algorithm", "ProofType", "MaxSupply", "TotalCoinsMined", "Class"])
table

In [None]:
# Print the total number of tradable cryptocurrencies
