In [78]:
import requests

import numpy as np
np.set_printoptions(suppress=True)
import pandas as pd

import altair as alt
import hvplot.pandas
import plotly.express as px

from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, StandardScaler

---

Data source: the CryptoCompare API ([home page](https://min-api.cryptocompare.com)).

In [55]:
# Download the full coin list from CryptoCompare and keep the columns of interest.
url = 'https://min-api.cryptocompare.com/data/all/coinlist'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# KeyError on 'Data' further down.
http_response = requests.get(url, timeout=30)
http_response.raise_for_status()
response = http_response.json()  # parsed JSON payload; coin records live under 'Data'

# response['Data'] is keyed by coin symbol, so transpose to get one row per coin.
crypto_df = pd.DataFrame(response['Data']).T
# NOTE(review): 'TotalCoinSupply' was commented out of this selection by the
# original author — presumably the endpoint does not return it; confirm.
crypto_df = crypto_df[['CoinName', 'Algorithm', 'IsTrading', 'ProofType', 'TotalCoinsMined']]
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined
42,42 Coin,Scrypt,True,PoW/PoS,0.0
300,300 token,,True,,300.0
365,365Coin,X11,True,PoW/PoS,0.0
404,404Coin,Scrypt,True,PoW/PoS,0.0
433,433 Token,,False,,


---

In [91]:
# Load the saved coin list and reduce it to actively traded coins that have a
# known algorithm, no missing values, and a positive number of mined coins.
# NOTE(review): pd.read_csv's default NA parsing likely already turns 'N/A'
# into NaN, in which case the Algorithm filter is a no-op and dropna() does
# the work — confirm against the CSV.
crypto_df = (
    pd.read_csv('crypto_data.csv', index_col=0)
    .loc[lambda frame: frame['IsTrading'] == True]     # only coins that are trading
    .loc[lambda frame: frame['Algorithm'] != 'N/A']    # only coins with a working algorithm
    .drop(columns=['IsTrading'])                       # constant after the filter above
    .dropna()
    .loc[lambda frame: frame['TotalCoinsMined'] > 0]   # only coins that have mined coins
)

# Keep the coin names aside (same index) so they can be re-attached after clustering.
coin_name = crypto_df['CoinName'].to_frame()

# The name is an identifier, not a feature; the supply column must be numeric.
crypto_df = crypto_df.drop(columns='CoinName').astype({'TotalCoinSupply': float})
crypto_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,4.199995e+01,4.200000e+01
404,Scrypt,PoW/PoS,1.055185e+09,5.320000e+08
1337,X13,PoW/PoS,2.927942e+10,3.141593e+11
BTC,SHA-256,PoW,1.792718e+07,2.100000e+07
ETH,Ethash,PoW,1.076842e+08,0.000000e+00
...,...,...,...,...
ZEPH,SHA-256,DPoS,2.000000e+09,2.000000e+09
GAP,Scrypt,PoW/PoS,1.493105e+07,2.500000e+08
BDX,CryptoNight,PoW,9.802226e+08,1.400223e+09
ZEN,Equihash,PoW,7.296538e+06,2.100000e+07


In [34]:
# Build the feature matrix: one-hot encode the two categorical columns, then
# standardize every resulting column (StandardScaler returns a NumPy array).
X = StandardScaler().fit_transform(
    pd.get_dummies(crypto_df, columns=['Algorithm', 'ProofType'])
)
X

array([[-0.11710817, -0.1528703 , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.09396955, -0.145009  , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.52494561,  4.48942416, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       ...,
       [-0.09561336, -0.13217937, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11694817, -0.15255998, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11710536, -0.15285552, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ]])

In [56]:
# Reduce the standardized feature matrix to a few principal components, then
# record the K-Means inertia for k = 1..10 to draw an elbow curve.
n_comp = 3

# Seed PCA explicitly: depending on the scikit-learn version and the data
# shape, the default 'auto' solver can select randomized SVD, which makes the
# components (and every cluster built on them) vary between runs when unseeded.
principal_components = PCA(n_components=n_comp, random_state=0).fit_transform(X)
print(principal_components.shape)
pcs_df = pd.DataFrame(
    principal_components,
    columns=[f'PC {i}' for i in range(1, n_comp + 1)],
    index=crypto_df.index,  # keep the coin symbols so later joins align
)

# Inertia for each candidate cluster count; `k` is reused as the x-ticks of the plot.
inertia = []
k = list(range(1, 11))
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)
elbow_data = {'k': k, 'inertia': inertia}
df_elbow = pd.DataFrame(elbow_data)

(532, 98)
(532, 3)


In [57]:
# Elbow curve: inertia against the number of clusters, one tick per tested k.
df_elbow.hvplot.line(
    x='k',
    y='inertia',
    xticks=k,
    title='Elbow Curve',
)

In [58]:
# Same elbow curve rendered with Altair.
alt.Chart(df_elbow).mark_line().encode(
    alt.X('k'),
    alt.Y('inertia'),
)

In [59]:
# Fit the final 4-cluster model on the principal components (fit returns the
# estimator itself) and compute the predicted labels for the same rows.
model = KMeans(n_clusters=4, random_state=0).fit(pcs_df)
pred = model.predict(pcs_df)

# Original features + principal components side by side, then re-attach the
# coin names (aligned on the index) and the cluster label of each row.
clustered_df = pd.concat([crypto_df, pcs_df], axis='columns', sort=False).assign(
    CoinName=coin_name['CoinName'],
    Class=model.labels_,
)
clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,Class
42,Scrypt,PoW/PoS,41.99995,42,-0.337823,0.978995,-0.588421,42 Coin,3
404,Scrypt,PoW/PoS,1055185000.0,532000000,-0.321167,0.979106,-0.588834,404Coin,3
1337,X13,PoW/PoS,29279420000.0,314159265359,2.313272,1.678143,-0.669669,EliteCoin,3
BTC,SHA-256,PoW,17927180.0,21000000,-0.147097,-1.317968,0.212701,Bitcoin,0
ETH,Ethash,PoW,107684200.0,0,-0.154361,-2.012828,0.41977,Ethereum,0


In [60]:
# 3D view of the clusters in principal-component space; colour and marker
# symbol both follow the cluster label, hover shows the coin and its algorithm.
fig = px.scatter_3d(
    data_frame=clustered_df,
    x='PC 1', y='PC 2', z='PC 3',
    color='Class', symbol='Class',
    hover_name='CoinName', hover_data=['Algorithm'],
    width=800,
)
# Pin the legend to the top-left corner of the figure.
fig.update_layout(legend={'x': 0, 'y': 1})

In [65]:
# 2D view of the clusters on the first two principal components.
# 'Class' is an integer cluster label, which Altair would otherwise infer as
# quantitative (continuous colour ramp and gradient legend). ':N' marks it as
# nominal so each cluster gets exactly one of the four colours below.
alt.Chart(clustered_df).mark_circle(size=60).encode(
    x='PC 1',
    y='PC 2',
    color=alt.Color('Class:N', scale=alt.Scale(domain=[0, 1, 2, 3], range=['red', 'green', 'blue', 'orange'])),
    tooltip=['CoinName', 'Algorithm', 'TotalCoinsMined', 'TotalCoinSupply'],
).interactive()

In [49]:
# Interactive table of the tradable coins with their assigned cluster.
clustered_df.loc[
    :, ['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined', 'Class']
].hvplot.table()

In [62]:
# Show every row and column of the clustered coins; the display limits are
# lifted only inside this block and restored afterwards.
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(
        clustered_df.loc[
            :, ['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', 'TotalCoinsMined', 'Class']
        ]
    )

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinSupply,TotalCoinsMined,Class
42,42 Coin,Scrypt,PoW/PoS,42.0,41.99995,3
404,404Coin,Scrypt,PoW/PoS,532000000.0,1055185000.0,3
1337,EliteCoin,X13,PoW/PoS,314159265359.0,29279420000.0,3
BTC,Bitcoin,SHA-256,PoW,21000000.0,17927180.0,0
ETH,Ethereum,Ethash,PoW,0.0,107684200.0,0
LTC,Litecoin,Scrypt,PoW,84000000.0,63039240.0,0
DASH,Dash,X11,PoW/PoS,22000000.0,9031294.0,3
XMR,Monero,CryptoNight-V7,PoW,0.0,17201140.0,0
ETC,Ethereum Classic,Ethash,PoW,210000000.0,113359700.0,0
ZEC,ZCash,Equihash,PoW,21000000.0,7383056.0,0


In [52]:
# Rescale the two supply columns to [0, 1] so both axes share a comparable
# range, then plot mined coins against total supply, grouped by cluster.
plot_data = MinMaxScaler().fit_transform(clustered_df[['TotalCoinSupply', 'TotalCoinsMined']])
plot_df = pd.DataFrame(plot_data, columns=['TotalCoinSupply', 'TotalCoinsMined'], index=clustered_df.index)
plot_df['CoinName'] = clustered_df['CoinName']
plot_df['Class'] = clustered_df['Class']
# hover_cols must name an existing column: it is 'CoinName' (the previous
# 'Coinname' spelling does not match any column of plot_df).
plot_df.hvplot.scatter(x='TotalCoinsMined', y='TotalCoinSupply', hover_cols=['CoinName'], by='Class')

In [66]:
# Scaled mined coins against scaled total supply, coloured by cluster.
# 'Class' is an integer cluster label, which Altair would otherwise infer as
# quantitative (continuous colour ramp and gradient legend). ':N' marks it as
# nominal so each cluster gets exactly one of the four colours below.
alt.Chart(plot_df).mark_circle(size=60).encode(
    x='TotalCoinsMined',
    y='TotalCoinSupply',
    color=alt.Color('Class:N', scale=alt.Scale(domain=[0, 1, 2, 3], range=['red', 'green', 'blue', 'orange'])),
    tooltip=['CoinName', 'TotalCoinsMined', 'TotalCoinSupply'],
).interactive()

---