# Clustering Crypto

In [2]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import urllib.request, json 
from pathlib import Path

from returns.result import Result, safe

In [3]:
import sys
if "../lib" not in sys.path:
    print("Adding '../lib' to the path")
    sys.path.append("../lib")
import pandasPalmer as pp
import NLTK.fns as nl
import Classification.fns as cls


Adding '../lib' to the path


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\EPalmer\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\EPalmer\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\EPalmer\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\EPalmer\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


### Fetching Cryptocurrency Data

In [68]:
# Use the following endpoint to fetch json data
url_site = "https://min-api.cryptocompare.com/data/all/coinlist"

with urllib.request.urlopen(url_site) as url:
    data = json.loads(url.read().decode())

    if data["Response"]=="Success":
        all_data = data
    else:
        print("NOT ABLE TO READ THE COIN DATA!!!")

In [69]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.

df = pd.DataFrame(data=all_data["Data"]).T
df.head(2)

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,AssetLaunchDate,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,BuiltOn,SmartContractAddress,DecimalPoints,Difficulty
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0000-00-00,0,0,0,0,,,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,2017-07-01,300,0,0,0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0,


In [70]:
# Alternatively, use the provided csv file:
file_path = Path("./Resources/crypto_data.csv")

# Create a DataFrame
df_csv = pd.read_csv(file_path)
df_csv.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


### Data Preprocessing

In [73]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
df = df[['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined']]  #,'TotalCoinSupply'
df.head(2)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined
42,42 Coin,Scrypt,True,PoW/PoS,0
300,300 token,,True,,300


In [74]:
# Keep only cryptocurrencies that are trading
trading_status = df.IsTrading == True
df = df[trading_status]
df.head(2)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined
42,42 Coin,Scrypt,True,PoW/PoS,0
300,300 token,,True,,300


In [75]:
# Keep only cryptocurrencies with a working algorithm
working_algo = df.Algorithm != "N/A"
df = df[working_algo]
df.head(2)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined
42,42 Coin,Scrypt,True,PoW/PoS,0
365,365Coin,X11,True,PoW/PoS,0


In [76]:
# Remove the 'IsTrading' column
if ("IsTrading" in df.columns):
    df.drop(axis='columns', columns='IsTrading', inplace=True)
else:
    print("The 'IsTrading' Column is already deleted")
df.head(2)

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined
42,42 Coin,Scrypt,PoW/PoS,0
365,365Coin,X11,PoW/PoS,0


In [87]:
# Remove rows with at least 1 null value
df.dropna(inplace=True)
df.isnull().sum()
_ = [print(f"Column \t'{col}'   \thas {df[col].isnull().sum()} null values") for col in df.columns]

Column 	'CoinName'   	has 0 null values
Column 	'Algorithm'   	has 0 null values
Column 	'ProofType'   	has 0 null values
Column 	'TotalCoinsMined'   	has 0 null values


In [78]:
# Remove rows with cryptocurrencies having no coins mined
df = df[df.TotalCoinsMined>0]
df.head(2)

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined
NVC,NovaCoin,Scrypt,PoW/PoS,3353133.721164
XCP,CounterParty,SHA-256,PoW,2614449.013579


In [79]:
# Drop rows where there are 'N/A' text values
Xnavalues = (df.CoinName!="N/A") & (df.Algorithm!="N/A") & (df.ProofType!="N/A")
df = df[Xnavalues]
df.head(2)

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined
NVC,NovaCoin,Scrypt,PoW/PoS,3353133.721164
XCP,CounterParty,SHA-256,PoW,2614449.013579


In [80]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df 
crypto_df = df.copy(deep=True)
coin_name_df = pd.DataFrame(data=df.CoinName)
coin_name_df.head(2)

Unnamed: 0,CoinName
NVC,NovaCoin
XCP,CounterParty


In [81]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
if ("CoinName" in crypto_df.columns):
    crypto_df.drop(axis="columns",columns="CoinName",inplace=True)
else:
    print("CoinName already deleted")
crypto_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined
NVC,Scrypt,PoW/PoS,3353133.721164
XCP,SHA-256,PoW,2614449.013579
NSR,PoS,PoS,6163156588.0671
MONA,Scrypt,PoW,82667774.971579
TRI,X13,PoW/PoS,182156.759757


In [88]:
# Create dummy variables for text features
crypto_df_enc = pd.get_dummies(data=crypto_df)
crypto_df_enc.head()


Unnamed: 0,Algorithm_Autolykos,Algorithm_BEP-2,Algorithm_BLAKE256,Algorithm_BMW512 / Echo512,Algorithm_Blake2B + SHA3,Algorithm_Blake2b,Algorithm_C31,Algorithm_CryptoNight,Algorithm_CryptoNight-Heavy,Algorithm_CryptoNight-Lite,...,TotalCoinsMined_30366458741.187523,TotalCoinsMined_32276388101.97679,TotalCoinsMined_33208310289.47945,TotalCoinsMined_48565072992.0,TotalCoinsMined_62319462900.0,TotalCoinsMined_85985041177.0,TotalCoinsMined_130659266383.70522,TotalCoinsMined_184467440737.09552,TotalCoinsMined_989988481777.0397,TotalCoinsMined_1000016730264.435
NVC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
XCP,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NSR,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MONA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TRI,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [93]:
# Standardize data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(crypto_df_enc)

print("Average",scaler.mean_[:5])
print("Scale", scaler.scale_[:5])


Average [0.00806452 0.00806452 0.01612903 0.00806452 0.00806452]
Scale [0.08943981 0.08943981 0.12597177 0.08943981 0.08943981]


### Reducing Dimensions Using PCA

In [None]:
# Use PCA to reduce dimensions to 3 principal components



In [None]:
# Create a DataFrame with the principal components data



### Clustering Crytocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [None]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:


# Create the Elbow Curve using hvPlot



Running K-Means with `k=<your best value for k here>`

In [None]:
# Initialize the K-Means model

# Fit the model

# Predict clusters

# Create a new DataFrame including predicted clusters and cryptocurrencies features



### Visualizing Results

#### 3D-Scatter with Clusters

In [None]:
# Create a 3D-Scatter with the PCA data and the clusters



#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos



In [None]:
# Print the total number of tradable cryptocurrencies

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scale data to create the scatter plot



In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"

