# Clustering Crypto

In [128]:
import requests
import json
from path import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans


### Fetching Cryptocurrency Data

In [129]:
url = "https://min-api.cryptocompare.com/data/all/coinlist"

# url = url + "?format=json"



# response_data = requests.get(url)
# #view if 200
# data = response_data.json()

#print respone 
# print(json.dumps(data,indent=4))

In [130]:
# Creating a DataFrame

file_path = Path("../Starter_Files/crypto_compare.csv")

df = pd.read_csv(file_path)

df.head(5)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [131]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1252 entries, 0 to 1251
Data columns (total 7 columns):
Unnamed: 0         1252 non-null object
CoinName           1252 non-null object
Algorithm          1252 non-null object
IsTrading          1252 non-null bool
ProofType          1252 non-null object
TotalCoinsMined    744 non-null float64
TotalCoinSupply    1252 non-null object
dtypes: bool(1), float64(1), object(5)
memory usage: 60.0+ KB


### Data Preprocessing

In [132]:
# Traspose the data



df['IsTrading'] = df['IsTrading'].astype(str)

bool_list = ['True']

df = df[df.IsTrading.isin(bool_list)]

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1144 entries, 0 to 1247
Data columns (total 7 columns):
Unnamed: 0         1144 non-null object
CoinName           1144 non-null object
Algorithm          1144 non-null object
IsTrading          1144 non-null object
ProofType          1144 non-null object
TotalCoinsMined    685 non-null float64
TotalCoinSupply    1144 non-null object
dtypes: float64(1), object(6)
memory usage: 71.5+ KB


In [133]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [134]:
# Remove unnecessary columns

df.drop(columns=['IsTrading'],inplace=True)

# crypto_df.iloc[:,:].isnull().sum()

# df.replace('0',' ')

# df.TotalCoinsMined.dropna()



In [135]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1144 entries, 0 to 1247
Data columns (total 6 columns):
Unnamed: 0         1144 non-null object
CoinName           1144 non-null object
Algorithm          1144 non-null object
ProofType          1144 non-null object
TotalCoinsMined    685 non-null float64
TotalCoinSupply    1144 non-null object
dtypes: float64(1), object(5)
memory usage: 62.6+ KB


In [136]:
df['TotalCoinSupply'] =  pd.to_numeric(df['TotalCoinSupply'],errors='coerce')

In [137]:
df['TotalCoinsMined'] =  pd.to_numeric(df['TotalCoinsMined'],errors='coerce')

In [138]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1144 entries, 0 to 1247
Data columns (total 6 columns):
Unnamed: 0         1144 non-null object
CoinName           1144 non-null object
Algorithm          1144 non-null object
ProofType          1144 non-null object
TotalCoinsMined    685 non-null float64
TotalCoinSupply    1141 non-null float64
dtypes: float64(2), object(4)
memory usage: 62.6+ KB


In [139]:
df.dropna(axis=0,how='any',inplace=True)

In [140]:
df.replace(0,'one',inplace=True)

In [141]:
df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,42,42
2,404,404Coin,Scrypt,PoW/PoS,1.05518e+09,5.32e+08
4,808,808,SHA-256,PoW/PoS,one,one
5,1337,EliteCoin,X13,PoW/PoS,2.92794e+10,3.14159e+11
7,BTC,Bitcoin,SHA-256,PoW,1.79272e+07,2.1e+07


In [142]:
df2 = df[df != 'one']

df2.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,42.0,42.0
2,404,404Coin,Scrypt,PoW/PoS,1055180000.0,532000000.0
4,808,808,SHA-256,PoW/PoS,,
5,1337,EliteCoin,X13,PoW/PoS,29279400000.0,314159000000.0
7,BTC,Bitcoin,SHA-256,PoW,17927200.0,21000000.0


In [143]:
df2 = df2.dropna(axis=0, how='any')

df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 498 entries, 0 to 1247
Data columns (total 6 columns):
Unnamed: 0         498 non-null object
CoinName           498 non-null object
Algorithm          498 non-null object
ProofType          498 non-null object
TotalCoinsMined    498 non-null object
TotalCoinSupply    498 non-null object
dtypes: object(6)
memory usage: 27.2+ KB


In [144]:
df2.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,42.0,42.0
2,404,404Coin,Scrypt,PoW/PoS,1055180000.0,532000000.0
5,1337,EliteCoin,X13,PoW/PoS,29279400000.0,314159000000.0
7,BTC,Bitcoin,SHA-256,PoW,17927200.0,21000000.0
9,LTC,Litecoin,Scrypt,PoW,63039200.0,84000000.0


In [145]:
crypto_df = df2

copy_df = crypto_df.copy()

In [146]:
# Dropping rows where there are 'N/A' text values
crypto_df.head(20)


Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,42.0,42.0
2,404,404Coin,Scrypt,PoW/PoS,1055180000.0,532000000.0
5,1337,EliteCoin,X13,PoW/PoS,29279400000.0,314159000000.0
7,BTC,Bitcoin,SHA-256,PoW,17927200.0,21000000.0
9,LTC,Litecoin,Scrypt,PoW,63039200.0,84000000.0
10,DASH,Dash,X11,PoW/PoS,9031290.0,22000000.0
12,ETC,Ethereum Classic,Ethash,PoW,113360000.0,210000000.0
13,ZEC,ZCash,Equihash,PoW,7383060.0,21000000.0
14,BTS,Bitshares,SHA-512,PoS,2741570000.0,3600570000.0
15,DGB,DigiByte,Multiple,PoW,11406200000.0,21000000000.0


In [147]:
copy_df.index = copy_df['Unnamed: 0'].values

copy_df.drop(columns=['Unnamed: 0', 'CoinName'],inplace=True)

copy_df.head(5)

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,42.0,42.0
404,Scrypt,PoW/PoS,1055180000.0,532000000.0
1337,X13,PoW/PoS,29279400000.0,314159000000.0
BTC,SHA-256,PoW,17927200.0,21000000.0
LTC,Scrypt,PoW,63039200.0,84000000.0


In [148]:
# Fetching the cryptocurrencies names prior to drop them from crypto_df
coins_name = pd.DataFrame(index = copy_df.index)



coins_name.head()

42
404
1337
BTC
LTC


In [149]:
# Removig the cryptocurrency name since it's not going to be used on the clustering algorithm




In [150]:
# Removig the cryptocurrency name since it's not going to be used on the clustering algorithm

# copy_df.reset_index(inplace=True)

# copy_df.drop(columns=['index'],inplace=True)

# copy_df.head()

In [151]:
# Creating dummies variables for text features

X = copy_df

In [152]:
# Standardizing data



### Reducing Dimensions Using PCA

In [None]:
# Using PCA to reduce dimension to 3 principal components



In [None]:
# Creating a DataFrame with the principal components data



### Clustering Crytocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [None]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range ok k values
for i in k:


# Create the Elbow Curve using hvPlot



Running K-Means with `k=<you best value for k here>`

In [None]:
# Initialize the K-Means model

# Fit the model

# Predict clusters

# Creating a new DataFrame including predicted clusters and cryptocurrencies features



### Visualizing Results

#### 3D-Scatter with Clusters

In [None]:
# Creating a 3D-Scatter with the PCA data and the clusters



#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos



#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scaling data to create the scatter plot



In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"

