Cryptocurrency Clusters


In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt


# Read in Data 
# Loading data
file_path = Path("Resources/crypto_data.csv")
df = pd.read_csv(file_path)
df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


In [2]:
#drop not trading values 
#credit https://stackoverflow.com/questions/37213556/remove-rows-that-contain-false-in-a-column-of-pandas-dataframe
df2 = df.loc[df.IsTrading, :]
df2


Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [3]:
#drop'IsTrading'column
df2 = df.drop(columns=["IsTrading"])
df2

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,Scrypt,PoS,1.283270e+05,1000000
1248,DVTC,DivotyCoin,Scrypt,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,PoW/PoS,,21000000


In [4]:
# Find and remove null values 
#credit- https://www.journaldev.com/33492/pandas-dropna-drop-null-na-values-from-dataframe

df3 = pd.DataFrame(df2)
# drop all rows with any NaN and NaT values
df2 = df3.dropna()
df2



Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
4,808,808,SHA-256,PoW/PoS,0.000000e+00,0
5,1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
7,BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
...,...,...,...,...,...,...
1242,GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000
1247,XBC,BitcoinPlus,Scrypt,PoS,1.283270e+05,1000000


Data Preperation

In [5]:
#remove total coins mined less than zero
#Credit-https://stackoverflow.com/questions/22649693/drop-rows-with-all-zeros-in-pandas-data-frame

df2 = df2[df2['TotalCoinsMined'] != 0]
df2

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
5,1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
7,BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
8,ETH,Ethereum,Ethash,PoW,1.076842e+08,0
...,...,...,...,...,...,...
1242,GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000
1247,XBC,BitcoinPlus,Scrypt,PoS,1.283270e+05,1000000


In [6]:
#drop coin name
df3 = df2.drop("CoinName", axis=1)
df3

Unnamed: 0.1,Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,Scrypt,PoW/PoS,4.199995e+01,42
2,404,Scrypt,PoW/PoS,1.055185e+09,532000000
5,1337,X13,PoW/PoS,2.927942e+10,314159265359
7,BTC,SHA-256,PoW,1.792718e+07,21000000
8,ETH,Ethash,PoW,1.076842e+08,0
...,...,...,...,...,...
1242,GAP,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,BDX,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Equihash,PoW,7.296538e+06,21000000
1247,XBC,Scrypt,PoS,1.283270e+05,1000000


In [7]:
#use Pandas to create dummy variable

df2 = pd.get_dummies(df, columns=['Algorithm', 'ProofType'])
df2


Unnamed: 0.1,Unnamed: 0,CoinName,IsTrading,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2,Algorithm_Argon2d,Algorithm_Avesta hash,...,ProofType_PoW/PoS/PoC,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Stake,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,42,42 Coin,True,4.199995e+01,42,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,365,365Coin,True,,2300000000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,404,404Coin,True,1.055185e+09,532000000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,611,SixEleven,True,,611000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,808,808,True,0.000000e+00,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,True,1.283270e+05,1000000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248,DVTC,DivotyCoin,False,2.149121e+07,100000000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1249,GIOT,Giotto Coin,False,,233100000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1250,OPSC,OpenSourceCoin,False,,21000000,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
df3 = df3.set_index('Unnamed: 0')
df3

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,4.199995e+01,42
404,Scrypt,PoW/PoS,1.055185e+09,532000000
1337,X13,PoW/PoS,2.927942e+10,314159265359
BTC,SHA-256,PoW,1.792718e+07,21000000
ETH,Ethash,PoW,1.076842e+08,0
...,...,...,...,...
GAP,Scrypt,PoW/PoS,1.493105e+07,250000000
BDX,CryptoNight,PoW,9.802226e+08,1400222610
ZEN,Equihash,PoW,7.296538e+06,21000000
XBC,Scrypt,PoS,1.283270e+05,1000000


In [9]:
#drop coin name
df3 = df2.drop("IsTrading", axis=1)
df3

Unnamed: 0.1,Unnamed: 0,CoinName,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2,Algorithm_Argon2d,Algorithm_Avesta hash,Algorithm_BLAKE256,...,ProofType_PoW/PoS/PoC,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Stake,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,42,42 Coin,4.199995e+01,42,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,365,365Coin,,2300000000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,404,404Coin,1.055185e+09,532000000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,611,SixEleven,,611000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,808,808,0.000000e+00,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,1.283270e+05,1000000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1248,DVTC,DivotyCoin,2.149121e+07,100000000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1249,GIOT,Giotto Coin,,233100000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1250,OPSC,OpenSourceCoin,,21000000,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:

# Scale the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[['TotalCoinsMined','TotalCoinSupply']])

scaled_data

ValueError: could not convert string to float: '20 000 000'

In [None]:
#Initialize PCA model
pca = PCA(n_components = 0.99)


Dimensionality Reduction 

In [None]:
PCA(n_components=0.99)

Cluster Analysis with K-Means

Recommendation