In [44]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px

In [4]:
# Read the raw cryptocurrency CSV, using its "Unnamed: 0" column as the row index
file_path = "Resources/crypto_data.csv"
crypto_df = pd.read_csv(filepath_or_buffer=file_path, index_col="Unnamed: 0")
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...
XBC,BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


In [5]:
# Keep only the cryptocurrencies whose IsTrading flag is True
crypto_df1 = crypto_df.loc[crypto_df["IsTrading"].eq(True)]
crypto_df1


Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...
SERO,Super Zero,Ethash,True,PoW,,1000000000
UOS,UOS,SHA-256,True,DPoI,,1000000000
BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [6]:
# Drop the IsTrading column
crypto_df2 = crypto_df1.drop(columns=["IsTrading"])
crypto_df2

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...
SERO,Super Zero,Ethash,PoW,,1000000000
UOS,UOS,SHA-256,DPoI,,1000000000
BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [7]:
# Count the missing values in each column
crypto_df2.isna().sum()

CoinName             0
Algorithm            0
ProofType            0
TotalCoinsMined    459
TotalCoinSupply      0
dtype: int64

In [8]:
# Discard every row that has at least one missing value
crypto_df3 = crypto_df2.dropna(axis=0, how="any")
crypto_df3

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
808,808,SHA-256,PoW/PoS,0.000000e+00,0
1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
...,...,...,...,...,...
ZEPH,ZEPHYR,SHA-256,DPoS,2.000000e+09,2000000000
GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [9]:
# Inspect the coins whose TotalCoinsMined is zero or negative
crypto_df4 = crypto_df3.loc[crypto_df3["TotalCoinsMined"].le(0)]
crypto_df4

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
808,808,SHA-256,PoW/PoS,0.000000e+00,0
XBS,Bitstake,X11,PoW/PoS,0.000000e+00,1300000
ACOIN,ACoin,SHA-256,PoW,0.000000e+00,1600000
AERO,Aero Coin,X13,PoS,0.000000e+00,7000000
APEX,ApexCoin,X13,PoW/PoS,0.000000e+00,6000000
...,...,...,...,...,...
PKB,ParkByte,SHA-256,PoW/PoS,0.000000e+00,25000000
DOT,Dotcoin,Scrypt,PoW,0.000000e+00,890000000
THC,The Hempcoin,Scrypt,PoW/PoS,0.000000e+00,300000000
FIII,Fiii,SHA3-256,DPoC,-5.917978e+09,5000000000


In [10]:
# Keep only the cryptocurrencies with a positive number of coins mined
crypto_df5 = crypto_df3.loc[crypto_df3["TotalCoinsMined"].gt(0)]
crypto_df5

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
ETH,Ethereum,Ethash,PoW,1.076842e+08,0
...,...,...,...,...,...
ZEPH,ZEPHYR,SHA-256,DPoS,2.000000e+09,2000000000
GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [11]:
# Keep the coin names in their own one-column DataFrame, coins_name_df,
# which carries the same index as crypto_df5.
coins_name_df = crypto_df5["CoinName"].to_frame()
coins_name_df

Unnamed: 0,CoinName
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
ETH,Ethereum
...,...
ZEPH,ZEPHYR
GAP,Gapcoin
BDX,Beldex
ZEN,Horizen


In [12]:
# Drop the CoinName column
crypto_df6 = crypto_df5.drop(columns=["CoinName"])
crypto_df6

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,4.199995e+01,42
404,Scrypt,PoW/PoS,1.055185e+09,532000000
1337,X13,PoW/PoS,2.927942e+10,314159265359
BTC,SHA-256,PoW,1.792718e+07,21000000
ETH,Ethash,PoW,1.076842e+08,0
...,...,...,...,...
ZEPH,SHA-256,DPoS,2.000000e+09,2000000000
GAP,Scrypt,PoW/PoS,1.493105e+07,250000000
BDX,CryptoNight,PoW,9.802226e+08,1400222610
ZEN,Equihash,PoW,7.296538e+06,21000000


In [15]:
# One-hot encode the text features (Algorithm and ProofType); the result is the feature matrix X
X = pd.get_dummies(data=crypto_df6, columns=["Algorithm", "ProofType"])
X

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,4.199995e+01,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1.055185e+09,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,2.927942e+10,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,1.792718e+07,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,1.076842e+08,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZEPH,2.000000e+09,2000000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
GAP,1.493105e+07,250000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BDX,9.802226e+08,1400222610,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZEN,7.296538e+06,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Create the scaler instance (StandardScaler is already imported in the notebook's first cell,
# so it is not re-imported here)
data_scaler = StandardScaler()

In [17]:
# Fit the scaler on X, then standardize X with it.
# Despite its name, crypto_df7 is a NumPy array rather than a DataFrame.
crypto_df7 = data_scaler.fit(X).transform(X)
crypto_df7[:5]

array([[-0.11710817, -0.1528703 , -0.0433963 , -0.0433963 , -0.0433963 ,
        -0.06142951, -0.07530656, -0.0433963 , -0.06142951, -0.06142951,
        -0.0433963 , -0.0433963 , -0.19245009, -0.06142951, -0.09740465,
        -0.0433963 , -0.11547005, -0.07530656, -0.0433963 , -0.0433963 ,
        -0.15191091, -0.0433963 , -0.13118084, -0.0433963 , -0.0433963 ,
        -0.08703883, -0.0433963 , -0.0433963 , -0.0433963 , -0.0433963 ,
        -0.06142951, -0.0433963 , -0.08703883, -0.08703883, -0.08703883,
        -0.0433963 , -0.13118084, -0.13840913, -0.13840913, -0.0433963 ,
        -0.06142951, -0.0433963 , -0.07530656, -0.18168574, -0.0433963 ,
        -0.0433963 , -0.0433963 , -0.07530656, -0.15826614, -0.31491833,
        -0.0433963 , -0.08703883, -0.07530656, -0.06142951,  1.38675049,
        -0.0433963 , -0.0433963 , -0.06142951, -0.0433963 , -0.0433963 ,
        -0.0433963 , -0.0433963 , -0.0433963 , -0.0433963 , -0.0433963 ,
        -0.0433963 , -0.39879994, -0.0433963 , -0.1

In [18]:
# Initialize a PCA model that reduces the data to three principal components.
# random_state is fixed because scikit-learn's automatic solver selection may pick the
# randomized SVD solver for data of this shape; without a seed the components could
# differ slightly between runs.
pca = PCA(n_components=3, random_state=0)

In [22]:
# Fit the PCA model on the scaled crypto data and project it onto the principal components
# (the PCA model is created with n_components=3, so the result has three columns).
crypto_pca = pca.fit_transform(crypto_df7)
crypto_pca

array([[-0.33368216,  1.03805813, -0.57649386],
       [-0.3170095 ,  1.03834419, -0.57693694],
       [ 2.31186699,  1.72782281, -0.67884974],
       ...,
       [ 0.32816668, -2.24427276,  0.44918897],
       [-0.12770144, -1.99635124,  0.38888979],
       [-0.29655047,  0.77484106, -0.26801918]])

In [25]:
# Wrap the PCA output in a DataFrame that reuses the index of crypto_df6
crypto_pca_df = pd.DataFrame(
    data=crypto_pca,
    index=crypto_df6.index,
    columns=["PC1", "PC2", "PC3"],
)
crypto_pca_df

Unnamed: 0,PC1,PC2,PC3
42,-0.333682,1.038058,-0.576494
404,-0.317010,1.038344,-0.576937
1337,2.311867,1.727823,-0.678850
BTC,-0.139892,-1.241116,0.137361
ETH,-0.150550,-1.995612,0.335014
...,...,...,...
ZEPH,2.468616,0.863042,-0.001958
GAP,-0.331726,1.037959,-0.576519
BDX,0.328167,-2.244273,0.449189
ZEN,-0.127701,-1.996351,0.388890


In [26]:
# Find the best value for K
inertia = []
k = list(range(1, 11))

# Cluster on the principal components only. A later cell adds a "class" column to
# crypto_pca_df in place; selecting PC1-PC3 explicitly keeps this cell correct even if
# it is re-run after that column exists.
elbow_features = crypto_pca_df[["PC1", "PC2", "PC3"]]

# Calculate the inertia for the range of K values.
# n_init is pinned to 10 so the result does not depend on the scikit-learn version's
# default and no FutureWarning about the changing default is emitted.
for i in k:
    km = KMeans(n_clusters=i, n_init=10, random_state=0)
    km.fit(elbow_features)
    inertia.append(km.inertia_)

# Create the elbow curve
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")


In [27]:
# Initialize the K-means model (5 clusters — presumably read off the elbow curve; confirm).
# n_init is pinned to 10 so the result does not depend on the scikit-learn version's default.
model = KMeans(n_clusters=5, n_init=10, random_state=0)

# Fit on the principal components only and get the cluster label of every coin.
# Selecting PC1-PC3 explicitly makes this cell idempotent: on a re-run, the "class"
# column added below is not fed back into the model as a feature.
predictions = model.fit_predict(crypto_pca_df[["PC1", "PC2", "PC3"]])

# Add the predicted class column
crypto_pca_df["class"] = predictions
crypto_pca_df.head()

Unnamed: 0,PC1,PC2,PC3,class
42,-0.333682,1.038058,-0.576494,1
404,-0.31701,1.038344,-0.576937,1
1337,2.311867,1.727823,-0.67885,4
BTC,-0.139892,-1.241116,0.137361,0
ETH,-0.15055,-1.995612,0.335014,0


In [40]:
# Place the cleaned features, the principal components with their cluster label,
# and the coin names side by side in a single DataFrame
clustered_df = pd.concat(
    objs=[crypto_df6, crypto_pca_df, coins_name_df],
    axis="columns",
)
clustered_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC1,PC2,PC3,class,CoinName
42,Scrypt,PoW/PoS,4.199995e+01,42,-0.333682,1.038058,-0.576494,1,42 Coin
404,Scrypt,PoW/PoS,1.055185e+09,532000000,-0.317010,1.038344,-0.576937,1,404Coin
1337,X13,PoW/PoS,2.927942e+10,314159265359,2.311867,1.727823,-0.678850,4,EliteCoin
BTC,SHA-256,PoW,1.792718e+07,21000000,-0.139892,-1.241116,0.137361,0,Bitcoin
ETH,Ethash,PoW,1.076842e+08,0,-0.150550,-1.995612,0.335014,0,Ethereum
...,...,...,...,...,...,...,...,...,...
ZEPH,SHA-256,DPoS,2.000000e+09,2000000000,2.468616,0.863042,-0.001958,4,ZEPHYR
GAP,Scrypt,PoW/PoS,1.493105e+07,250000000,-0.331726,1.037959,-0.576519,1,Gapcoin
BDX,CryptoNight,PoW,9.802226e+08,1400222610,0.328167,-2.244273,0.449189,0,Beldex
ZEN,Equihash,PoW,7.296538e+06,21000000,-0.127701,-1.996351,0.388890,0,Horizen


In [None]:
# Create a 3D scatter plot with Plotly Express that shows the clusters in the clustered_df DataFrame.

In [42]:
# Interactive 3D scatter of the coins in principal-component space, coloured by cluster;
# hovering over a point shows the coin name and its algorithm
px.scatter_3d(
    data_frame=clustered_df,
    x="PC1",
    y="PC2",
    z="PC3",
    color="class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
)




In [46]:
# Sortable, selectable data table of the clustered cryptocurrencies, showing
# CoinName, Algorithm, ProofType, TotalCoinSupply, TotalCoinsMined and class
clustered_df.hvplot.table(
    columns=[
        'CoinName',
        'Algorithm',
        'ProofType',
        'TotalCoinSupply',
        'TotalCoinsMined',
        'class',
    ],
    sortable=True,
    selectable=True,
)


In [49]:
# Scatter plot of TotalCoinsMined (x) against TotalCoinSupply (y), grouped by cluster,
# to contrast each coin's total supply with the number of coins already mined.
# hover_cols adds the cryptocurrency name to every data point's tooltip.
clustered_df.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    by="class",
    hover_cols=["CoinName"],
)
