In [46]:
import pandas as pd
import plotly.express as px
import hvplot.pandas
from sklearn.cluster import KMeans

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering

import holoviews as hv
from holoviews import opts

import plotly.figure_factory as ff

In [47]:
#read crypto csv
#iris_df =  pd.read_csv('./Resources/iris.csv')
#iris_df.head()

#read csv specify index col parameter index_col_#
crypto_df =  pd.read_csv('./Resources/crypto_data.csv',index_col=0)
crypto_df.head()



Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [48]:
crypto_df.shape

(1252, 6)

In [49]:
#Remove all cryptocurrencies that aren’t trading
crypto_df_istrade=crypto_df[crypto_df['IsTrading'] == True]

In [50]:
crypto_df_istrade.shape

(1144, 6)

In [51]:
#Remove all cryptocurrencies that don’t have an algorithm defined
crypto_df_algo=crypto_df_istrade[crypto_df_istrade['Algorithm'] != 'Multiple']

In [52]:
crypto_df_algo.shape

(1126, 6)

In [53]:
#Remove the IsTrading column
crypto_df_extrade=crypto_df_algo.drop(['IsTrading'],axis=1)
crypto_df_extrade.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [54]:
crypto_df_extrade.dtypes

CoinName            object
Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [55]:
crypto_df_extrade.shape

(1126, 5)

In [56]:
#Remove all cryptocurrencies with at least one null value
for column in crypto_df_extrade.columns:
    print(f"Column{column} has {crypto_df_extrade[column].isnull().sum()} null values" )

ColumnCoinName has 0 null values
ColumnAlgorithm has 0 null values
ColumnProofType has 0 null values
ColumnTotalCoinsMined has 452 null values
ColumnTotalCoinSupply has 0 null values


In [57]:
#crypto_df_extrade.dropna(how='any',inplace=True)

crypto_df_extrade_null = crypto_df_extrade.dropna()

crypto_df_extrade_null.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [58]:
crypto_df_extrade_null.shape

(674, 5)

In [59]:
#Remove all cryptocurrencies without coins mined
crypto_df_mined=crypto_df_extrade_null[crypto_df_extrade_null['TotalCoinsMined'] != 0]
crypto_df_mined.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [60]:
crypto_df_mined.shape

(524, 5)

In [61]:
#Store the names of all cryptocurrencies on a DataFramed named coins_name
#use the crypto_df.index as the index for this new DataFrame.

coins_name_df = crypto_df_mined.filter(['CoinName'], axis=1)
coins_name_df.shape

(524, 1)

In [62]:
coins_name_df.head()

Unnamed: 0,CoinName
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
ETH,Ethereum


In [63]:
#Remove the CoinName column
crypto_df_mined_col=crypto_df_mined.drop(['CoinName'],axis=1)
crypto_df_mined_col.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000
ETH,Ethash,PoW,107684200.0,0


In [64]:
#Create dummies variables
X_df = pd.get_dummies(crypto_df_mined_col,columns=['Algorithm','ProofType'])
X_df.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [65]:
#Remove the Unnamed column
X_df_new = X_df.drop(X_df.columns[0], axis=1)

In [66]:
#standardize all of the data from the X DataFrame
X_df_scaled = StandardScaler().fit_transform(X_df_new)
print(X_df_scaled[0:5])


[[-0.15303594 -0.04372695 -0.04372695 -0.04372695 -0.06189845 -0.07588252
  -0.04372695 -0.06189845 -0.06189845 -0.04372695 -0.04372695 -0.19396846
  -0.06189845 -0.09815249 -0.04372695 -0.11636001 -0.07588252 -0.04372695
  -0.04372695 -0.15309311 -0.04372695 -0.13219579 -0.04372695 -0.04372695
  -0.0877058  -0.04372695 -0.04372695 -0.04372695 -0.04372695 -0.06189845
  -0.04372695 -0.0877058  -0.0877058  -0.0877058  -0.04372695 -0.13948209
  -0.13948209 -0.04372695 -0.06189845 -0.04372695 -0.07588252 -0.18311355
  -0.04372695 -0.04372695 -0.04372695 -0.07588252 -0.1595002  -0.31755367
  -0.04372695 -0.0877058  -0.07588252 -0.06189845 -0.04372695  1.3708103
  -0.04372695 -0.04372695 -0.06189845 -0.04372695 -0.04372695 -0.04372695
  -0.04372695 -0.04372695 -0.04372695 -0.04372695 -0.04372695 -0.40232142
  -0.04372695 -0.18311355 -0.04372695 -0.0877058  -0.0877058  -0.1076244
  -0.04372695 -0.04372695 -0.13219579 -0.04372695 -0.04372695 -0.04372695
  -0.04372695 -0.07588252 -0.43693145 -0

In [67]:
# Initialize PCA model
pca = PCA(n_components=3)

# Get two principal components for the iris data.
X_pca = pca.fit_transform(X_df_scaled)

In [68]:
df_X_pca = pd.DataFrame(data=X_pca,columns=["pc_1","pc_2","pc_3"])
df_X_pca.head()

Unnamed: 0,pc_1,pc_2,pc_3
0,-1.121871,-0.095584,-0.51229
1,-1.119998,-0.093523,-0.512601
2,-0.653185,1.578325,-0.83807
3,1.197694,-0.542335,0.202978
4,1.752958,-0.935188,0.457409


In [69]:
#Create an elbow curve to find the best value for K
inertia = []
k = list(range(1, 11))
# Calculate the inertia for the range of K values
for i in k:
   km = KMeans(n_clusters=i, random_state=0)
   km.fit(df_X_pca)
   inertia.append(km.inertia_)


In [70]:
# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [71]:
#K-means function
def get_clusters(k, data):
    #Create a copy of the DataFrame
    data = data.copy()       
    #Initialize the K-Means model   
    model = KMeans(n_clusters=k, random_state=0)   
    # Fit the model
    model.fit(data)   
    # Predict clusters
    predictions = model.predict(data)
    # Create return DataFrame with predicted clusters
    data["class"] = model.labels_
    return data

In [72]:
#run the K-means algorithm
X_clusters = get_clusters(4,df_X_pca)
X_clusters.head()

Unnamed: 0,pc_1,pc_2,pc_3,class
0,-1.121871,-0.095584,-0.51229,0
1,-1.119998,-0.093523,-0.512601,0
2,-0.653185,1.578325,-0.83807,0
3,1.197694,-0.542335,0.202978,1
4,1.752958,-0.935188,0.457409,1


In [73]:
X_clusters.shape

(524, 4)

In [74]:
#Rest col index for modified dataframes
#Create a new DataFrame named “clustered_df
crypto_df_mined_col.reset_index(drop=True, inplace=True)
coins_name_df.reset_index(drop=True, inplace=True)
clustered_df = pd.concat([crypto_df_mined_col, X_clusters,coins_name_df], axis=1,ignore_index=True, sort=False)

#Rename columns
clustered_df.columns = ['Algorithm','ProofType','TotalCoinsMined','TotalCoinSupply','pc_1','pc_2','pc_3','class','CoinName']
#Reorder columns
clustered_df = clustered_df[['Algorithm','ProofType','TotalCoinsMined','TotalCoinSupply','pc_1','pc_2','pc_3','CoinName','class']]
#Drop null rows
clustered_df.dropna(how='any',inplace=True)

clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,pc_1,pc_2,pc_3,CoinName,class
0,Scrypt,PoW/PoS,41.99995,42,-1.121871,-0.095584,-0.51229,42 Coin,0
1,Scrypt,PoW/PoS,1055185000.0,532000000,-1.119998,-0.093523,-0.512601,404Coin,0
2,X13,PoW/PoS,29279420000.0,314159265359,-0.653185,1.578325,-0.83807,EliteCoin,0
3,SHA-256,PoW,17927180.0,21000000,1.197694,-0.542335,0.202978,Bitcoin,1
4,Ethash,PoW,107684200.0,0,1.752958,-0.935188,0.457409,Ethereum,1


In [75]:
#Create a 3D scatter plot using Plotly Express; hover_name="CoinName" and hover_data=["Algorithm"]

# Plotting the clusters with three features
fig = px.scatter_3d(clustered_df, x="pc_1", y="pc_2", z="pc_3", color="class", symbol="class",hover_name="CoinName",hover_data=["Algorithm"],height=500,width=800)
fig.update_layout(legend=dict(x=0,y=1))
fig.show()

In [76]:
#Use hvplot.table to create a data table with all the current tradable cryptocurrencies
#CoinName, Algorithm, ProofType, TotalCoinSupply, TotalCoinsMined, and Class


clustered_df.hvplot.table(columns=['CoinName','Algorithm','ProofType','TotalCoinSupply','TotalCoinsMined','class'],width=400)


In [77]:
#Create a scatter plot using hvplot.scatter 
#having x="TotalCoinsMined" and y="TotalCoinSupply"

# Add a new class column to the df_iris
clustered_df.hvplot.scatter(x="TotalCoinsMined", y="TotalCoinSupply",by="class")