In [206]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
import hvplot.pandas
from sklearn.cluster import KMeans
import plotly.express as px

In [207]:
file_path = "Resources/crypto_data.csv"
crypto_df = pd.read_csv(file_path)
crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [208]:
crypto_df.dtypes

Unnamed: 0          object
CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [209]:
#Filter out non-trading currencies
trading_crypto_df=crypto_df.loc[crypto_df["IsTrading"]==True]
trading_crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [210]:
trading_crypto_df.groupby("Algorithm").count()

Unnamed: 0_level_0,Unnamed: 0,CoinName,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1GB AES Pattern Search,1,1,1,1,1,1
536,2,2,2,2,1,2
Argon2,2,2,2,2,1,2
Argon2d,1,1,1,1,1,1
BLAKE256,2,2,2,2,2,2
...,...,...,...,...,...,...
XEVAN,6,6,6,6,6,6
XG Hash,1,1,1,1,0,1
YescryptR16,1,1,1,1,0,1
Zhash,1,1,1,1,1,1


In [211]:
#Remove the IsTrading column
trading_crypto_df=trading_crypto_df.drop(['IsTrading'], axis=1)
trading_crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.0,0


In [212]:
#Remove all items that have at least one null value
trading_crypto_df = trading_crypto_df.dropna()
trading_crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
4,808,808,SHA-256,PoW/PoS,0.0,0
5,1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [213]:
#Remove Currencies where coins are not mined
trading_crypto_mined_df=trading_crypto_df.loc[crypto_df["TotalCoinsMined"]>0]
trading_crypto_mined_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
5,1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
8,ETH,Ethereum,Ethash,PoW,107684200.0,0


In [214]:
#Create a new data frame for the Coin Name
trading_crypto_mined_df.set_index("Unnamed: 0", inplace= True)
trading_crypto_mined_df.head()

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [215]:
coins_name=pd.DataFrame(data=trading_crypto_mined_df["CoinName"], index=trading_crypto_mined_df.index)
coins_name.head()

Unnamed: 0_level_0,CoinName
Unnamed: 0,Unnamed: 1_level_1
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
ETH,Ethereum


In [216]:
#Remove the CoinName column
trading_crypto_mined_df=trading_crypto_mined_df.drop(['CoinName'], axis=1)
trading_crypto_mined_df.index.name = None
trading_crypto_mined_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000
ETH,Ethash,PoW,107684200.0,0


In [217]:
#Encode Values
#le = LabelEncoder()
#X['Algorithm'] = le.fit_transform(X['Algorithm']) 
#X['ProofType'] = le.fit_transform(X['ProofType']) 
X = trading_crypto_mined_df.copy()
X = pd.get_dummies(trading_crypto_mined_df, columns=["Algorithm", "ProofType"])
X.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [218]:
#Scale the data
df_scaled = StandardScaler().fit_transform(X)
print(df_scaled[0:5])

[[-0.11710817 -0.1528703  -0.0433963  -0.0433963  -0.0433963  -0.06142951
  -0.07530656 -0.0433963  -0.06142951 -0.06142951 -0.0433963  -0.0433963
  -0.19245009 -0.06142951 -0.09740465 -0.0433963  -0.11547005 -0.07530656
  -0.0433963  -0.0433963  -0.15191091 -0.0433963  -0.13118084 -0.0433963
  -0.0433963  -0.08703883 -0.0433963  -0.0433963  -0.0433963  -0.0433963
  -0.06142951 -0.0433963  -0.08703883 -0.08703883 -0.08703883 -0.0433963
  -0.13118084 -0.13840913 -0.13840913 -0.0433963  -0.06142951 -0.0433963
  -0.07530656 -0.18168574 -0.0433963  -0.0433963  -0.0433963  -0.07530656
  -0.15826614 -0.31491833 -0.0433963  -0.08703883 -0.07530656 -0.06142951
   1.38675049 -0.0433963  -0.0433963  -0.06142951 -0.0433963  -0.0433963
  -0.0433963  -0.0433963  -0.0433963  -0.0433963  -0.0433963  -0.0433963
  -0.39879994 -0.0433963  -0.18168574 -0.0433963  -0.08703883 -0.08703883
  -0.10680283 -0.0433963  -0.13118084 -0.0433963  -0.0433963  -0.0433963
  -0.0433963  -0.07530656 -0.43911856 -0.04339

In [219]:
#Initialize PCA
pca = PCA(n_components=3)

In [220]:
# Get three principal components for the data.
crypto_pca = pca.fit_transform(X)

In [221]:
# Transform PCA darta to a data frame
pcs_df = pd.DataFrame (
    data=crypto_pca, columns=["PC 1", "PC 2","PC 3"], index = trading_crypto_mined_df.index)
pcs_df.head(10)

Unnamed: 0,PC 1,PC 2,PC 3
42,-11629340000.0,-550257400.0,0.814728
404,-10640880000.0,-1197852000.0,0.814484
1337,275050700000.0,131237000000.0,0.989919
BTC,-11602190000.0,-555276700.0,-0.712858
ETH,-11575480000.0,-643502600.0,-0.706031
LTC,-11525070000.0,-562827600.0,-0.574821
DASH,-11605770000.0,-547073500.0,0.773963
XMR,-11620740000.0,-565152100.0,-0.696025
ETC,-11390800000.0,-543376200.0,-0.705865
ZEC,-11607460000.0,-546146400.0,-0.713007


In [222]:
# Create elbow curve
# Find the best value for K
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of K values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)

# Create the elbow curve
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

In [223]:
def get_clusters(k, data):
    # Create a copy of the DataFrame
    data = data.copy()       
    # Initialize the K-Means model   
    model = KMeans(n_clusters=k, random_state=0)   
    # Fit the model   
    model.fit(data)   
    # Predict clusters   
    predictions = model.predict(data)   
    # Create return DataFrame with predicted clusters   
    data["class"] = model.labels_   
    return data

In [224]:
# Run K Means 
clusters_df=get_clusters(4, pcs_df)
clusters_df.head()

Unnamed: 0,PC 1,PC 2,PC 3,class
42,-11629340000.0,-550257400.0,0.814728,0
404,-10640880000.0,-1197852000.0,0.814484,0
1337,275050700000.0,131237000000.0,0.989919,2
BTC,-11602190000.0,-555276700.0,-0.712858,0
ETH,-11575480000.0,-643502600.0,-0.706031,0


In [225]:
# Create data frame
clustered_df = trading_crypto_mined_df.join(clusters_df)
clustered_df = clustered_df.join(coins_name)
clustered_df=clustered_df[['Algorithm','ProofType','TotalCoinsMined','TotalCoinSupply','PC 1','PC 2','PC 3','CoinName','class']]
clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,class
42,Scrypt,PoW/PoS,41.99995,42,-11629340000.0,-550257400.0,0.814728,42 Coin,0
404,Scrypt,PoW/PoS,1055185000.0,532000000,-10640880000.0,-1197852000.0,0.814484,404Coin,0
1337,X13,PoW/PoS,29279420000.0,314159265359,275050700000.0,131237000000.0,0.989919,EliteCoin,2
BTC,SHA-256,PoW,17927180.0,21000000,-11602190000.0,-555276700.0,-0.712858,Bitcoin,0
ETH,Ethash,PoW,107684200.0,0,-11575480000.0,-643502600.0,-0.706031,Ethereum,0


In [226]:
# Create a 3d Plot
fig = px.scatter_3d(
    clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    color="class",
    symbol="class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=800,
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

In [227]:
# Create a data table with all the current tradable cryptocurrencies
clustered_df.hvplot.table(columns=['CoinName', 'Algorithm', 'ProofType', 'TotalCoinSupply', "class"], width=700)

In [228]:
clustered_df.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    hover_cols=["CoinName"],
    by="class",
)