In [None]:
# Import Dependencies
import pandas as pd
import hvplot.pandas
from path import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

## Preprocessing the Data

In [None]:
# Load the cryptocurrency CSV into a DataFrame. The 'Unnamed: 0' column is
# promoted to the index and the leftover index name is cleared.
file_path = 'Resources/crypto_data.csv'
crypto_df = (
    pd.read_csv(file_path)
    .set_index('Unnamed: 0')
    .rename_axis(None)
)

print(crypto_df.shape)
crypto_df

In [None]:
# Keep only the rows whose IsTrading flag equals True
is_trading = crypto_df["IsTrading"].eq(True)
crypto_df = crypto_df.loc[is_trading]

print(crypto_df.shape)
crypto_df.head(10)

In [None]:
# Keep only the rows that have a value in the Algorithm column
has_algorithm = crypto_df["Algorithm"].notna()
crypto_df = crypto_df.loc[has_algorithm]

print(crypto_df.shape)
crypto_df.head(10)

In [None]:
# Remove the IsTrading column now that the rows have been filtered on it
crypto_df = crypto_df.drop("IsTrading", axis="columns")

print(crypto_df.shape)
crypto_df

In [None]:
# Drop every row that contains a null in any column
crypto_df = crypto_df.dropna(axis=0, how="any")

print(crypto_df.shape)
crypto_df.head(10)

In [None]:
# Keep only the cryptocurrencies whose TotalCoinsMined is strictly positive
has_mined_coins = crypto_df["TotalCoinsMined"].gt(0)
crypto_df = crypto_df.loc[has_mined_coins]

print(crypto_df.shape)
crypto_df.head(10)

In [None]:
# Set the coin names aside in their own single-column DataFrame
# (same index as crypto_df) before they are dropped from the features
crypto_name_df = crypto_df["CoinName"].to_frame()

print(crypto_name_df.shape)
crypto_name_df.head()

In [None]:
# Remove "CoinName" from the feature DataFrame; the name does not contribute to the analysis
crypto_df = crypto_df.drop("CoinName", axis="columns")

print(crypto_df.shape)
crypto_df.head(10)

In [None]:
# One-hot encode the remaining text columns so that every feature is numeric
categorical_columns = ["Algorithm", "ProofType"]
X = pd.get_dummies(crypto_df, columns=categorical_columns)

print(X.shape)
X

In [None]:
# Standardize the features: fit the scaler on X, then transform X with it
scaler = StandardScaler().fit(X)
crypto_scaled = scaler.transform(X)

# Preview the first five scaled rows
crypto_scaled[:5]

# Dimensionality Reduction

In [None]:
# Perform dimensionality reduction with PCA, keeping 3 principal components.
# random_state is pinned so that, if scikit-learn selects a randomized SVD
# solver for this data shape, the components are reproducible across runs
# (the K-Means cells below are seeded the same way).
pca = PCA(n_components=3, random_state=0)

crypto_pca = pca.fit_transform(crypto_scaled)

crypto_pca

In [None]:
# Wrap the 3 principal components in a DataFrame that shares the index of crypto_name_df
pc_columns = ["PC 1", "PC 2", "PC 3"]
crypto_pca_df = pd.DataFrame(
    crypto_pca,
    index=crypto_name_df.index,
    columns=pc_columns,
)

print(crypto_pca_df.shape)
crypto_pca_df

# Cluster Analysis with K-Means

In [None]:
# Build an elbow curve to choose K: fit a seeded K-Means model for each
# candidate cluster count from 1 to 10 and record the inertia of each fit.
k = list(range(1, 11))
inertia = [
    KMeans(n_clusters=n_clusters, random_state=0).fit(crypto_pca_df).inertia_
    for n_clusters in k
]

# Plot inertia against k
elbow_df = pd.DataFrame({"k": k, "inertia": inertia})
elbow_df.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [None]:
# Initialize the K-Means model with 4 clusters and a fixed seed
km_model = KMeans(n_clusters=4, random_state=0)

# Fit the model and obtain each row's cluster label in a single step;
# fit_predict avoids a separate predict pass over the same training data.
predictions = km_model.fit_predict(crypto_pca_df)
predictions

In [None]:
# Build one DataFrame holding the coin features, the principal components,
# the coin names and the predicted cluster of each coin.
# Concatenate crypto_df and crypto_pca_df column-wise, keeping only the index
# labels present in both frames.
clustered_df = pd.concat([crypto_df, crypto_pca_df], axis=1, join="inner")

# Add a "CoinName" column holding the cryptocurrency names (aligned on the index).
clustered_df["CoinName"] = crypto_name_df["CoinName"]

# Add a "Class" column holding the K-Means cluster labels. The labels are wrapped
# in a Series indexed like crypto_pca_df (the frame the model was fitted on), so
# each label is matched to its coin by index rather than by row position.
clustered_df["Class"] = pd.Series(km_model.labels_, index=crypto_pca_df.index)

# Print the shape of clustered_df and preview the first rows
print(clustered_df.shape)
clustered_df.head(10)

# Visualize the Results

In [None]:
# Plot every coin in principal-component space, colored and marked by its
# cluster; the hover label shows the coin name and its algorithm.
fig = px.scatter_3d(
    clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    color="Class",
    symbol="Class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=800,
)
# Place the legend at x=0, y=1
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()

In [None]:
# Render a sortable, selectable table of the tradable cryptocurrencies
table_columns = [
    'CoinName',
    'Algorithm',
    'ProofType',
    'TotalCoinSupply',
    'TotalCoinsMined',
    'Class',
]
tradable_crypto_table = clustered_df.hvplot.table(
    columns=table_columns,
    sortable=True,
    selectable=True,
)
tradable_crypto_table

In [None]:
# Report the total number of tradable cryptocurrencies, i.e. the number of rows in clustered_df
print(f"There are {len(clustered_df)} tradable cryptocurrencies")

In [None]:
# Min-max scale the supply and mined-coin columns for the scatter plot
supply_columns = ['TotalCoinSupply', 'TotalCoinsMined']
Tradable_cluster = clustered_df[supply_columns].copy()

min_max_scaler = MinMaxScaler()
Tradable_cluster_scaled = min_max_scaler.fit_transform(Tradable_cluster)
Tradable_cluster_scaled

In [None]:
# Assemble the plotting DataFrame: the scaled supply/mined values on the
# clustered_df index, followed by the "CoinName" and "Class" columns taken
# from clustered_df.
plot_df = pd.DataFrame(
    Tradable_cluster_scaled,
    index=clustered_df.index,
    columns=["TotalCoinSupply", "TotalCoinsMined"],
).assign(
    CoinName=clustered_df["CoinName"],
    Class=clustered_df["Class"],
)

plot_df.head(10)

In [None]:
# Scatter plot of scaled mined coins (x) against scaled coin supply (y),
# grouped by cluster, with the coin name shown on hover.
plot_df.hvplot.scatter(
    x='TotalCoinsMined',
    y='TotalCoinSupply',
    by='Class',
    hover_cols='CoinName',
)

# Recommendation

Can the cryptocurrencies be clustered together, and if so, into how many clusters?