In [None]:
import os
  import io
  import json
  import numpy as np
  import pandas as pd
  from path import Path
  import matplotlib.pyplot as plt
import pandas as pd
import hvplot.pandas
from pathlib import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [None]:
import sagemaker
  import sagemaker.amazon.common as smac
  from sagemaker.predictor import csv_serializer, json_deserializer
  from sagemaker import get_execution_role
  from sagemaker.amazon.amazon_estimator import get_image_uri
  import boto3

In [None]:
# Load the raw cryptocurrency dataset; the first CSV column is used as the index.
file_path = Path("Resources/crypto_data.csv")
crypto_df = pd.read_csv(
    file_path,
    index_col=0,
)
# Preview the first ten rows.
crypto_df.head(n=10)

In [None]:
# Keep only the rows whose IsTrading value equals True.
Ncrypto_df = crypto_df[crypto_df["IsTrading"].eq(True)]
Ncrypto_df.head()

In [None]:
import math

# Discard rows whose Algorithm is the literal string "N/A".
# NOTE(review): pandas.read_csv parses "N/A" as NaN by default, so for data loaded with
# default settings this comparison may match nothing -- confirm against the CSV.
Ncrypto_df = Ncrypto_df[Ncrypto_df["Algorithm"].ne("N/A")]
Ncrypto_df.head(5)

In [None]:
# Remove the IsTrading column.
# The frame is rebound to the result of drop() instead of using inplace=True: Ncrypto_df
# was produced by boolean filtering, and mutating such a frame in place can trigger
# pandas' SettingWithCopyWarning. errors="ignore" keeps the cell re-runnable once the
# column has already been removed.
Ncrypto_df = Ncrypto_df.drop(columns=["IsTrading"], errors="ignore")
Ncrypto_df.head()

In [None]:
# Print the number of missing values found in each column.
# Iterating a DataFrame directly yields its column labels.
for column in Ncrypto_df:
    print(f"Column {column} has {Ncrypto_df[column].isnull().sum()} null values")

In [None]:
# Drop every row that contains at least one null value.
# DataFrame.dropna() returns a new frame rather than modifying the original, so the result
# must be assigned back; otherwise the rows with nulls stay in Ncrypto_df and flow into
# the later encoding / scaling steps.
Ncrypto_df = Ncrypto_df.dropna()
# Show a short preview instead of the whole frame.
Ncrypto_df.head()

In [None]:
# Keep only rows whose TotalCoinsMined is greater than or equal to zero.
Ncrypto_df = Ncrypto_df[Ncrypto_df["TotalCoinsMined"].ge(0)]
Ncrypto_df.head(5)

In [None]:
# Keep the coin names in their own single-column DataFrame.
# NOTE(review): this is taken from crypto_df (the unfiltered frame), so it may contain rows
# that have been filtered out of Ncrypto_df -- confirm that is intended.
Coin_names_df = crypto_df[["CoinName"]]
Coin_names_df.head(5)

In [None]:
# Remove the CoinName column from the working frame.
# The frame is rebound to the result of drop() instead of using inplace=True: Ncrypto_df
# was produced by boolean filtering, and mutating such a frame in place can trigger
# pandas' SettingWithCopyWarning. errors="ignore" keeps the cell re-runnable once the
# column has already been removed.
Ncrypto_df = Ncrypto_df.drop(columns=["CoinName"], errors="ignore")
Ncrypto_df.head()

In [None]:
# One-hot encode the Algorithm and ProofType columns; all other columns pass through.
X = pd.get_dummies(
    Ncrypto_df,
    columns=["Algorithm", "ProofType"],
)
X.head(5)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the encoded features. fit_transform fits the scaler on X and returns
# the transformed values in one call; X is rebound to the scaled array.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Configure a PCA that keeps three components.
pca = PCA(n_components=3)

In [None]:
# Fit the PCA on the scaled features and project them onto the three components.
Crypto_pca = pca.fit_transform(X)
# Display the resulting array.
Crypto_pca

In [None]:
# Candidate cluster counts: 1 through 10.
k = list(range(1, 11))
inertia = []

# Calculate the inertia for the range of k values
for i in k:
    # fit() returns the estimator itself, so construction and fitting can be chained.
    km = KMeans(n_clusters=i, random_state=0).fit(Crypto_pca)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(
    x="k",
    y="inertia",
    xticks=k,
    title="Elbow Curve",
)

In [None]:
# Build a four-cluster K-Means model and fit it on the PCA output
# (fit() returns the estimator, so the two steps are chained).
model = KMeans(n_clusters=4, random_state=0).fit(Crypto_pca)

# Cluster assignment for every row of the PCA output
predictions = model.predict(Crypto_pca)

# Wrap the principal components in a DataFrame that shares Ncrypto_df's index,
# then attach the fitted cluster labels as the "class" column.
df_Crypto_pca = pd.DataFrame(
    Crypto_pca,
    index=Ncrypto_df.index,
    columns=[
        "principal component 1",
        "principal component 2",
        "principal component 3",
    ],
)
df_Crypto_pca["class"] = model.labels_
df_Crypto_pca.head(5)

In [None]:
# Preview the principal-component frame together with its cluster labels.
df_Crypto_pca.head()

In [None]:
# Place the principal components / cluster labels side by side with the coin features,
# aligning the two frames on their index.
clustered_df = pd.concat(
    [df_Crypto_pca, Ncrypto_df],
    axis=1,
    sort=False,
)
clustered_df.head(5)

In [None]:
# 3D scatter of the three principal components; colour and marker symbol both
# encode the cluster label.
fig = px.scatter_3d(
    df_Crypto_pca,
    x="principal component 1",
    y="principal component 2",
    z="principal component 3",
    color="class",
    symbol="class",
    width=800,
)
# Anchor the legend at the top-left corner of the figure.
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()

In [None]:
# crypto_table is another name for clustered_df (no copy is made).
crypto_table = clustered_df
# Render the combined frame as an hvPlot table.
crypto_table.hvplot.table()

In [None]:
# Attempt to standardise the coin-name frame and print its first five rows.
# NOTE(review): Coin_names_df is built from the "CoinName" column; if that column holds
# text, StandardScaler will presumably fail to convert it to numbers -- confirm whether
# this cell is still needed.
Coin_names_scaled = StandardScaler().fit_transform(Coin_names_df)
print(Coin_names_scaled[0:5])

In [None]:
# Print the number of non-null entries in each column of the combined table.
print(crypto_table.count())

In [None]:
# Standardise the two supply columns of the clustered frame and show the first two rows.
Coin_names_scaled = StandardScaler().fit_transform(
    clustered_df[["TotalCoinsMined", "TotalCoinSupply"]]
)
print(Coin_names_scaled[:2])

In [None]:
# 3D scatter of mined coins vs. total supply vs. the first principal component,
# with colour and marker symbol both encoding the cluster label.
# clustered_df is passed as the data source because it is the frame that carries the
# named columns referenced below; Crypto_pca is the bare array returned by
# pca.fit_transform and has no "TotalCoinsMined", "TotalCoinSupply" or "class" columns.
fig = px.scatter_3d(
    clustered_df,
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    z="principal component 1",
    color="class",
    symbol="class",
    width=800,
)
# Anchor the legend at the top-left corner of the figure.
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

In [None]:
# Clean up: delete the SageMaker endpoint behind linear_predictor.
# No visible cell creates linear_predictor, so on a fresh kernel the unguarded call fails
# with NameError; only attempt the deletion when the predictor actually exists.
if "linear_predictor" in globals():
    sagemaker.Session().delete_endpoint(linear_predictor.endpoint)
else:
    print("linear_predictor is not defined; no SageMaker endpoint to delete.")