#  Data Preparation.


In [None]:
# Initial imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.datasets import load_digits
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression


Load the data into a Pandas DataFrame named `crypto_df`.

In [None]:
# Read the raw cryptocurrency records from the CSV stored next to this notebook
csv_path = "./crypto_data.csv"
crypto_df = pd.read_csv(csv_path)

# Preview the first ten rows
crypto_df.head(n=10)

In [None]:
# List the column labels available in the raw data
crypto_df.columns

List the DataFrame's data types to ensure they're aligned to the type of data stored on each column.

In [None]:
# Show the dtype of each column as loaded by read_csv
crypto_df.dtypes



Check which currencies are currently being traded, then drop the `IsTrading` column from the DataFrame.

In [None]:
# Count how many rows fall under each `IsTrading` value.
# (This cell only inspects the column; it does not filter any rows.)
crypto_df['IsTrading'].value_counts()

In [None]:
# Keep only the currencies that are currently being traded, then remove the
# `IsTrading` column, which carries no further information once every
# remaining row has the same value.
# The guard makes the cell safe to re-run: on a second execution the column is
# already gone and the frame is left untouched.
# NOTE(review): assumes `IsTrading` was parsed as a boolean column -- confirm
# against the output of `crypto_df.dtypes`.
if "IsTrading" in crypto_df.columns:
    is_traded = crypto_df["IsTrading"].eq(True)
    crypto_df = crypto_df.loc[is_traded].drop(columns=["IsTrading"])

crypto_df.head()

Remove all rows with `null` values if any.

In [None]:
rows_before = crypto_df.shape[0]
print("Number of rows before removing nulls:", rows_before)

# Discard every row that contains at least one missing value
df = crypto_df.dropna(axis=0, how="any")

rows_after = df.shape[0]
print("Number of rows after removing nulls:", rows_after)

In [None]:
# Inspect the frame that remains after dropping rows with nulls
df

Filter for cryptocurrencies that have been mined

In [None]:
# Order the rows by mined-coin count (ascending) so the smallest values appear first
mined_df = df.sort_values(by="TotalCoinsMined", ascending=True)

mined_df.head()

 

In [None]:
# Keep only the coins that have actually been mined,
# i.e. rows whose total coins mined is strictly positive.
has_been_mined = df["TotalCoinsMined"].gt(0)
mined = df[has_been_mined]

mined

For your dataset to be comprehensible to a machine learning algorithm, its data should be numeric. Since the coin names do not contribute to the analysis of the data, delete the `CoinName` column from the DataFrame.

In [None]:
# Drop the coin names: they are identifiers rather than features.
# Start from `mined` (rows with TotalCoinsMined > 0) rather than `df`, so the
# mined-coins filter actually carries through to the data used for modelling.
df1 = mined.drop(columns=["CoinName"])
df1.head()

Convert the remaining features with text values, `Algorithm` and `ProofType`, into numerical data using one-hot encoding.

In [None]:
# One-hot encode the text-valued columns so every feature is numeric.
# NOTE(review): with no `columns=` argument, get_dummies encodes every
# object/string/category column, not only `Algorithm` and `ProofType` --
# confirm no other text column is still present in `df1`.
df2 = pd.get_dummies(data=df1)

df2.head()

In [None]:
# Standardise every feature with StandardScaler
scaler = StandardScaler()
scaler.fit(df2)
scaled_data = scaler.transform(df2)

# Wrap the scaled array back into a DataFrame, restoring the feature names
df2_scaled = pd.DataFrame(data=scaled_data, columns=df2.columns)

# Preview DataFrame
df2_scaled.head()

### Dimensionality Reduction

In [None]:
# Perform dimensionality reduction with PCA.
# The features were already standardised when `df2_scaled` was built, so it is
# reused as-is; re-scaling here would only repeat the work and overwrite the
# DataFrame with a bare array.

# Initialize PCA model. A float `n_components` keeps as many principal
# components as are needed to explain that share of the variance (99% here),
# not a fixed number of components.
pca = PCA(n_components=0.99)

# Project the scaled features onto the retained principal components
df3 = pca.fit_transform(df2_scaled)

df3.shape

In [None]:
# Next, further reduce the dataset dimensions with t-SNE and visually inspect the results

# Initialize t-SNE model.
# t-SNE is stochastic; fixing `random_state` makes the embedding (and any
# plot built from it) reproducible across runs.
tsne = TSNE(n_components=2, perplexity=20.0, random_state=10)

# Apply a fit-transform
tsne_features = tsne.fit_transform(df3)
tsne_features.shape


In [None]:
# Plot the two t-SNE dimensions against each other to look for visible clusters
# in the cryptocurrency data.
plt.scatter(x=tsne_features[:, 0], y=tsne_features[:, 1])
plt.title('t-SNE projection of the cryptocurrency data')
plt.xlabel('t-SNE dimension 1')
plt.ylabel('t-SNE dimension 2')
plt.show()

# Cluster Analysis with k-Means

In [None]:
# Start with a two-cluster k-means model on the PCA-reduced features
model = KMeans(random_state=10, n_clusters=2)

model.fit(X=df3)

In [None]:
# Predict a cluster label for every sample and show the labels
predictions = model.predict(X=df3)
print(predictions)

In [None]:
# Check the centroids: one row of coordinates per cluster of the fitted model
model.cluster_centers_

In [None]:
# Check the inertia of the fitted model (the same quantity plotted in the elbow curve)
model.inertia_

In [None]:
# Candidate cluster counts to evaluate
k_list = list(range(1, 11))

# Looking for the best k: fit one k-means model per candidate and record its inertia
inertia = [
    KMeans(n_clusters=k, random_state=10).fit(df3).inertia_
    for k in k_list
]

# Plot the Elbow Curve
fig, ax = plt.subplots()
ax.plot(k_list, inertia)
ax.set_xticks(k_list)
ax.set_xlabel('Number of clusters')
ax.set_ylabel('Inertia')
plt.show()

Based on the elbow curve, the chosen value of k is 9.

In [None]:
# `df3` arrives as the PCA output array, which supports neither column
# assignment nor `.head()`. Wrap it in a DataFrame first; dropping any "class"
# column left by a previous run of this cell keeps the cell re-runnable.
pca_features = pd.DataFrame(df3).drop(columns="class", errors="ignore")
pca_features.columns = [f"PC{i + 1}" for i in range(pca_features.shape[1])]

# Fit model with the k chosen from the elbow curve
model = KMeans(n_clusters=9, random_state=0)
model.fit(pca_features.to_numpy())

# Add predictions as a string column so they are treated as categories
cluster_labels = model.predict(pca_features.to_numpy()).astype("str")
df3 = pca_features.assign(**{"class": cluster_labels})

df3.head()

In [None]:
# Pair-plot the leading principal components, coloured by cluster.
# PCA kept however many components were needed for 99% of the variance, and a
# pair plot draws one panel per pair of columns, so plotting every component
# quickly becomes unreadable; only the first few are shown.
MAX_PAIRPLOT_COMPONENTS = 4
component_cols = [col for col in df3.columns if col != "class"][:MAX_PAIRPLOT_COMPONENTS]

sns.pairplot(df3, vars=component_cols, hue="class", diag_kind="hist")
plt.show()

 ## Recommendations based on the findings.

Based on the findings, I think the cryptocurrencies can be clustered together. I think they can be grouped into 9 clusters.