In [None]:
pip install Path

In [None]:
import pandas as pd
from path import Path
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

### Data Preparation

In [None]:
# Relative path to the cryptocurrency CSV file
file = Path("crypto_data.csv")

In [None]:
# Read the CSV into a DataFrame, using its first column as the row index
df = pd.read_csv(file, index_col=0)
# Preview the first ten rows
df.head(n=10)

In [None]:
# Collect the distinct values of the Algorithm column and print them
algorithms = set(df['Algorithm'].values)
print(algorithms)

In [None]:
# Count how many cryptocurrencies fall under each IsTrading value
df.loc[:, 'IsTrading'].value_counts()

In [None]:
# Keep only the rows whose IsTrading flag is True
df = df[df['IsTrading'] == True]
# Re-count the flag values after filtering
df.loc[:, 'IsTrading'].value_counts()


In [None]:
# IsTrading has already been used as a filter, so remove the column
df = df.drop(['IsTrading'], axis=1)
df.head(5)

In [None]:
# Drop every row that contains at least one missing value
df = df.dropna(axis='index', how='any')
# (rows, columns) remaining
df.shape

In [None]:
# Show TotalCoinsMined in ascending order so any zero or negative values appear first
df['TotalCoinsMined'].sort_values(ascending=True)

In [None]:
# Keep rows where TotalCoinsMined is strictly greater than zero
df = df.loc[df['TotalCoinsMined'].gt(0)]
# Number of rows remaining
df.shape[0]

In [None]:
# Remove the CoinName column from the working dataframe
df = df.drop(['CoinName'], axis=1)
df.head(5)

In [None]:
# One-hot encode the two string-valued columns, Algorithm and ProofType
x = pd.get_dummies(df, columns=['Algorithm', 'ProofType'])
print(x.shape)
x.head(5)

In [None]:
# Fit a StandardScaler on the encoded features, then apply it to the same data
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)

In [None]:
# Show the first row of the scaled data
x_scaled[0, :]

In [None]:
# Dimensions of the scaled data as (rows, columns)
(x_scaled.shape[0], x_scaled.shape[1])

### Reduce dataset dimensions with PCA

In [None]:
# Reduce dimensions with PCA. The fractional n_components asks PCA to keep as
# many components as are needed to explain that share (here 99%) of the variance.
pca = PCA(n_components = 0.99)
prin_components = pca.fit_transform(x_scaled)
# Shape of the projected data; the second entry is the number of components kept
prin_components.shape

In [None]:
# Sum of the per-component explained variance (absolute variance, not a ratio).
# NOTE(review): to check the retained fraction against the 0.99 target,
# `pca.explained_variance_ratio_.sum()` is presumably the intended figure - confirm.
pca.explained_variance_.sum()

In [None]:
# Reduce dimensions with PCA, this time keeping enough components to explain
# 90% of the variance. This overwrites `pca` and `prin_components` from the 99%
# run; the 90% projection is the one the t-SNE and k-means cells consume.
pca = PCA(n_components = 0.90)
prin_components = pca.fit_transform(x_scaled)
# Shape of the projected data; the second entry is the number of components kept
prin_components.shape

In [None]:
# The sum of the explained variance of the principal components
# (absolute variance, not a ratio).
# NOTE(review): to check the retained fraction against the 0.90 target,
# `pca.explained_variance_ratio_.sum()` is presumably the intended figure - confirm.
pca.explained_variance_.sum()

In [None]:
# Reduce the PCA output further with t-SNE.
# t-SNE is stochastic, so random_state is fixed (to the same seed the k-means
# cell uses) to make the embedding and its plot reproducible across runs.
tsne1 = TSNE(perplexity=50, random_state=0)
tsne1features = tsne1.fit_transform(prin_components)
# (rows, embedding dimensions)
tsne1features.shape

In [None]:
# Plot the first two t-SNE dimensions against each other.
# The columns are passed to scatter directly instead of being stored in `x`/`y`:
# `x` already holds the one-hot encoded feature frame that the scaling cell
# reads, so overwriting it would break re-running that cell.
plt.scatter(tsne1features[:, 0], tsne1features[:, 1])
plt.xlabel('t-SNE Dimension 1')
plt.ylabel('t-SNE Dimension 2')
plt.title('t-SNE Embedding of PCA-Reduced Data')
plt.show()

In [None]:
# Identify clusters with k-means: fit one model per candidate k and record its
# inertia for the elbow plot.
inertia = []
# Contiguous range 1..10 so that no candidate (such as k = 3) is skipped, which
# would leave a gap in the elbow curve.
k = list(range(1, 11))
for value in k:
    kmeans = KMeans(n_clusters=value, random_state=0)
    kmeans.fit(prin_components)
    inertia.append(kmeans.inertia_)

In [None]:
# Tabulate inertia against k, then draw it as a line chart (the elbow plot)
elbowPlot = pd.DataFrame(dict(K=k, Inertia=inertia))
elbowPlot.plot(x='K', y='Inertia', kind='line')
plt.title('Elbow Plot of Scaled Data')
plt.xlabel('K Value')
plt.ylabel('Inertia')
plt.show()

Based on this analysis, the cryptocurrencies cannot be grouped into meaningful clusters: either they all fall into a single cluster, or the data set has no meaningful cluster structure.