In [1]:
#Import Dependencies

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

In [2]:
#Read crypto_data.csv into Pandas

cryptocurrency_df = pd.read_csv('crypto_data.csv', index_col = 0)
cryptocurrency_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [6]:
#Filter for currencies that are currently being traded
cryptocurrency_df = cryptocurrency_df[cryptocurrency_df['IsTrading'] == True]

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [7]:
#Drop the IsTrading column from the dataframe.
cryptocurrency_df = cryptocurrency_df.drop (columns = 'IsTrading')
cryptocurrency_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [8]:
#Remove all rows that have at least one null value.
cryptocurrency_df = cryptocurrency_df.dropna()

In [9]:
#Filter for cryptocurrencies that have been mined. Mined > 0
cryptocurrency_df = cryptocurrency_df[cryptocurrency_df["TotalCoinsMined"]>0]
cryptocurrency_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [11]:
#Delete the CoinName from the original dataframe
cryptocurrency_df.drop(['CoinName'], axis = 1, inplace = True)
cryptocurrency_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000
ETH,Ethash,PoW,107684200.0,0


In [12]:
#Convert the remaining features with text values, Algorithm and ProofType, into numerical data
data_convert = pd.get_dummies(cryptocurrency_df, columns = ['Algorithm', 'ProofType'], drop_first = True, dtype = float)
data_convert.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,41.99995,42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
404,1055185000.0,532000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1337,29279420000.0,314159265359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BTC,17927180.0,21000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ETH,107684200.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
#Standardize your dataset so that columns that contain larger values do not unduly influence the outcome
scaled_data = StandardScaler()
scaled_crypto = scaled_data.fit_transform(data_convert)
scaled_crypto

array([[-0.11710817, -0.1528703 , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.09396955, -0.145009  , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.52494561,  4.48942416, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       ...,
       [-0.09561336, -0.13217937, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11694817, -0.15255998, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11710536, -0.15285552, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ]])

In [16]:
#Perform dimensionality reduction with PCA. Rather than specify the number of principal components when you instantiate the PCA model, it is possible to state the desired explained variance
#Preserve 90% of the explained variance in dimensionality reduction
pca_data = PCA(n_components = 0.90)
pca_crypto = pca_data.fit_transform(scaled_crypto)
pca_crypto


array([[-3.35913620e-01,  1.04340531e+00, -5.37409264e-01, ...,
        -2.27792445e-15,  6.33690480e-15, -3.31784127e-15],
       [-3.19248850e-01,  1.04354697e+00, -5.37804309e-01, ...,
        -2.72379490e-15,  6.39183835e-15, -3.50871853e-15],
       [ 2.30471594e+00,  1.66999561e+00, -6.24076993e-01, ...,
        -9.62652659e-14,  8.58854759e-14, -1.95134419e-15],
       ...,
       [ 3.24179380e-01, -2.32278089e+00,  3.87363001e-01, ...,
        -1.78084853e-15,  9.46035347e-14,  3.34023942e-14],
       [-1.49546262e-01, -2.05899360e+00,  3.79619710e-01, ...,
        -7.59236464e-15,  7.84039123e-15,  1.27866416e-15],
       [-2.90587486e-01,  8.27023611e-01, -2.51533368e-01, ...,
        -7.11278526e-15,  7.68453563e-15, -1.81521208e-15]])

In [17]:
# Further reduce the dataset dimensions with t-SNE and visually inspect the results
# run t-SNE on the principal components: the output of the PCA transformation. Then create a scatter plot of the t-SNE output

tsne = TSNE(learning_rate = 35)
tsne_features = tsne.fit_transform(pca_crypto)


In [18]:
#Scatterplot
pca_crypto ['x'] = tsne_features[:,0]
pca_crypto ['y'] = tsne_features[:,1]

plt.scatter(pca_crypto['x'], pca_crypto['y'])
plt.show()

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
#Create an elbow plot to identify the best number of clusters. Use a for-loop to determine the inertia for each k between 1 through 10



In [None]:
## Conclusion
# Based on your findings, make a brief (1-2 sentences) recommendation to your clients. Can the cryptocurrencies be clustered together? If so, into how many clusters?
