In [27]:
import pandas as pd
import csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE

In [13]:
# Load the dataset from disk; index_col=0 makes the file's first column
# the row index instead of a regular data column.
df = pd.read_csv(
    "crypto_data.csv",
    index_col=0,
)

In [14]:
# Keep every row whose IsTrading value is anything other than False.
trading_df = df.loc[df["IsTrading"] != False]

In [15]:
# DataFrame.dropna() returns a new frame and leaves the original untouched,
# so the result has to be bound back to the name; otherwise the rows with
# missing values are only hidden in this cell's display and remain in
# trading_df for every later cell.
trading_df = trading_df.dropna()
trading_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
1337,EliteCoin,X13,True,PoW/PoS,2.927942e+10,314159265359
BTC,Bitcoin,SHA-256,True,PoW,1.792718e+07,21000000
...,...,...,...,...,...,...
ZEPH,ZEPHYR,SHA-256,True,DPoS,2.000000e+09,2000000000
GAP,Gapcoin,Scrypt,True,PoW/PoS,1.493105e+07,250000000
BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [16]:
# Retain only the rows with TotalCoinsMined of at least 1. A missing value
# compares as False here, so such rows are filtered out as well.
trading_df = trading_df[trading_df["TotalCoinsMined"].ge(1)]

In [17]:
# Remove the CoinName column before the frame is encoded and scaled.
# Rebind instead of using inplace=True: trading_df is the result of a boolean
# filter on another frame, and mutating such a slice in place can raise
# SettingWithCopyWarning on some pandas versions. errors="ignore" keeps the
# cell safe to re-run after the column has already been removed.
# NOTE(review): IsTrading looks constant after the earlier filter and is
# still carried into the feature matrix -- consider dropping it here too.
trading_df = trading_df.drop(columns=['CoinName'], errors='ignore')

In [18]:
# One-hot encode the two categorical columns (Algorithm and ProofType);
# all remaining columns are carried over unchanged. Preview the first rows.
X = pd.get_dummies(
    data=trading_df,
    columns=["Algorithm", "ProofType"],
)
X.head(5)

Unnamed: 0,IsTrading,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,True,41.99995,42,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,True,1055185000.0,532000000,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,True,29279420000.0,314159265359,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,True,17927180.0,21000000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,True,107684200.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
# Standardise each column of X (centre on its mean, scale by its standard
# deviation) and show the resulting array.
X_scaled = StandardScaler().fit(X).transform(X)
X_scaled

array([[ 0.        , -0.11710817, -0.1528703 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.        , -0.09396955, -0.145009  , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.        ,  0.52494561,  4.48942416, ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       ...,
       [ 0.        , -0.09561336, -0.13217937, ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.        , -0.11694817, -0.15255998, ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.        , -0.11710536, -0.15285552, ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ]])

In [25]:
# A fractional n_components tells PCA to keep as many components as are
# needed to explain that share of the variance (here 99%).
pca = PCA(n_components=0.99)
crypto_pca = pca.fit_transform(X_scaled)
# End the cell with the projected array so that a fresh run actually
# produces the output recorded under this cell, as the other cells do.
crypto_pca

array([[-0.33509881,  1.03218907, -0.59071344, ...,  0.13391619,
        -0.11593793,  0.00934363],
       [-0.31843395,  1.03233143, -0.59112555, ...,  0.13399077,
        -0.11564619,  0.00918792],
       [ 2.30546811,  1.65638302, -0.68361667, ..., -0.07341934,
         0.17914389, -1.41905933],
       ...,
       [ 0.32434854, -2.31230764,  0.42151537, ...,  0.23019201,
        -0.01101053,  0.47286829],
       [-0.14936365, -2.04933553,  0.41296826, ...,  0.35568904,
         0.08258903, -0.07056385],
       [-0.2899575 ,  0.82119451, -0.27632685, ...,  1.03190166,
        -0.05476628, -0.9014263 ]])

In [29]:
# t-SNE is a stochastic algorithm: without a fixed random_state the embedding
# (and anything derived from it) changes on every run. Pin the seed
# so Restart & Run All reproduces the same coordinates.
tsne = TSNE(learning_rate=50, random_state=42)
# Embed the PCA-reduced features and display the resulting coordinates.
tsne_features = tsne.fit_transform(crypto_pca)
tsne_features

array([[ 12.678858 ,   4.0785766],
       [ 13.695726 ,  11.170385 ],
       [ 24.061956 ,   2.0312638],
       ...,
       [-22.315968 ,  -8.37927  ],
       [-13.191516 , -10.593125 ],
       [  6.3652024, -11.570207 ]], dtype=float32)