In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder

#### Read crypto_data.csv into Pandas. The dataset was obtained from CryptoCompare.

In [2]:
# Data loading
file_path = Path("../Instructions/crypto_data.csv")
df_crypto = pd.read_csv(file_path)
df_crypto

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1247,XBC,BitcoinPlus,Scrypt,True,PoS,1.283270e+05,1000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


#### Discard all cryptocurrencies that are not being traded. 
#### In other words, filter for currencies that are currently being traded. 
#### Once you have done this, drop the IsTrading column from the dataframe.

In [3]:
test = df_crypto[df_crypto['IsTrading'] == False]
test

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
97,FSTC,FastCoin,Scrypt,False,PoW,,165888000
119,KEYC,KeyCoin,X13,False,PoW/PoS,,1000000
132,MINRL,Minerals Coin,X11,False,PoW/PoS,,1000000
201,XCSH,Xcash,Scrypt,False,PoW/PoS,,2400000
269,CAMC,Camcoin,X11,False,PoW/PoS,,10000000
...,...,...,...,...,...,...,...
1236,ZILLA,ChainZilla,Equihash,False,DPoW,1.100007e+07,11000000
1248,DVTC,DivotyCoin,Scrypt,False,PoW/PoS,2.149121e+07,100000000
1249,GIOT,Giotto Coin,Scrypt,False,PoW/PoS,,233100000
1250,OPSC,OpenSourceCoin,SHA-256,False,PoW/PoS,,21000000


In [4]:
dropped_false = df_crypto.drop(df_crypto.loc[df_crypto['IsTrading']==False].index)

In [5]:
# Show False has been removed. 108 total False. Prior count was 1252, now is 1144 = 108 difference. 
dropped_false

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [6]:
# Drop IsTrading column 

noTrading_df = dropped_false.drop(columns=["IsTrading"])
noTrading_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.0,0


#### Remove all rows that have at least one null value

In [7]:
for column in noTrading_df.columns:
    print(f"Column {column} has {noTrading_df[column].isnull().sum()} null values")

Column Unnamed: 0 has 0 null values
Column CoinName has 0 null values
Column Algorithm has 0 null values
Column ProofType has 0 null values
Column TotalCoinsMined has 459 null values
Column TotalCoinSupply has 0 null values


In [8]:
# Find duplicate entries
print(f"Duplicate entries: {noTrading_df.duplicated().sum()}")

Duplicate entries: 0


In [9]:
new_df = noTrading_df.dropna(axis='columns')

In [10]:
for column in new_df.columns:
    print(f"Column {column} has {new_df[column].isnull().sum()} null values")

Column Unnamed: 0 has 0 null values
Column CoinName has 0 null values
Column Algorithm has 0 null values
Column ProofType has 0 null values
Column TotalCoinSupply has 0 null values


In [11]:
new_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,42
1,365,365Coin,X11,PoW/PoS,2300000000
2,404,404Coin,Scrypt,PoW/PoS,532000000
3,611,SixEleven,SHA-256,PoW,611000
4,808,808,SHA-256,PoW/PoS,0
...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,PoW,1000000000
1244,UOS,UOS,SHA-256,DPoI,1000000000
1245,BDX,Beldex,CryptoNight,PoW,1400222610
1246,ZEN,Horizen,Equihash,PoW,21000000


#### Drop all untraded coin 

In [12]:
drop_coin_df = new_df.drop(new_df.loc[new_df['TotalCoinSupply']== '0'].index)

In [13]:
drop_coin_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,42
1,365,365Coin,X11,PoW/PoS,2300000000
2,404,404Coin,Scrypt,PoW/PoS,532000000
3,611,SixEleven,SHA-256,PoW,611000
5,1337,EliteCoin,X13,PoW/PoS,314159265359
...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,PoW,1000000000
1244,UOS,UOS,SHA-256,DPoI,1000000000
1245,BDX,Beldex,CryptoNight,PoW,1400222610
1246,ZEN,Horizen,Equihash,PoW,21000000


In [14]:
# Drop CoinName from Column List
active_coin = drop_coin_df.drop(columns=["CoinName"])
active_coin

Unnamed: 0.1,Unnamed: 0,Algorithm,ProofType,TotalCoinSupply
0,42,Scrypt,PoW/PoS,42
1,365,X11,PoW/PoS,2300000000
2,404,Scrypt,PoW/PoS,532000000
3,611,SHA-256,PoW,611000
5,1337,X13,PoW/PoS,314159265359
...,...,...,...,...
1243,SERO,Ethash,PoW,1000000000
1244,UOS,SHA-256,DPoI,1000000000
1245,BDX,CryptoNight,PoW,1400222610
1246,ZEN,Equihash,PoW,21000000


#### Convert the features with text values, Algorithm and ProofType, into numerical data. 

In [25]:
convert_coin = pd.get_dummies(active_coin["Algorithm"], ["ProofType"])
convert_coin

Unnamed: 0,['ProofType']_1GB AES Pattern Search,['ProofType']_536,['ProofType']_Argon2,['ProofType']_BLAKE256,['ProofType']_Blake,['ProofType']_Blake2S,['ProofType']_Blake2b,['ProofType']_C11,['ProofType']_Cloverhash,['ProofType']_Counterparty,...,['ProofType']_VeChainThor Authority,['ProofType']_X11,['ProofType']_X11GOST,['ProofType']_X13,['ProofType']_X14,['ProofType']_X15,['ProofType']_X16R,['ProofType']_XEVAN,['ProofType']_YescryptR16,['ProofType']_Zhash
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1243,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1244,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1245,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# X_dummies = pd.get_dummies(active_coin)
# print(X_dummies.columns)
# X_dummies