This Notebook will be used to evaluate and model various CryptoCurrency's available for investment.  the Analysis and results will be presented to the Advisory Services Team.

In [81]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [82]:
# read in the data set
df = pd.read_csv('crypto_data.csv')
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1252 entries, 0 to 1251
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       1252 non-null   object 
 1   CoinName         1252 non-null   object 
 2   Algorithm        1252 non-null   object 
 3   IsTrading        1252 non-null   bool   
 4   ProofType        1252 non-null   object 
 5   TotalCoinsMined  744 non-null    float64
 6   TotalCoinSupply  1252 non-null   object 
dtypes: bool(1), float64(1), object(5)
memory usage: 60.0+ KB


In [83]:
# count all unique values in the IsTrading Column
df['IsTrading'].value_counts()


True     1144
False     108
Name: IsTrading, dtype: int64

In [84]:

#  drop all rows where IsTrading = False
df = df[df['IsTrading'] == True]

# count all unique values in the IsTrading Column
df['IsTrading'].value_counts()



True    1144
Name: IsTrading, dtype: int64

In [85]:
# review for null values
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1144 entries, 0 to 1247
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       1144 non-null   object 
 1   CoinName         1144 non-null   object 
 2   Algorithm        1144 non-null   object 
 3   IsTrading        1144 non-null   bool   
 4   ProofType        1144 non-null   object 
 5   TotalCoinsMined  685 non-null    float64
 6   TotalCoinSupply  1144 non-null   object 
dtypes: bool(1), float64(1), object(5)
memory usage: 63.7+ KB


In [86]:
# drop all rows where where atleast one column has null values
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 685 entries, 0 to 1247
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       685 non-null    object 
 1   CoinName         685 non-null    object 
 2   Algorithm        685 non-null    object 
 3   IsTrading        685 non-null    bool   
 4   ProofType        685 non-null    object 
 5   TotalCoinsMined  685 non-null    float64
 6   TotalCoinSupply  685 non-null    object 
dtypes: bool(1), float64(1), object(5)
memory usage: 38.1+ KB


In [87]:
# sort the df by TotalCoinsMined in ascending order
df = df.sort_values(by='TotalCoinsMined', ascending=True)



In [88]:
# drop all rows where TotalCoinsMined is null 0 or less
df = df[df['TotalCoinsMined'] > 0]
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 532 entries, 0 to 1234
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       532 non-null    object 
 1   CoinName         532 non-null    object 
 2   Algorithm        532 non-null    object 
 3   IsTrading        532 non-null    bool   
 4   ProofType        532 non-null    object 
 5   TotalCoinsMined  532 non-null    float64
 6   TotalCoinSupply  532 non-null    object 
dtypes: bool(1), float64(1), object(5)
memory usage: 29.6+ KB


In [89]:
# drop the CoinName and Unnamed: Columns
df = df.drop(['CoinName', 'Unnamed: 0'], axis=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 532 entries, 0 to 1234
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Algorithm        532 non-null    object 
 1   IsTrading        532 non-null    bool   
 2   ProofType        532 non-null    object 
 3   TotalCoinsMined  532 non-null    float64
 4   TotalCoinSupply  532 non-null    object 
dtypes: bool(1), float64(1), object(3)
memory usage: 21.3+ KB


In [90]:
# convert the Algorithim and ProofType to numerical values using get_dummies
dummy_df = pd.get_dummies(df, columns=['Algorithm', 'ProofType', 'IsTrading'])


In [92]:

# comvert dummy df to a dataframe
dummy_df = pd.DataFrame(dummy_df)
dummy_df.head()



Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW,IsTrading_True
0,41.999954,42,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
45,88.0,384000000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
175,1177.0,1000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
54,42579.476901,500000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1001,84300.0,600000518,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [93]:
# standardize all the data so that columns that contain larger values do not unduly influence the outcome
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(dummy_df)
scaled_df = scaler.transform(dummy_df)
scaled_df = pd.DataFrame(scaled_df, columns=dummy_df.columns)
scaled_df.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW,IsTrading_True
0,-0.117108,-0.15287,-0.043396,-0.043396,-0.043396,-0.06143,-0.075307,-0.043396,-0.06143,-0.06143,...,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,0.0
1,-0.117108,5.521452,-0.043396,-0.043396,-0.043396,-0.06143,-0.075307,-0.043396,-0.06143,-0.06143,...,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,0.0
2,-0.117108,-0.15287,-0.043396,-0.043396,-0.043396,-0.06143,-0.075307,-0.043396,-0.06143,-0.06143,...,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,0.0
3,-0.117107,-0.152863,-0.043396,-0.043396,-0.043396,-0.06143,-0.075307,-0.043396,-0.06143,-0.06143,...,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,0.0
4,-0.117106,-0.144004,-0.043396,-0.043396,-0.043396,-0.06143,-0.075307,-0.043396,-0.06143,-0.06143,...,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,-0.043396,0.0
