# Import Libraries

In [1]:
# Data manipulation and scientific computing
import pandas as pd
import numpy as np
import scipy

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Others
from utils import read_data
from utils import convert_data_to_dataframe
from utils import check_date_integrity
from utils import check_shape_integrity
from utils import check_period_integrity
from utils import drop_indicator
from utils import align_dataframes

# Read Dataset

In [2]:
# Root folder of the dataset, given relative to the notebook's working
# directory. Judging by the load log below it holds one sub-directory per
# indicator group (e.g. "miner-flows").
BASE_DIR = r'../datasets/'

# Load every indicator found under BASE_DIR. read_data prints one
# "Start Reading ..." / "... loaded" pair per indicator as it goes.
data = read_data(BASE_DIR)




Enter ____ miner-flows ____ Directory
Start Reading miner-outflow-mean...
miner-outflow-mean loaded succesfully :)

Start Reading miner-inflow-total...
miner-inflow-total loaded succesfully :)

Start Reading miner-inflow-mean-ma7...
miner-inflow-mean-ma7 loaded succesfully :)

Start Reading miner-depositing-transactions...
miner-depositing-transactions loaded succesfully :)

Start Reading miner-reserve-usd...
miner-reserve-usd loaded succesfully :)

Start Reading miner-outflow-total...
miner-outflow-total loaded succesfully :)

Start Reading miner-depositing-addresses...
miner-depositing-addresses loaded succesfully :)

Start Reading miner-inflow-top10...
miner-inflow-top10 loaded succesfully :)

Start Reading miner-outflow-top10...
miner-outflow-top10 loaded succesfully :)

Start Reading miner-withdrawing-addresses...
miner-withdrawing-addresses loaded succesfully :)

Start Reading miner-in-house-transactions...
miner-in-house-transactions loaded succesfully :)

Start Reading miner

In [3]:
# Convert the raw indicator data into pandas DataFrames. The result is looked
# up by directory name in every later cell, e.g. all_dataframes['miner-flows'].
all_dataframes = convert_data_to_dataframe(data)

# Error Handling and Integrity Check

## Check for DataFrame Date Integrity

In [4]:
# Verify, directory by directory, that all indicator DataFrames share the same
# Date values. Each directory name is written once and used both as the lookup
# key and as the label handed to the check, so the two can never drift apart.
# The tuple keeps the original call order, so the printed report is unchanged.
for directory_name in (
    'exchange-flows',
    'flow-indicator',
    'market-indicator',
    'network-indicator',
    'miner-flows',
    'derivatives',
    'fund-data',
    'market-data',
    'addresses',
    'fees-and-revenue',
    'network-stats',
    'supply',
    'transactions',
    'inter-entity-flows',
    'bank-flows',
    'research',
):
    check_date_integrity(all_dataframes[directory_name], directory_name)

Not all DataFrames have the same Date values for 'exchange-flows' directory.

Error: Can only compare identically-labeled Series objects for flow-indicator

All DataFrames have the same Date values for 'market-indicator' directory.

Not all DataFrames have the same Date values for 'network-indicator' directory.

Error: Can only compare identically-labeled Series objects for miner-flows

All DataFrames have the same Date values for 'derivatives' directory.

Error: Can only compare identically-labeled Series objects for fund-data

Error: Can only compare identically-labeled Series objects for market-data

All DataFrames have the same Date values for 'addresses' directory.

All DataFrames have the same Date values for 'fees-and-revenue' directory.

All DataFrames have the same Date values for 'network-stats' directory.

All DataFrames have the same Date values for 'supply' directory.

All DataFrames have the same Date values for 'transactions' directory.

Error: Can only compare identical

## Check for DataFrame Shape Integrity

In [5]:
# Verify, directory by directory, that all indicator DataFrames have the same
# number of rows. Each directory name is written once and reused as both the
# lookup key and the report label; the tuple keeps the original call order.
# NOTE(review): in the recorded output the failure message of this check says
# "Date values" although it compares shapes — looks like a copy-paste slip in
# utils.check_shape_integrity; confirm there.
for directory_name in (
    'exchange-flows',
    'flow-indicator',
    'market-indicator',
    'network-indicator',
    'miner-flows',
    'derivatives',
    'fund-data',
    'market-data',
    'addresses',
    'fees-and-revenue',
    'network-stats',
    'supply',
    'transactions',
    'inter-entity-flows',
    'bank-flows',
    'research',
):
    check_shape_integrity(all_dataframes[directory_name], directory_name)

All DataFrames have the same shape values for 'exchange-flows' directory.
The shape is 1097.

Not all DataFrames have the same Date values for 'flow-indicator' directory.
Refrence: miners-position-index-mpi:1096 - Current:exchange-stablecoins-ratio-usd:1000

All DataFrames have the same shape values for 'market-indicator' directory.
The shape is 1096.

All DataFrames have the same shape values for 'network-indicator' directory.
The shape is 1096.

Not all DataFrames have the same Date values for 'miner-flows' directory.
Refrence: miner-outflow-mean:1097 - Current:miner-depositing-addresses:1096

All DataFrames have the same shape values for 'derivatives' directory.
The shape is 1097.

Not all DataFrames have the same Date values for 'fund-data' directory.
Refrence: fund-price-usd:754 - Current:fund-market-premium:773

Not all DataFrames have the same Date values for 'market-data' directory.
Refrence: market-cap:1095 - Current:coinbase-premium-index:1096

All DataFrames have the same sh

## Check for DataFrame Period

### exchange-flows

In [6]:
# List the (start, end) date span of every exchange-flows indicator.
# NOTE(review): the MIN/MAX footer in the recorded output looks like
# day-of-month values only (latest start day / earliest end day) — confirm in utils.
check_period_integrity(all_dataframes['exchange-flows'], 'exchange-flows')

Period Integrity for _______ exchange-flows _______

('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-12')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')


MIN: 12
MAX: 11

### flow-indicator

In [7]:
# List the (start, end) date span of every flow-indicator series before any
# clean-up; in the recorded run the last two start on 2021-06-15.
check_period_integrity(all_dataframes['flow-indicator'], 'flow-indicator')

Period Integrity for _______ flow-indicator _______

('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-06-15', '2024-03-10')
('2021-06-15', '2024-03-10')


MIN: 11
MAX: 10

TODO: Drop the last two indicators — they start on 2021-06-15, about three months later than the rest.

In [8]:
# Remove the flow-indicator series selected by start month '06' (the two that
# begin on 2021-06-15 in the listing above) and store the filtered collection
# back under the same key.
# NOTE(review): the selector is a month string only — presumably any year
# would match; confirm in utils.drop_indicator.
all_dataframes['flow-indicator'] = drop_indicator(all_dataframes['flow-indicator'], based_on='start', month='06')

In [9]:
# Re-run the period listing to confirm that the late-starting flow-indicator
# series are gone.
check_period_integrity(all_dataframes['flow-indicator'], 'flow-indicator')

Period Integrity for _______ flow-indicator _______

('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')


### market-indicator

In [10]:
# List the (start, end) date span of every market-indicator series.
check_period_integrity(all_dataframes['market-indicator'], 'market-indicator')

Period Integrity for _______ market-indicator _______

('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')


MIN: 11
MAX: 10

### network-indicator

In [11]:
# List the (start, end) date span of every network-indicator series; in the
# recorded run two of them are shifted one day later than the others.
check_period_integrity(all_dataframes['network-indicator'], 'network-indicator')

Period Integrity for _______ network-indicator _______

('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')


MIN: 12
MAX: 10

### miner-flows

In [12]:
# List the (start, end) date span of every miner-flows series; in the recorded
# run a few start on 2021-03-12 instead of 2021-03-11.
check_period_integrity(all_dataframes['miner-flows'], 'miner-flows')

Period Integrity for _______ miner-flows _______

('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')


MIN: 12
MAX: 11

### derivatives

In [13]:
# List the (start, end) date span of every derivatives series.
check_period_integrity(all_dataframes['derivatives'], 'derivatives')

Period Integrity for _______ derivatives _______

('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-11', '2024-03-11')


MIN: 11
MAX: 11

### fund-data

In [14]:
# List the (start, end) date span of every fund-data series; in the recorded
# run the end dates differ (2024-03-08 vs 2024-03-11).
check_period_integrity(all_dataframes['fund-data'], 'fund-data')

Period Integrity for _______ fund-data _______

('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-08')


MIN: 12
MAX: 08

### market-data

In [15]:
# List the (start, end) date span of every market-data series.
check_period_integrity(all_dataframes['market-data'], 'market-data')

Period Integrity for _______ market-data _______

('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-10')


MIN: 12
MAX: 10

### addresses

In [16]:
# List the (start, end) date span of every addresses series.
check_period_integrity(all_dataframes['addresses'], 'addresses')

Period Integrity for _______ addresses _______

('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')


MIN: 11
MAX: 10

### fees-and-revenue

In [17]:
# List the (start, end) date span of every fees-and-revenue series.
check_period_integrity(all_dataframes['fees-and-revenue'], 'fees-and-revenue')

Period Integrity for _______ fees-and-revenue _______

('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')
('2021-03-11', '2024-03-10')


MIN: 11
MAX: 10

### network-stats

In [18]:
# List the (start, end) date span of every network-stats series.
check_period_integrity(all_dataframes['network-stats'], 'network-stats')

Period Integrity for _______ network-stats _______

('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')


MIN: 12
MAX: 10

### supply

In [19]:
# List the (start, end) date span of every supply series.
check_period_integrity(all_dataframes['supply'], 'supply')

Period Integrity for _______ supply _______

('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')


MIN: 12
MAX: 10

### transactions

In [20]:
# List the (start, end) date span of every transactions series.
check_period_integrity(all_dataframes['transactions'], 'transactions')

Period Integrity for _______ transactions _______

('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-10')


MIN: 12
MAX: 10

### inter-entity-flows

In [21]:
# List the (start, end) date span of every inter-entity-flows series before
# any clean-up.
# NOTE(review): the recorded footer shows "MAX: 08" even though three series
# end on 2024-01-26, so the summary seems to compare day-of-month only and
# ignore the month — verify in utils.check_period_integrity.
check_period_integrity(all_dataframes['inter-entity-flows'], 'inter-entity-flows')

Period Integrity for _______ inter-entity-flows _______

('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-01-26')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-01-26')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-01-26')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-11')


MIN: 12
MAX: 08

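TODO: Drop the indicators at zero-based positions 11, 13 and 16 in the list above — their data ends on 2024-01-26, well before the others.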

In [22]:
# Remove the inter-entity-flows series selected by end month '01' (the three
# that stop on 2024-01-26 in the listing above) and store the filtered
# collection back under the same key.
# NOTE(review): the selector is a month string only — presumably any year
# would match; confirm in utils.drop_indicator.
all_dataframes['inter-entity-flows'] = drop_indicator(all_dataframes['inter-entity-flows'], based_on='end', month='01')

In [23]:
# Re-run the period listing to confirm that the series ending in January 2024
# are gone.
check_period_integrity(all_dataframes['inter-entity-flows'], 'inter-entity-flows')

Period Integrity for _______ inter-entity-flows _______

('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-10')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-08')
('2021-03-12', '2024-03-12')
('2021-03-12', '2024-03-11')


### bank-flows

In [24]:
# List the (start, end) date span of every bank-flows series.
check_period_integrity(all_dataframes['bank-flows'], 'bank-flows')

Period Integrity for _______ bank-flows _______

('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')


MIN: 12
MAX: 11

### research

In [25]:
# List the (start, end) date span of every research series before any
# clean-up; in the recorded run two of them start late, on 2021-03-18.
check_period_integrity(all_dataframes['research'], 'research')

Period Integrity for _______ research _______

('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-18', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-18', '2024-03-11')


MIN: 18
MAX: 11

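TODO: Drop the indicators at zero-based positions 3 and -1 (the last one) in the list above — they start on 2021-03-18, later than the rest.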

In [26]:
# Remove the research series selected by start day '18' (the two that begin on
# 2021-03-18 in the listing above) and store the filtered collection back
# under the same key.
# NOTE(review): the selector is a day-of-month string only — presumably any
# month/year would match; confirm in utils.drop_indicator.
all_dataframes['research'] = drop_indicator(all_dataframes['research'], based_on='start', day='18')

In [27]:
# Re-run the period listing to confirm that the late-starting research series
# are gone.
check_period_integrity(all_dataframes['research'], 'research')

Period Integrity for _______ research _______

('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-11', '2024-03-11')
('2021-03-12', '2024-03-11')
('2021-03-12', '2024-03-11')


Taking the latest start date and the earliest end date across all remaining indicators, the common (aligned) period runs from '2021-03-12' to '2024-03-08'.

# Align DataFrames

In [28]:
# Align the indicator DataFrames of every directory. Each directory name is
# written once and reused as both the lookup key and the report label; the
# tuple keeps the original call order, so the printed report is unchanged.
# align_dataframes is called for its effect only: the original cell never used
# a return value either. Presumably it trims the frames in place (the re-check
# that follows reports matching dates) — confirm in utils.align_dataframes.
for directory_name in (
    'exchange-flows',
    'flow-indicator',
    'market-indicator',
    'network-indicator',
    'miner-flows',
    'derivatives',
    'fund-data',
    'market-data',
    'addresses',
    'fees-and-revenue',
    'network-stats',
    'supply',
    'transactions',
    'inter-entity-flows',
    'bank-flows',
    'research',
):
    align_dataframes(all_dataframes[directory_name], directory_name)

exchange-flows DataFrames aligned succeessfully :)
flow-indicator DataFrames aligned succeessfully :)
market-indicator DataFrames aligned succeessfully :)
network-indicator DataFrames aligned succeessfully :)
miner-flows DataFrames aligned succeessfully :)


derivatives DataFrames aligned succeessfully :)
fund-data DataFrames aligned succeessfully :)
market-data DataFrames aligned succeessfully :)
addresses DataFrames aligned succeessfully :)
fees-and-revenue DataFrames aligned succeessfully :)
network-stats DataFrames aligned succeessfully :)
supply DataFrames aligned succeessfully :)
transactions DataFrames aligned succeessfully :)
inter-entity-flows DataFrames aligned succeessfully :)
bank-flows DataFrames aligned succeessfully :)
research DataFrames aligned succeessfully :)


# Recheck for alignment

In [30]:
# Re-run the Date-integrity check on every directory after alignment. Each
# directory name is written once and reused as both the lookup key and the
# report label; the tuple keeps the original call order.
# NOTE(review): in the recorded output 'fund-data' still reports
# "Can only compare identically-labeled Series objects" after alignment, so
# that directory does not appear to be aligned yet and needs a follow-up.
for directory_name in (
    'exchange-flows',
    'flow-indicator',
    'market-indicator',
    'network-indicator',
    'miner-flows',
    'derivatives',
    'fund-data',
    'market-data',
    'addresses',
    'fees-and-revenue',
    'network-stats',
    'supply',
    'transactions',
    'inter-entity-flows',
    'bank-flows',
    'research',
):
    check_date_integrity(all_dataframes[directory_name], directory_name)

All DataFrames have the same Date values for 'exchange-flows' directory.

All DataFrames have the same Date values for 'flow-indicator' directory.

All DataFrames have the same Date values for 'market-indicator' directory.

All DataFrames have the same Date values for 'network-indicator' directory.

All DataFrames have the same Date values for 'miner-flows' directory.

All DataFrames have the same Date values for 'derivatives' directory.

Error: Can only compare identically-labeled Series objects for fund-data

All DataFrames have the same Date values for 'market-data' directory.

All DataFrames have the same Date values for 'addresses' directory.

All DataFrames have the same Date values for 'fees-and-revenue' directory.

All DataFrames have the same Date values for 'network-stats' directory.

All DataFrames have the same Date values for 'supply' directory.

All DataFrames have the same Date values for 'transactions' directory.

All DataFrames have the same Date values for 'inter-enti