# Amalgamator

In [1]:
import pandas as pd
from datetime import datetime, timedelta


In [2]:
# Read both source datasets; the Coin Metrics file is read first, then Yahoo Finance.
coinmetrics, yahoo = (
    pd.read_parquet(source_file)
    for source_file in ('coin-metrics.parquet', 'yahoo-finance.parquet')
)

In [3]:
# Outer-join the two frames on their indexes so index values present in only
# one source are kept (their missing side is filled with NaN).
combined_df = coinmetrics.merge(yahoo, how='outer', left_index=True, right_index=True)

# Price-range and liquidity columns (presumably supplied by the Yahoo Finance
# frame -- confirm against the parquet schema).
yfinance_metrics = [
    "ETH-BTC_High", "ETH-BTC_Low",
    "ETH-CAD_High", "ETH-CAD_Low",
    "BTC-CAD_High", "BTC-CAD_Low",
    "Global_Liquidity_Index",
]

# Un-prefixed metric names; each one is selected twice below, once with a
# "btc_" prefix and once with an "eth_" prefix.
coinmetrics_metrics = [
    "AdrBalNtv0.01Cnt", "AdrBalNtv0.1Cnt", "AdrBalNtv1Cnt", "AdrBalNtv10Cnt",
    "BlkSizeMeanByte",
    "CapRealUSD",
    "FeeByteMeanNtv",
    "FlowInExNtv", "FlowOutExNtv", "FlowTfrFromExCnt",
    "NDF",
    "SplyAct1d", "SplyActPct1yr",
    "TxCnt",
    "VelCur1yr",
]

# Per-asset column names derived from the shared base list.
btc_prefixed_metrics = [f"btc_{base_name}" for base_name in coinmetrics_metrics]
eth_prefixed_metrics = [f"eth_{base_name}" for base_name in coinmetrics_metrics]

# Single list holding every prefixed metric, BTC first and ETH second.
combined_prefixed_metrics = [*btc_prefixed_metrics, *eth_prefixed_metrics]

# Final column order: the price/liquidity columns, then the BTC and ETH metrics.
metrics = [*yfinance_metrics, *combined_prefixed_metrics]
combined_df = combined_df[metrics]

In [4]:
# Keep only the rows in which every selected column has a value.
# Reassigning the result (rather than passing inplace=True) follows current
# pandas guidance and leaves this step usable inside a method chain.
combined_df = combined_df.dropna()

In [5]:
# Show the frame as the cell result; pandas abbreviates the rendering, replacing
# the middle rows and columns with "...".
combined_df 

Unnamed: 0,ETH-BTC_High,ETH-BTC_Low,ETH-CAD_High,ETH-CAD_Low,BTC-CAD_High,BTC-CAD_Low,Global_Liquidity_Index,btc_AdrBalNtv0.01Cnt,btc_AdrBalNtv0.1Cnt,btc_AdrBalNtv1Cnt,...,eth_CapRealUSD,eth_FeeByteMeanNtv,eth_FlowInExNtv,eth_FlowOutExNtv,eth_FlowTfrFromExCnt,eth_NDF,eth_SplyAct1d,eth_SplyActPct1yr,eth_TxCnt,eth_VelCur1yr
2019-01-01,0.036718,0.035544,192.649857,180.575409,5246.870117,5057.234375,10677.402020,6457193.0,2539967.0,702986.0,...,28877687727.31675,0.000004,392978.197625,419726.133113,15602.0,0.635803,11221087.882036,72.295246,448168.0,11.758691
2019-01-02,0.039858,0.036676,213.886246,191.795868,5388.129883,5191.100586,11978.834450,6458964.0,2541704.0,703693.0,...,28990812437.411285,0.000003,790231.396805,729568.379931,21191.0,0.636376,10343209.345505,72.264375,589959.0,11.643999
2019-01-03,0.039754,0.038425,212.573837,198.451218,5362.917969,5159.898926,11919.945247,6459493.0,2542722.0,704873.0,...,28981719897.825188,0.000003,654169.139428,650444.543236,20336.0,0.636168,9498193.91421,72.221087,596620.0,11.529535
2019-01-04,0.040691,0.038730,211.151260,199.472687,5172.040527,5087.920898,11670.483300,6463332.0,2543644.0,705051.0,...,29094778907.792999,0.000003,680241.364599,1184203.323531,21142.0,0.636041,14212229.620577,72.193628,549398.0,11.365511
2019-01-05,0.041418,0.040067,215.118729,206.441116,5223.182617,5131.535645,10776.359592,6474689.0,2545280.0,705233.0,...,29046828453.278553,0.000005,631193.304059,1490287.90147,17419.0,0.637131,12886643.253894,72.080216,511189.0,11.22179
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-22,0.057708,0.056463,4080.433105,3889.075195,70823.007812,68408.390625,149267.862256,12514606.0,4557892.0,1018084.0,...,177970045703.737518,0.000005,338244.178803,369824.650815,115266.0,0.680495,12715855.813516,35.492136,1082822.0,6.038789
2024-02-23,0.058400,0.056821,4086.931885,3920.955078,70136.312500,68749.429688,148970.534800,12502806.0,4557570.0,1018299.0,...,177548830056.101593,0.000004,252603.100873,277404.199378,123997.0,0.680803,11815753.076549,35.465276,1159988.0,6.035038
2024-02-24,0.058169,0.057328,4035.814941,3928.526855,69447.539062,68190.140625,145602.136981,12500030.0,4558351.0,1018191.0,...,178232114484.84201,0.000002,155983.333289,186391.16255,108806.0,0.680875,11198452.093627,35.44651,1101339.0,6.028215
2024-02-25,0.058235,0.057437,4050.259277,3921.470459,69703.890625,68222.062500,145897.798533,12490326.0,4557653.0,1018266.0,...,179512181502.223663,0.000002,227567.047768,215196.061444,105301.0,0.681513,10462790.19109,35.402746,1102202.0,6.025079


In [6]:
# Size of the trailing window summarised below. This counts rows, not calendar
# days. The two result variables keep their historical "_last_365" names even
# if this value is changed.
LOOKBACK_ROWS = 365

# Per-row spread between the ETH-CAD high and low, as a percentage of the low.
eth_cad_high = combined_df['ETH-CAD_High']
eth_cad_low = combined_df['ETH-CAD_Low']
combined_df['Perc_Difference'] = (eth_cad_high - eth_cad_low) / eth_cad_low * 100

# Take the trailing window once and derive both summary statistics from it.
recent_perc_difference = combined_df['Perc_Difference'].tail(LOOKBACK_ROWS)
average_perc_difference_last_365 = recent_perc_difference.mean()
med_perc_difference_last_365 = recent_perc_difference.median()

# Display the average percentage difference
print(f"The average percentage difference between ETH-CAD_High and ETH-CAD_Low for the last {LOOKBACK_ROWS} rows is: {average_perc_difference_last_365:.2f}%")
# Display the median percentage difference
print(f"The median percentage difference between ETH-CAD_High and ETH-CAD_Low for the last {LOOKBACK_ROWS} rows is: {med_perc_difference_last_365:.2f}%")


The average percentage difference between ETH-CAD_High and ETH-CAD_Low for the last 365 rows is: 3.39%
The median percentage difference between ETH-CAD_High and ETH-CAD_Low for the last 365 rows is: 2.91%


In [7]:
# Write the frame, with every column currently on it, to a Parquet file.
combined_df.to_parquet(path='amalgamated.parquet')