# Amalgamator

In [1]:
import pandas as pd
from datetime import datetime, timedelta


In [2]:
# Read the two input tables from Parquet files; the paths are relative, so
# they resolve against the notebook's working directory.
coinmetrics_path = 'coin-metrics.parquet'
yahoo_path = 'yahoo-finance.parquet'

coinmetrics = pd.read_parquet(coinmetrics_path)
yahoo = pd.read_parquet(yahoo_path)

In [3]:
# Outer-join the two sources on their indexes so that no row from either
# side is lost at this stage; incomplete rows are removed further down.
merged_df = pd.merge(
    coinmetrics, yahoo, left_index=True, right_index=True, how='outer'
)

# Un-prefixed columns to keep (presumably supplied by the Yahoo Finance
# frame, going by the list name -- verify against the input schema).
yfinance_metrics = [
    "ETH-BTC_High",
    "ETH-BTC_Low",
    "ETH-CAD_High",
    "ETH-CAD_Low",
    "BTC-CAD_High",
    "BTC-CAD_Low",
    'Global_Liquidity_Index'
]

# Base metric names; each one is selected twice below, once with a "btc_"
# prefix and once with an "eth_" prefix.
coinmetrics_metrics = [
    "AdrBalNtv0.01Cnt",
    "AdrBalNtv0.1Cnt",
    "AdrBalNtv1Cnt",
    "AdrBalNtv10Cnt",
    "BlkSizeMeanByte",
    "CapRealUSD",
    "FeeByteMeanNtv",
    "FlowInExNtv",
    "FlowOutExNtv",
    "FlowTfrFromExCnt",
    "NDF",
    "SplyAct1d",
    "SplyActPct1yr",
    "TxCnt",
    "VelCur1yr"
]

# Per-asset column names derived from the base metric names.
btc_prefixed_metrics = [f'btc_{metric}' for metric in coinmetrics_metrics]
eth_prefixed_metrics = [f'eth_{metric}' for metric in coinmetrics_metrics]

# Single list with all prefixed metrics, BTC first and then ETH.
combined_prefixed_metrics = btc_prefixed_metrics + eth_prefixed_metrics

# Final column order of the output frame.
metrics = yfinance_metrics + combined_prefixed_metrics

# Keep only the selected columns, drop every row that has a missing value in
# any of them, then aggregate to weekly rows using the MEDIAN of each column.
# dropna() is used in its non-mutating form: calling it with inplace=True on
# a column subset of another frame can emit SettingWithCopyWarning on pandas
# versions without copy-on-write, and makes the cell harder to reason about.
combined_df = (
    merged_df[metrics]
    .dropna()
    .resample('W')
    .median()
)

In [4]:
# Bare last expression: renders the weekly frame as this cell's output.
combined_df 

Unnamed: 0,ETH-BTC_High,ETH-BTC_Low,ETH-CAD_High,ETH-CAD_Low,BTC-CAD_High,BTC-CAD_Low,Global_Liquidity_Index,btc_AdrBalNtv0.01Cnt,btc_AdrBalNtv0.1Cnt,btc_AdrBalNtv1Cnt,...,eth_CapRealUSD,eth_FeeByteMeanNtv,eth_FlowInExNtv,eth_FlowOutExNtv,eth_FlowTfrFromExCnt,eth_NDF,eth_SplyAct1d,eth_SplyActPct1yr,eth_TxCnt,eth_VelCur1yr
2019-01-06,0.039806,0.038577,212.861397,198.961952,5304.894043,5124.658447,11340.606080,6461412.5,2543183.0,704962.0,...,29018820445.344917,0.000003,667205.252013,690006.461584,19225.5,0.636272,10782148.613771,72.207358,530293.5,11.447523
2019-01-13,0.037518,0.034565,201.255341,167.438217,5376.820801,4848.249023,11612.244253,6482061.0,2548197.0,706522.0,...,28663670823.5494,0.000003,559481.952289,496416.452759,20872.0,0.638397,10112044.745759,71.725174,550964.0,10.583022
2019-01-20,0.033768,0.033124,166.959579,159.659409,4936.204590,4808.473633,11061.689523,6547185.0,2558280.0,708796.0,...,28314417747.238216,0.000004,438547.508157,458874.107464,18192.0,0.638271,11738107.990957,71.951333,538899.0,9.72648
2019-01-27,0.032671,0.032355,157.590332,153.457352,4829.049805,4749.490723,10902.737330,6588638.0,2562562.0,711914.0,...,27934971350.679241,0.000003,390411.930389,366666.745414,16613.0,0.637236,11044056.233823,71.550401,520130.0,9.205846
2019-02-03,0.031439,0.030559,144.951599,139.241577,4613.910645,4527.285645,10448.179303,6649261.0,2568307.0,714664.0,...,27622742238.969894,0.000003,386331.94357,400485.468348,17297.0,0.637222,10427630.110494,70.949492,517057.0,8.835763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-04,0.053801,0.053481,3113.128662,3042.415771,58429.691406,56831.171875,122454.180197,12619292.0,4572105.0,1019807.0,...,166826521384.526001,0.000003,333278.414166,335365.852424,102151.0,0.677078,12173746.145186,36.08737,1115710.0,6.050232
2024-02-11,0.053729,0.053706,3289.232178,3173.687988,59694.683594,57678.031250,125879.094272,12572725.0,4567171.0,1019356.0,...,168141406775.576691,0.000005,347428.606404,369227.537384,103803.0,0.678218,11882806.095639,35.867592,1075272.0,6.048036
2024-02-18,0.053777,0.053241,3774.248291,3555.355225,70401.203125,66844.632812,146207.883586,12511730.0,4556762.0,1017938.0,...,174331122786.240967,0.000003,306990.77022,337812.22673,96898.0,0.67948,12355803.333117,35.737851,1102512.0,6.039153
2024-02-25,0.057843,0.056463,4050.259277,3889.075195,70593.679688,68691.062500,148970.534800,12502508.0,4557570.0,1018191.0,...,177742339420.690826,0.000004,338244.178803,369824.650815,107578.0,0.680504,11852417.755627,35.479217,1102202.0,6.035038


In [5]:
import pandas as pd

# Requires combined_df (built in an earlier cell) with the columns
# 'ETH-CAD_High' and 'ETH-CAD_Low'.

# Per-row spread of the ETH-CAD high over its low, expressed as a percentage
# of the low. The result is stored as a new 'Perc_Difference' column on
# combined_df itself.
eth_cad_high = combined_df['ETH-CAD_High']
eth_cad_low = combined_df['ETH-CAD_Low']
combined_df['Perc_Difference'] = ((eth_cad_high - eth_cad_low) / eth_cad_low) * 100

# Take the trailing 365 rows once and summarise them with both mean and median.
# NOTE(review): combined_df is resampled to weekly rows in an earlier cell, so
# 365 rows cover roughly seven years rather than one; tail() simply returns
# every row when fewer than 365 exist. Confirm whether a one-year window
# (about 52 weekly rows) was intended.
recent_perc_difference = combined_df['Perc_Difference'].tail(365)
average_perc_difference_last_365 = recent_perc_difference.mean()
med_perc_difference_last_365 = recent_perc_difference.median()

# Report the mean spread.
print(f"The average percentage difference between ETH-CAD_High and ETH-CAD_Low for the last 365 rows is: {average_perc_difference_last_365:.2f}%")
# Report the median spread.
print(f"The median percentage difference between ETH-CAD_High and ETH-CAD_Low for the last 365 rows is: {med_perc_difference_last_365:.2f}%")


The average percentage difference between ETH-CAD_High and ETH-CAD_Low for the last 365 rows is: 5.77%
The median percentage difference between ETH-CAD_High and ETH-CAD_Low for the last 365 rows is: 4.56%


In [6]:
# Write combined_df to a Parquet file; the path is relative, so it resolves
# against the notebook's working directory.
output_path = 'amalgamated.parquet'
combined_df.to_parquet(output_path)