In [1]:
import pandas as pd
from os import listdir
from os.path import isfile, join
from cryptocmd import CmcScraper #market cap data
from rich.progress import track

In [2]:
#Import all csv and save them in dict with filename (no extension, no USDT) as key
path = "Crypto_Data_Hourly//"
file_names = [f for f in listdir(path) if isfile(join(path, f))]
file_names.remove("BTTCUSDT.csv") #super tiny & no market cap data

dataframe_dict = {}

for file_name in file_names:
    dataframe_dict[file_name[:-8]] = pd.read_csv(path + file_name)
    dataframe_dict[file_name[:-8]]["Open Time"] = pd.to_datetime(dataframe_dict[file_name[:-8]]["Open Time"])

In [3]:
#getting marketcap data
## fixing some names for coinmarketcap data
coinmarketcap_names = {}
for crypto in dataframe_dict:
    coinmarketcap_names[crypto] = crypto
coinmarketcap_names["IOTA"] = "MIOTA" #actually only iota had problems

##scraping coinmarketcap and joining with original dfs
for crypto in track(dataframe_dict):
    scraper = CmcScraper(coinmarketcap_names[crypto])
    market_cap = scraper.get_dataframe()[["Date", "Market Cap"]]
    market_cap = market_cap.rename(columns = {"Date": "Open Time", "Market Cap": "Market Cap"})
    dataframe_dict[crypto] = pd.merge(dataframe_dict[crypto], market_cap, on = "Open Time")

Output()

In [16]:
df1 = dataframe_dict["BTC"]
df2 = dataframe_dict["ETH"]

min(dataframe_dict["BTC"]["Open Time"])

Timestamp('2017-08-17 00:00:00')

In [45]:
#market cap dataframe
all_market_caps = pd.DataFrame()
all_market_caps["Open Time"] = dataframe_dict["BTC"]["Open Time"] #Bitcoin because it has the largest range of dates

for crypto in (dataframe_dict):
    all_market_caps = pd.merge(all_market_caps, dataframe_dict[crypto][["Open Time", "Market Cap"]], on = "Open Time", how = "outer")
    all_market_caps = all_market_caps.rename(columns={all_market_caps.columns[-1]: crypto})
all_market_caps = all_market_caps.fillna(0)

all_market_caps.head(3)

Unnamed: 0,Open Time,1INCH,AAVE,ADA,ALGO,AMP,APE,AR,ATOM,AVAX,...,XEM,XLM,XMR,XRP,XTZ,YFI,ZEC,ZEN,ZIL,ZRX
0,2017-08-17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2017-08-18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2017-08-19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [87]:
#relative market cap dataframe
rel_market_caps = all_market_caps.copy(deep=True)

#transpose dataframe
rel_market_caps = rel_market_caps.transpose()
new_header = rel_market_caps.iloc[0] #grab the first row for the header
rel_market_caps = rel_market_caps[1:] #take the data less the header row
rel_market_caps.columns = new_header #set the header row as the df header

#divide each column by sum of market caps to get relative market cap
rel_market_caps = rel_market_caps / rel_market_caps.sum()
assert sum(rel_market_caps.sum()) == 1800 #assert if sums of all dates are 1 * 1800 dates

#transpose dataframe back
rel_market_caps = rel_market_caps.transpose()
rel_market_caps = rel_market_caps.reset_index(level=0)
assert rel_market_caps.columns.all() == all_market_caps.columns.all() #assert if transposing was successful
assert sum(rel_market_caps.loc[:, rel_market_caps.columns != "Open Time"].sum(axis = 1)) == 1800 #assert if sums of all dates are 1 * 1800 dates

rel_market_caps["BTC"]

0       0.716099
1       0.711859
2       0.712157
3       0.704105
4       0.685847
          ...   
1795     0.51779
1796    0.506217
1797    0.516069
1798    0.522473
1799    0.513895
Name: BTC, Length: 1800, dtype: object

In [29]:
basket = dataframe_dict["BTC"].copy(deep=True) #Bitcoin because it has the largest range of dates
basket[["Open", "High", "Low", "Close", "Volume", "Quote Asset Volume", "Number of Trades", "TB Base Volume",\
    "TB Quote Volume", "Ignore", "Market Cap"]] = 0
basket

Unnamed: 0,Open Time,Open Time.1,Open,High,Low,Close,Volume,Close Time,Quote Asset Volume,Number of Trades,TB Base Volume,TB Quote Volume,Ignore,Market Cap
0,2017-08-17,2017-08-17 00:00:00.000000000,0,0,0,0,0,2017-08-17 23:59:59.999000064,0,0,0,0,0,0
1,2017-08-18,2017-08-18 00:00:00.000000000,0,0,0,0,0,2017-08-18 23:59:59.999000064,0,0,0,0,0,0
2,2017-08-19,2017-08-19 00:00:00.000000000,0,0,0,0,0,2017-08-19 23:59:59.999000064,0,0,0,0,0,0
3,2017-08-20,2017-08-20 00:00:00.000000000,0,0,0,0,0,2017-08-20 23:59:59.999000064,0,0,0,0,0,0
4,2017-08-21,2017-08-21 00:00:00.000000000,0,0,0,0,0,2017-08-21 23:59:59.999000064,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1795,2022-07-17,2022-07-17 00:00:00.000000000,0,0,0,0,0,2022-07-17 23:59:59.999000064,0,0,0,0,0,0
1796,2022-07-18,2022-07-18 00:00:00.000000000,0,0,0,0,0,2022-07-18 23:59:59.999000064,0,0,0,0,0,0
1797,2022-07-19,2022-07-19 00:00:00.000000000,0,0,0,0,0,2022-07-19 23:59:59.999000064,0,0,0,0,0,0
1798,2022-07-20,2022-07-20 00:00:00.000000000,0,0,0,0,0,2022-07-20 23:59:59.999000064,0,0,0,0,0,0


In [15]:
start_date = min(dataframe_dict["XRP"]["Open Time"])
for crypto in (dataframe_dict):
    start_date_cand = min(dataframe_dict[crypto]["Open Time"])
    if start_date_cand < start_date: start_date = start_date_cand

end_date = 
start_date

Timestamp('2017-08-17 00:00:00')