In [1]:
import pandas as pd
import json
from pathlib import Path
import os


In [4]:
df = pd.read_csv("./data/ABNB_12_months.csv") # just to remember what the data looks like
df

Unnamed: 0,ms_of_day,open,high,low,close,volume,count,date,time
0,34200000,134.43,134.50,134.41,134.50,300.0,20.0,2024-01-02,09:30:00
1,34201000,134.43,134.50,134.41,134.50,76464.0,52.0,2024-01-02,09:30:01
2,34202000,134.42,134.42,134.36,134.36,4084.0,33.0,2024-01-02,09:30:02
3,34203000,134.34,134.37,134.34,134.37,986.0,92.0,2024-01-02,09:30:03
4,34204000,134.28,134.32,134.28,134.32,471.0,15.0,2024-01-02,09:30:04
...,...,...,...,...,...,...,...,...,...
5311795,57595000,137.37,137.40,137.35,137.40,1880.0,27.0,2024-11-22,15:59:55
5311796,57596000,137.38,137.40,137.35,137.35,3602.0,40.0,2024-11-22,15:59:56
5311797,57597000,137.35,137.36,137.32,137.34,1122.0,25.0,2024-11-22,15:59:57
5311798,57598000,137.33,137.34,137.31,137.32,1142.0,25.0,2024-11-22,15:59:58


In [None]:
data_folder = Path("data") 
output_folder = Path("data_json") # new folder for the json files
output_folder.mkdir(exist_ok=True)

for csv_file in data_folder.glob("*.csv"):
    ticker = csv_file.stem.split("_")[0]
    df = pd.read_csv(csv_file, parse_dates=["date"])
    df['time'] = pd.to_datetime(df['time']).dt.time.astype(str)

    # i do this in order to preserve all info, though may not be necessary if we want it more lightweight
    # if want to make it more lightweight can get rid of volume and count, though marginally
    grouped = {str(date): group[['time','open','high','low','close','volume','count']].to_dict(orient="records") for date, group in df.groupby(df['date'].dt.date)}

    json_data = {"ticker": ticker, "days": grouped}

    with open(output_folder / f"{ticker}.json", "w") as f:
        json.dump(json_data, f)

In [2]:
# sanity check
size_mb = os.path.getsize("./data_json/ABNB.json") / (1024 * 1024)
print(f"file size: {size_mb:.2f}mb")
df = pd.read_json("./data_json/ABNB.json")
df

file size: 593.88mb


Unnamed: 0,ticker,days
2024-01-02,ABNB,"[{'time': '09:30:00', 'open': 134.43, 'high': ..."
2024-01-03,ABNB,"[{'time': '09:30:00', 'open': 132.71, 'high': ..."
2024-01-04,ABNB,"[{'time': '09:30:00', 'open': 132.86, 'high': ..."
2024-01-05,ABNB,"[{'time': '09:30:00', 'open': 134.25, 'high': ..."
2024-01-08,ABNB,"[{'time': '09:30:00', 'open': 137.31, 'high': ..."
...,...,...
2024-11-18,ABNB,"[{'time': '09:30:00', 'open': 132.56, 'high': ..."
2024-11-19,ABNB,"[{'time': '09:30:00', 'open': 131.15, 'high': ..."
2024-11-20,ABNB,"[{'time': '09:30:00', 'open': 130.22, 'high': ..."
2024-11-21,ABNB,"[{'time': '09:30:00', 'open': 134.18, 'high': ..."
