In [4]:
import pandas as pd
import logging

# Root-logger setup: emit INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Function to load a JSON lines file
def load_json(file_path):
    """Load a JSON Lines file (one JSON record per line) into a DataFrame.

    Loading is best-effort: any failure is logged together with the path of
    the file being read, and an empty DataFrame is returned instead of
    raising, so callers can detect a failed load via ``.empty``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the JSON Lines file, passed straight to ``pd.read_json``.

    Returns
    -------
    pandas.DataFrame
        The parsed records, or an empty DataFrame if the file could not be
        loaded.
    """
    try:
        return pd.read_json(file_path, lines=True)
    except ValueError as e:
        logging.error(f"Failed to load data from {file_path}: {e}")
    except Exception as e:
        # Deliberate catch-all so one bad file does not abort the whole load;
        # the path is included so the log identifies which file failed.
        logging.error(f"An unexpected error occurred while loading {file_path}: {e}")
    return pd.DataFrame()
    
# Load data into pandas DataFrames: each list contains three DataFrames (one for each file)
full_channel_files = [
    "../data/raw/FullChannel_GDAX_20220511_17hr.json",
    "../data/raw/FullChannel_GDAX_20220511_19hr.json",
    "../data/raw/FullChannel_GDAX_20220511_20hr.json",
]
ticker_files = [
    "../data/raw/Ticker_GDAX_20220511_17hr.json",
    "../data/raw/Ticker_GDAX_20220511_19hr.json",
    "../data/raw/Ticker_GDAX_20220511_20hr.json",
]

full_channel_data = [load_json(file) for file in full_channel_files]
ticker_data = [load_json(file) for file in ticker_files]

# load_json returns an empty DataFrame when a file cannot be loaded, so an empty
# frame marks an input that produced no data. Report which files are affected.
empty_inputs = [
    path
    for path, df in zip(full_channel_files + ticker_files, full_channel_data + ticker_data)
    if df.empty
]
if empty_inputs:
    logging.warning(f"One or more files failed to load, check logs for details: {empty_inputs}")

# Concatenate data
# Only non-empty frames are concatenated. The per-file lists above always hold one
# entry per file, so the emptiness guard has to be applied to the filtered lists
# (pd.concat raises on an empty sequence).
full_channel_loaded = [df for df in full_channel_data if not df.empty]
ticker_loaded = [df for df in ticker_data if not df.empty]

combined_full_channel = pd.concat(full_channel_loaded, ignore_index=True) if full_channel_loaded else pd.DataFrame()
combined_ticker = pd.concat(ticker_loaded, ignore_index=True) if ticker_loaded else pd.DataFrame()

if not combined_full_channel.empty:
    logging.info(f"Loaded FullChannel data with {combined_full_channel.shape[0]} rows and {combined_full_channel.shape[1]} columns.")
if not combined_ticker.empty:
    logging.info(f"Loaded Ticker data with {combined_ticker.shape[0]} rows and {combined_ticker.shape[1]} columns.")

2024-04-29 03:27:43,193 - INFO - Loaded FullChannel data with 1774678 rows and 18 columns.
2024-04-29 03:27:43,194 - INFO - Loaded Ticker data with 102805 rows and 15 columns.


In [5]:
# Preview the combined FullChannel frame. pandas elides the middle rows of a long
# frame (the `...` row in the output below), so this does not print every row.
print(combined_full_channel)

                                     order_id order_type      size  price  \
0        04074a2a-ff4d-40f8-a921-d88ece5d1562      limit   281.146   2.06   
1        04074a2a-ff4d-40f8-a921-d88ece5d1562        NaN       NaN   2.06   
2        0299ed2d-d33d-4313-a1d4-b74ce9cc9f26      limit  2324.238   2.33   
3                                         NaN        NaN   374.806   2.33   
4        474813db-2329-4aba-a07b-b1adea78da8f        NaN       NaN   2.33   
...                                       ...        ...       ...    ...   
1774673  7478edba-b1fd-4ea0-934e-dfd8cb3107fa        NaN       NaN   1.13   
1774674  44b6917b-b419-4310-a6a1-86d80b797c0e      limit    56.833   1.25   
1774675  44b6917b-b419-4310-a6a1-86d80b797c0e        NaN       NaN   1.25   
1774676  d4e32f4a-c08b-459c-a19f-5c750a8e1894      limit    20.000   1.15   
1774677  d4e32f4a-c08b-459c-a19f-5c750a8e1894        NaN       NaN   1.15   

                                   client_oid      type  side product_id  \

In [6]:
# Preview the combined Ticker frame. pandas elides the middle rows of a long
# frame (the `...` row in the output below), so this does not print every row.
print(combined_ticker)

          type    sequence product_id  price  open_24h    volume_24h  low_24h  \
0       ticker  1292614427  WLUNA-USD   2.42     31.40  3.773185e+07     0.95   
1       ticker  1292614429  WLUNA-USD   2.42     31.40  3.773188e+07     0.95   
2       ticker  1292614431  WLUNA-USD   2.42     31.40  3.773190e+07     0.95   
3       ticker  1292614433  WLUNA-USD   2.42     31.40  3.773217e+07     0.95   
4       ticker  1292614468  WLUNA-USD   2.44     31.40  3.773217e+07     0.95   
...        ...         ...        ...    ...       ...           ...      ...   
102800  ticker  1294996328  WLUNA-USD   1.26     24.85  6.043075e+07     0.95   
102801  ticker  1294996345  WLUNA-USD   1.26     24.85  6.043076e+07     0.95   
102802  ticker  1294996347  WLUNA-USD   1.26     24.85  6.043076e+07     0.95   
102803  ticker  1294996349  WLUNA-USD   1.27     24.85  6.043078e+07     0.95   
102804  ticker  1294996441  WLUNA-USD   1.27     24.85  6.043078e+07     0.95   

        high_24h    volume_

In [7]:
# Save the combined data frames
# An empty frame means nothing was loaded (load_json returns an empty DataFrame on
# failure); writing it would replace any existing export with an empty file, so
# such frames are skipped with a warning instead.
for frame, csv_path in (
    (combined_full_channel, "../data/raw/full_channel.csv"),
    (combined_ticker, "../data/raw/ticker.csv"),
):
    if frame.empty:
        logging.warning(f"Skipping export to {csv_path}: no data was loaded.")
        continue
    frame.to_csv(csv_path, index=False)