In [2]:
import pandas as pd
import logging
import os

# Basic logging setup: emit INFO-and-above records formatted as
# "<timestamp> - <level> - <message>".
# NOTE: logging.basicConfig() does nothing if the root logger already has
# handlers, so re-running this cell will not change an existing configuration.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

def load_json(file_path):
    """
    Read a line-delimited JSON file into a pandas DataFrame.

    Args:
        file_path: Path of the JSON-lines file to read.

    Returns:
        pd.DataFrame: The parsed records, or an empty DataFrame when the path
        does not exist or reading fails. Failures are logged, never raised.
    """
    if os.path.exists(file_path):
        try:
            frame = pd.read_json(file_path, lines=True)
        except ValueError as err:
            # Parsing problems reported by pandas (e.g. malformed JSON).
            logging.error(f"Failed to load data from {file_path}: {err}")
        except Exception as err:
            # Anything else (permissions, I/O, ...) is logged and swallowed too.
            logging.error(f"An unexpected error occurred: {err}")
        else:
            return frame
    else:
        logging.error(f"File not found: {file_path}")

    # Every failure path falls through to the same empty result.
    return pd.DataFrame()

def _split_paths(raw_paths):
    """Split a comma-separated string of paths, trimming whitespace and dropping blank entries."""
    return [path.strip() for path in raw_paths.split(",") if path.strip()]


def _load_and_combine(file_paths):
    """Load every path with load_json and concatenate the non-empty results into one DataFrame."""
    loaded = (load_json(path) for path in file_paths)
    # load_json returns an empty DataFrame on failure; keep those placeholders
    # out of pd.concat so they cannot influence the combined result.
    frames = [frame for frame in loaded if not frame.empty]
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()


def load_data():
    """
    Loads data into pandas DataFrames from JSON files specified in environment variables or defaults.

    The environment variables FULL_CHANNEL_FILES and TICKER_FILES may each hold a
    comma-separated list of file paths; whitespace around each path is ignored.
    When a variable is unset, the three default GDAX 2022-05-11 files are used.

    Every listed path is passed to load_json, so a missing or unreadable file is
    logged as an error there instead of being skipped silently. Files that
    yield no data are left out of the concatenation.

    Returns:
        combined_full_channel (pd.DataFrame): Concatenated DataFrame for FullChannel data
            (empty if nothing could be loaded).
        combined_ticker (pd.DataFrame): Concatenated DataFrame for Ticker data
            (empty if nothing could be loaded).
    """
    # Default file lists, kept as comma-separated strings so they share the
    # same parsing path as values supplied through the environment.
    default_full_channel_files = (
        "../data/raw/FullChannel_GDAX_20220511_17hr.json,"
        "../data/raw/FullChannel_GDAX_20220511_19hr.json,"
        "../data/raw/FullChannel_GDAX_20220511_20hr.json"
    )
    default_ticker_files = (
        "../data/raw/Ticker_GDAX_20220511_17hr.json,"
        "../data/raw/Ticker_GDAX_20220511_19hr.json,"
        "../data/raw/Ticker_GDAX_20220511_20hr.json"
    )

    full_channel_files = _split_paths(os.getenv("FULL_CHANNEL_FILES", default_full_channel_files))
    ticker_files = _split_paths(os.getenv("TICKER_FILES", default_ticker_files))

    combined_full_channel = _load_and_combine(full_channel_files)
    combined_ticker = _load_and_combine(ticker_files)

    if combined_full_channel.empty:
        logging.warning("No FullChannel data loaded.")
    else:
        logging.info(f"Loaded FullChannel data with {combined_full_channel.shape[0]} rows and {combined_full_channel.shape[1]} columns.")

    if combined_ticker.empty:
        logging.warning("No Ticker data loaded.")
    else:
        logging.info(f"Loaded Ticker data with {combined_ticker.shape[0]} rows and {combined_ticker.shape[1]} columns.")

    return combined_full_channel, combined_ticker

In [4]:
# Load both feeds once; load_data() logs how many rows/columns were read.
full_channel, ticker = load_data()

# Bare last expression (instead of print) so the notebook renders the preview
# as a rich table rather than wrapped plain text.
full_channel.head()

2024-04-30 19:24:28,016 - INFO - Loaded FullChannel data with 1774678 rows and 18 columns.
2024-04-30 19:24:28,016 - INFO - Loaded Ticker data with 102805 rows and 15 columns.


                               order_id order_type      size  price  \
0  04074a2a-ff4d-40f8-a921-d88ece5d1562      limit   281.146   2.06   
1  04074a2a-ff4d-40f8-a921-d88ece5d1562        NaN       NaN   2.06   
2  0299ed2d-d33d-4313-a1d4-b74ce9cc9f26      limit  2324.238   2.33   
3                                   NaN        NaN   374.806   2.33   
4  474813db-2329-4aba-a07b-b1adea78da8f        NaN       NaN   2.33   

                             client_oid      type  side product_id  \
0  0278c289-d977-44e0-9b3a-ff4e82b8dda5  received   buy  WLUNA-USD   
1                                   NaN      open   buy  WLUNA-USD   
2  b1f276a7-e271-4c82-9fa7-a55451507f82  received  sell  WLUNA-USD   
3                                   NaN     match   buy  WLUNA-USD   
4                                   NaN      done   buy  WLUNA-USD   

                          time    sequence  remaining_size   trade_id  \
0  2022-05-11T15:59:00.796073Z  1292598749             NaN        NaN   
1  202

In [5]:
# Preview the first Ticker rows; a bare expression uses the notebook's rich
# DataFrame display instead of print's wrapped plain text.
ticker.head()

     type    sequence product_id  price  open_24h    volume_24h  low_24h  \
0  ticker  1292614427  WLUNA-USD   2.42      31.4  3.773185e+07     0.95   
1  ticker  1292614429  WLUNA-USD   2.42      31.4  3.773188e+07     0.95   
2  ticker  1292614431  WLUNA-USD   2.42      31.4  3.773190e+07     0.95   
3  ticker  1292614433  WLUNA-USD   2.42      31.4  3.773217e+07     0.95   
4  ticker  1292614468  WLUNA-USD   2.44      31.4  3.773217e+07     0.95   

   high_24h    volume_30d  best_bid  best_ask  side  \
0      32.7  5.215304e+07      2.42      2.44  sell   
1      32.7  5.215306e+07      2.42      2.44  sell   
2      32.7  5.215308e+07      2.42      2.44  sell   
3      32.7  5.215335e+07      2.42      2.44  sell   
4      32.7  5.215335e+07      2.42      2.44   buy   

                          time  trade_id  last_size  
0  2022-05-11T15:59:59.959016Z   7884870     11.525  
1  2022-05-11T15:59:59.959016Z   7884871     28.646  
2  2022-05-11T15:59:59.959016Z   7884872     14.00

In [6]:
# Save the combined data frames as CSV (without the index column).
# load_data() returns an empty DataFrame when nothing could be read, so an
# empty frame is skipped with a warning rather than overwriting an earlier
# export with an empty file.
if full_channel.empty:
    logging.warning("FullChannel frame is empty; not writing ../data/raw/full_channel.csv")
else:
    full_channel.to_csv("../data/raw/full_channel.csv", index=False)

if ticker.empty:
    logging.warning("Ticker frame is empty; not writing ../data/raw/ticker.csv")
else:
    ticker.to_csv("../data/raw/ticker.csv", index=False)