In [None]:
import os
import subprocess
import json

import pandas as pd
import numpy as np

from numba import jit
from typing import Tuple
from tqdm import tqdm

# Running it live

## Set up the arrays and functions used in advance

Alternatively, they can be set up at the moment the "BEGIN" line is read from the stream.

In [None]:
# Upper bounds used to pre-allocate the storage arrays.
num_markets = 12001
num_data_points = 100000

# Raw per-trade storage, indexed as [market, trade id, field].
# Fields: 0 = volume, 1 = price, 2 = volume * price, 3 = is_buy.
# Slots that never receive a trade stay NaN so NaN-aware reductions skip them.
# np.full allocates the NaN-filled float64 array in one pass; the previous
# `np.empty(...) * np.nan` read uninitialised memory and created a second
# full-size temporary (about 38.4 GB each at these dimensions).
market_data = np.full((num_markets, num_data_points, 4), np.nan)

# Per-market summary statistics, indexed as [market, statistic].
# Statistics: 0 = total volume, 1 = mean price, 2 = mean volume,
# 3 = volume weighted mean price, 4 = percentage of buys.
market_analysis = np.full((num_markets, 5), np.nan)

In [None]:
def get_data(line: dict) -> Tuple[int, int, np.ndarray]:
    """Store one decoded trade record in the module-level ``market_data`` array.

    Parameters
    ----------
    line : dict
        A decoded JSON record. The keys ``"id"``, ``"market"``, ``"volume"``,
        ``"price"`` and ``"is_buy"`` are read.

    Returns
    -------
    Tuple[int, int, np.ndarray]
        ``(idx, market, market_data)`` where ``idx`` is the record's ``"id"``,
        ``market`` is its ``"market"`` and ``market_data`` is the (mutated)
        module-level array.

    Raises
    ------
    KeyError
        If one of the expected keys is missing from ``line``.
    IndexError
        If ``market`` or ``idx`` is outside the bounds of ``market_data``.
    """
    idx = line["id"]
    market = line["market"]
    volume = line["volume"]
    price = line["price"]

    # Basic indexing returns a view, so writing to `trade` updates market_data.
    trade = market_data[market, idx]
    trade[0] = volume
    trade[1] = price
    trade[2] = volume * price
    trade[3] = line["is_buy"]
    return idx, market, market_data

In [None]:
@jit(nopython=True)
def analyze_data(market: int, idx: int, market_data: np.ndarray, market_analysis: np.ndarray):
    """Recompute the summary statistics of one market, in place.

    The statistics are computed over ``market_data[market, :idx + 1]`` and
    written to ``market_analysis[market]``. NaN rows in that window are
    ignored by every statistic.

    Parameters
    ----------
    market : int
        Row of ``market_data`` / ``market_analysis`` to process.
    idx : int
        Index of the last trade slot to include (inclusive).
    market_data : np.ndarray
        3-D array indexed as [market, trade, field]; fields 0 (volume),
        1 (price) and 3 (is_buy) are read.
    market_analysis : np.ndarray
        2-D array indexed as [market, statistic]; columns 0-4 of row
        ``market`` are overwritten with total volume, mean price, mean volume,
        volume weighted mean price and percentage of buys.

    Returns
    -------
    None
        Results are written into ``market_analysis``.
    """
    volume = market_data[market, :idx + 1, 0]
    price = market_data[market, :idx + 1, 1]
    is_buy = market_data[market, :idx + 1, 3]

    # Count only slots that actually hold a trade, so every mean below uses
    # the same denominator as np.nanmean does for the price.
    n_trades = volume.size - np.count_nonzero(np.isnan(volume))
    total_volume = np.nansum(volume)

    # Total volume
    market_analysis[market, 0] = total_volume
    # Mean price
    market_analysis[market, 1] = np.nanmean(price)

    if n_trades > 0:
        # Mean volume
        market_analysis[market, 2] = total_volume / n_trades
        # Percentage buys: the is_buy flag lives in column 3 (not the volume column)
        market_analysis[market, 4] = np.count_nonzero(is_buy == 1) / n_trades * 100
    else:
        # Explicit guard: scalar division by zero raises in nopython mode
        market_analysis[market, 2] = np.nan
        market_analysis[market, 4] = np.nan

    # Volume weighted mean price: sum(volume * price) / sum(volume)
    if total_volume != 0:
        market_analysis[market, 3] = np.nansum(volume * price) / total_volume
    else:
        market_analysis[market, 3] = np.nan


# Trigger JIT compilation ahead of the live stream. Throwaway arrays with the
# same dtype and number of dimensions are used so the warm-up call does not
# write placeholder results into the real market_analysis array.
analyze_data(0, 0, np.full((1, 1, 4), np.nan), np.full((1, 5), np.nan))

In [None]:
# Launch the generator binary and read its stdout through a pipe.
binary_path = os.path.join(os.getcwd(), 'stdoutinator_amd64_darwin.bin')

# The context manager closes the pipe and reaps the child process on exit.
with subprocess.Popen([binary_path], stdout=subprocess.PIPE) as proc:
    try:
        # The bounded loop (instead of `while True`) temporarily limits the
        # number of lines read, because the pre-allocated arrays for the full
        # 10M data points do not fit in RAM.
        for _ in tqdm(range(num_data_points)):
            line = proc.stdout.readline().decode("utf-8").rstrip()

            # An empty line (including end of stream) stops the reading
            if not line:
                break
            if line == "BEGIN":
                # Nothing to process for the start marker.
                # Arrays and functions could also be initialized here.
                continue
            if line == "END":
                break

            # Extract the relevant data from the JSON record.
            # Note that `id` has been renamed to `idx` to prevent a clash
            # with the built-in of the same name.
            idx, market, market_data = get_data(json.loads(line))
            analyze_data(market, idx, market_data, market_analysis)
    finally:
        # If reading stopped early the child may still be producing output;
        # stop it so that waiting for it on exit cannot hang. This is a no-op
        # when the process has already finished.
        proc.terminate()

In [None]:
# Build the results table in one step: one row per market (row label = market
# index), one column per statistic. Constructing the frame directly from the
# array avoids enlarging it one row at a time.
df = pd.DataFrame(
    market_analysis,
    columns=["total_volume", "mean_price", "mean_volume", "mean_volume_weighted_price", "percentage_buys"],
)

In [None]:
# Display the first 30 rows of the results table
df.head(30)