This sample notebook shows how you can plot three-dimensional data extracted from the Data Lake.

It plots the price, quantity and time of AAPL trades on a given day.

In [None]:
import datetime

import pandas as pd
import matplotlib.pyplot as plt
import maystreet_data as md
import numpy as np

# Trade date to query, kept as strings because they are interpolated
# directly into the y/m/d filters of the Data Lake query below.
year, month, day = '2022', '01', '19'

In [None]:
def fetch_price_quantity():
    """
    Query the Data Lake for every AAPL trade on the configured day.

    The date is taken from the module-level ``year``, ``month`` and ``day``
    strings, which are interpolated into the partition filters of the query.

    Returns a Pandas dataframe built from the query result, with one row per
    trade and the selected columns timestamp (the trade's ExchangeTimestamp),
    price and quantity, ordered by timestamp.
    """

    # ORDER BY 1 sorts on the first selected column, i.e. the timestamp.
    query = f"""
    SELECT 
        ExchangeTimestamp AS "timestamp",
        price,
        quantity
    FROM 
        "prod_lake"."p_mst_data_lake".mt_trade
    WHERE 
        y = '{year}'
        AND m = '{month}'
        AND d = '{day}'
        AND product = 'AAPL'
    ORDER BY 1
    """

    return pd.DataFrame(md.query(md.DataSource.DATA_LAKE, query))

# Run the query once and keep the result in `data` for the plotting code below.
data = fetch_price_quantity()
# A bare expression on the last line of a notebook cell displays its value.
data

In [None]:
def time_formatter(x, pos=None):
    """
    Render an axis tick value as an HH:MM:SS label.

    x is interpreted as nanoseconds since the Unix epoch and converted with
    datetime.fromtimestamp, so the label is in the machine's local time zone.
    pos is accepted for the two-argument tick formatter call signature and is
    not used.
    """
    nanos_per_second = 1_000_000_000
    seconds_since_epoch = x / nanos_per_second
    moment = datetime.datetime.fromtimestamp(seconds_since_epoch)
    return moment.strftime('%H:%M:%S')

# Make every figure created from here on 10x10 inches.
plt.rcParams['figure.figsize'] = [10, 10]

fig = plt.figure()
# White figure background, given as an RGB tuple.
fig.patch.set_facecolor((1, 1, 1))

ax = plt.axes(projection='3d')

# Build the title from the same year/month/day used by the query so the
# title cannot go stale when the date is changed.
ax.set_title(f'AAPL time/price/quantity, {year}/{month}/{day}')
ax.set_xlabel('Time')
ax.set_ylabel('Quantity')
ax.set_zlabel('Price')

# The x axis holds raw timestamp values; time_formatter turns each tick
# into an HH:MM:SS label.
ax.xaxis.set_major_formatter(time_formatter)

# One point per trade: x = timestamp, y = quantity, z = price, matching the
# axis labels set above.
ax.scatter3D(data['timestamp'], data['quantity'], data['price'])

plt.show()