In [19]:
import presto
import pandas as pd
import numpy as np
import pulsar
import os
import plotly.express as px
import plotly.graph_objects as go
import json
import csv

In [2]:
os.chdir('../')
from src.MV2 import cfg, schema

In [3]:
pulsar_url="pulsar://localhost:6650"
client = pulsar.Client(pulsar_url)

In [4]:
def presto_query(query, user='test', catalog='pulsar', schema='public/default', host='localhost', port=8081):
    conn = presto.dbapi.connect(
        host=host,
        port=port,
        user=user,
        catalog=catalog,
        schema=schema)
    cur = conn.cursor()
    cur.execute(query)
    data = cur.fetchall()
    columns = cur.description
    df = pd.DataFrame(data)
    df.columns = [x[0] for x in columns]
    return df.replace('', np.nan)

def get_all_data(topic, schema):
    data = []

    reader = client.create_reader(topic, start_message_id=pulsar.MessageId.earliest)

    while reader.has_message_available():
        msg = reader.read_next()
        data.append(json.loads(msg.data()))
        #print("Received message '{}' id='{}'".format(msg.data(), msg.message_id()))
        # No acknowledgment
    return data

In [5]:
topic = f"persistent://public/default/allocation_topic"
data = get_all_data(topic, schema.AllocationSchema)
dfa = pd.DataFrame(data)
dfa['supplier_times'] = dfa.apply(lambda row: -row["supplieroffertimestamps"][0] + row["timestamp"], axis=1)
dfa['customer_times'] = dfa.apply(lambda row: -row["customeroffertimestamp"] + row["timestamp"], axis=1)
dfa['customer_alo_to_start'] = dfa.apply(lambda row: -row["timestamp"] + row["start"], axis=1)
#supplier_times = dfa.loc[df['supplier_times'] < 3, 'supplier_times']
#customer_times = dfa.loc[df['customer_times'] < 3, 'customer_times']

In [6]:
dfs = []
for customer in dfa['customer'].unique().tolist():
    #df = presto_query("SELECT * FROM output", schema=f"{customer}/rand_nums")
    topic = f"persistent://{customer}/rand_nums/output"
    data = get_all_data(topic, schema.OutputDataSchema)
    dfs.append(pd.DataFrame(data))
    #dfs.append(df)
dfo = pd.concat(dfs, axis=0)
dfo['output_times'] = dfo.apply(lambda row: float(row['suppliertimestamp']) - float(row['customertimestamp']), axis=1)

In [27]:
dfa.to_csv(os.path.join(os.getcwd(), "notebooks", "data", "dfa1.csv"))

In [28]:
dfo.to_csv(os.path.join(os.getcwd(), "notebooks", "data", "dfo1.csv"))

In [8]:
fig = go.Figure()
fig.add_trace(go.Box(y=dfa['supplier_times'].values.tolist()))
#fig.add_trace(go.Box(y=dfa['customer_times'].values.tolist()))
fig.add_trace(go.Box(y=dfa['customer_alo_to_start'].values.tolist()))
fig.add_trace(go.Box(y=dfo['output_times'].values.tolist()))
#fig.add_trace(go.Box(y=output_times))

fig.show()

In [30]:
dfo['output_times'].median()

0.029277920722961426

In [31]:
dfa['supplier_times'].median()

0.3841630220413208

In [38]:
np.percentile(dfa['supplier_times'].values, .99999)

0.015796198362684248

In [42]:
dfa['supplier_times'].quantile(.75)

1.1091567277908325

In [11]:
cutoff = (-dfa['start'].min() + dfa['start'].max()) * .25 + dfa['start'].min()
dfa_plot = dfa[dfa['start']>cutoff]
dfo_plot = dfo[dfo['suppliertimestamp']>cutoff]

In [13]:
#dfa_plot = dfa[dfa['start']>cutoff]
dfo_plot = dfo[dfo['output_times']<5]

In [22]:
def write_to_csv(data, name, output_file_name):
    with open(os.path.join(os.getcwd(), "notebooks", "data", output_file_name), "w") as f:
        writer = csv.writer(f)
        writer.writerow([name])
        d = [[x] for x in data]
        writer.writerows(d)

In [23]:
output_times = []
for allocation in dfo_plot['allocationid'].unique().tolist():
    temp = dfo_plot.loc[dfo_plot['allocationid']==allocation, "output_times"]
    output_times.append(np.median(temp.values))
write_to_csv(output_times, 'time', 'output_times.csv')

In [24]:
x = dfa_plot.loc[dfa_plot['customer_alo_to_start']<150, "customer_alo_to_start"].values.tolist()
write_to_csv(x, 'time', 'customer_to_start.csv')

In [25]:
x = dfa_plot['supplier_times'].values.tolist()
write_to_csv(x, 'time', 'supplier_times.csv')

In [26]:

fig = go.Figure()
fig.add_trace(go.Box(y=dfa_plot['supplier_times'].values.tolist()))
fig.add_trace(go.Box(y=dfa['customer_times'].values.tolist()))
fig.add_trace(go.Box(y=dfa_plot['customer_alo_to_start'].values.tolist()))
fig.add_trace(go.Box(y=dfo_plot['output_times'].values.tolist()))
#fig.add_trace(go.Box(y=output_times))

fig.show()

In [15]:
len(dfo_plot)

76503

In [None]:
y=dfo.loc[dfo['output_times']<100, 'output_times']
fig = go.Figure()
fig.add_trace(go.Box(y=y.values.tolist()))
fig.show()

In [None]:
y=dfo.loc[dfo['output_times']>100]
y.head()

In [None]:
y.iloc[0].to_dict()

In [None]:
import time
time.time()

In [None]:
x = dfo.sort_values(by='value')
x.head()