In [33]:
import json
import numpy as np
import scipy.stats
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Parsed measurement records (one dict per input line) are kept in this list.
jsons = []

In [3]:
# Rebuild the record list from scratch so that re-running this cell does not
# append a second copy of every record, and skip blank lines (for example a
# trailing empty line), which json.loads would otherwise reject.
with open('81000_line_measuremetnts.json') as f:
    jsons = [json.loads(line) for line in f if line.strip()]

In [40]:
# Field names of one raw measurement record (keys of each JSON object).
TIME = 'timestamp'
ANCHOR = 'anchor'
FSIZE = 'file_size_kb'
RELAY = 'relay'
TIMES = 'times'

# Column names for the per-measurement statistics table.
MEAN = 'mean'
VARIANCE = 'variance'
STD = 'standard deviation'
MEAD = 'mean absolute deviation'    # mean of |x - mean(x)|
MAD = 'median absolute deviation'   # median-based spread

In [5]:
# Inspect the first parsed record to see which fields a measurement carries.
jsons[0]

{'timestamp': '2021-02-24 T 13:16:26.186364',
 'anchor': '749EF4A434DFD00DAB31E93DE86233FB916D31E3',
 'file_size_kb': 1,
 'relay': 'FFFBFB50A83A414CC21B4CDA93A9674B004705E8',
 'times': [0.5963854789733887,
  0.6080307960510254,
  0.6715302467346191,
  0.6455419063568115,
  0.7262742519378662,
  0.756727933883667,
  0.7973346710205078,
  0.7233819961547852,
  0.6402339935302734,
  0.7347784042358398]}

In [6]:
def getDataFrame(path_to_json):
    """Load a JSON Lines measurement file into a DataFrame.

    Every non-blank line of the file is parsed as one JSON object, and the
    ``TIME``, ``RELAY``, ``ANCHOR``, ``FSIZE`` and ``TIMES`` fields of that
    object become one row of the result.

    Parameters
    ----------
    path_to_json : str or path-like
        Path to a text file holding one JSON object per line.

    Returns
    -------
    pandas.DataFrame
        One row per record, with columns in the order ``TIME``, ``RELAY``,
        ``ANCHOR``, ``FSIZE``, ``TIMES``. The ``TIMES`` cell holds the
        record's value for that key unchanged.

    Raises
    ------
    KeyError
        If a record lacks one of the expected fields.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    columns = [TIME, RELAY, ANCHOR, FSIZE, TIMES]

    records = []
    with open(path_to_json, encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file);
            # json.loads would raise on them.
            if not line.strip():
                continue
            row = json.loads(line)
            # Index explicitly so a missing field still raises KeyError
            # instead of silently turning into NaN.
            records.append({column: row[column] for column in columns})

    # Passing columns= fixes the column order and keeps the columns present
    # even when the file contains no records.
    return pd.DataFrame(records, columns=columns)
            

In [10]:
# Load the full measurement log, then report its size and how many distinct
# relays it covers.
df_raw = getDataFrame('81000_line_measuremetnts.json')
n_unique_relays = len(df_raw[RELAY].unique())
print(f'Shape: {df_raw.shape}')
print(f'Unique relays: {n_unique_relays}')

Shape: (81008, 5)
Unique relays: 7395


In [38]:
# Spot-check the raw timings stored at positional row 81003.
df_raw[TIMES].iloc[81003]

[0.29636311531066895,
 0.3683156967163086,
 0.3712601661682129,
 0.3815762996673584,
 0.3727574348449707,
 0.3731966018676758,
 0.43424534797668457,
 0.5296764373779297,
 0.37731051445007324,
 0.41094493865966797]

In [41]:
# Per-measurement summary statistics over each row's list of timings.
# np.var and np.std are applied with their defaults (ddof=0), i.e. the
# population variance and standard deviation of the timings in that row.
measurement_times = df_raw[TIMES]
df = pd.DataFrame({
    TIME: df_raw[TIME],
    RELAY: df_raw[RELAY],
    MEAN: measurement_times.apply(np.mean),
    VARIANCE: measurement_times.apply(np.var),
    STD: measurement_times.apply(np.std),
})

In [42]:
def _mean_absolute_deviation(values):
    """Return the mean of ``|v - mean(values)|`` over all ``v`` in ``values``."""
    samples = np.asarray(values, dtype=float)
    # Compute the mean once and subtract it from the whole array, instead of
    # recomputing it for every element.
    return np.mean(np.abs(samples - samples.mean()))


df[MEAD] = df_raw[TIMES].apply(_mean_absolute_deviation)

In [43]:
# scipy.stats.median_absolute_deviation was deprecated in SciPy 1.5 and later
# removed; median_abs_deviation is its replacement. The old function scaled
# the raw MAD by 1.4826 by default, so scale="normal" is passed here to keep
# that scaling: the column holds a normal-consistent estimate of the standard
# deviation, not the raw median absolute deviation.
df[MAD] = df_raw[TIMES].apply(
    lambda times: scipy.stats.median_abs_deviation(times, scale="normal")
)

In [44]:
# Display the per-measurement statistics table.
df

Unnamed: 0,timestamp,relay,mean,variance,standard deviation,mean absolute deviation,median absolute deviation
0,2021-02-24 T 13:16:26.186364,FFFBFB50A83A414CC21B4CDA93A9674B004705E8,0.690022,0.004069,0.063785,0.057677,0.080903
1,2021-02-24 T 13:16:34.484505,FFF78C44BA6E6B6F7525095BBE14EF7CBEB89744,0.301382,0.003554,0.059616,0.047776,0.080941
2,2021-02-24 T 13:16:37.866556,FFF651A9D56C4CE94B6CC0C7C31C25E5A80D6906,0.262694,0.001473,0.038375,0.027262,0.032958
3,2021-02-24 T 13:16:40.820013,FFDC2CC395CB214B736502EF4008278461159D66,0.628321,0.002264,0.047586,0.040326,0.021841
4,2021-02-24 T 13:16:47.994572,FFBC69467B37D6AC66598BBD295F9B0D74119ADC,0.243789,0.001149,0.033899,0.025279,0.028395
...,...,...,...,...,...,...,...
81003,2021-03-04 T 11:16:34.457442,2D41697016C0070174BF079BADA7207604086A15,0.391565,0.003228,0.056817,0.040035,0.009830
81004,2021-03-04 T 11:16:39.309890,2D3BAED2B7D2FF19DA0AEEA37C238B57A01EA485,0.275194,0.002786,0.052780,0.041289,0.043597
81005,2021-03-04 T 11:16:42.346751,2D2A8535FA0D93E8164F13669148936841871051,0.331982,0.034258,0.185090,0.164078,0.039413
81006,2021-03-04 T 11:16:46.022272,2D1F9D59FBA5ECD6BE5A0B00197DC281FC2C40C4,0.321245,0.014465,0.120269,0.078934,0.044658


In [45]:
# Uncomment to write the statistics table to measurement_stats.csv (without the index column).
# df.to_csv("measurement_stats.csv", index=False)

40.685566663506734