In [1]:
# THIS FILE ANALYSES THE DATA SENT TO FIREBASE

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import json

In [70]:
# Path of the Firebase export analysed by this cell.
EXPORT_PATH = "data/20201007-firebase-export.json"

# Every message starts with a timestamp, e.g. "22 Sep 2020 16:15:54:625" in
# "22 Sep 2020 16:15:54:625: (1.3210017, 103.8602953)". Only the first
# TIMESTAMP_WIDTH characters are parsed.
TIMESTAMP_WIDTH = 24
TIMESTAMP_FORMAT = "%d %b %Y %H:%M:%S:%f"

# (label printed in the report, pandas offset alias used for binning)
INTERVALS = (("1 min intervals:", "1min"), ("5 min intervals:", "5min"))


def interval_coverage(reports, freq):
    """Measure how many ``freq``-sized intervals contain at least one message.

    Parameters
    ----------
    reports : pandas.DataFrame
        Frame with a datetime ``timestamp`` column and a ``message`` column.
    freq : str
        pandas offset alias for the bin width, e.g. ``"1min"``.

    Returns
    -------
    tuple of (float, int)
        ``score`` is the fraction of intervals between the first and last
        timestamp that contain at least one message (``nan`` when there are
        no intervals at all); ``misses`` is the number of empty intervals.
    """
    per_interval = reports.groupby(pd.Grouper(key="timestamp", freq=freq))["message"].count()
    total = len(per_interval)
    misses = int((per_interval == 0).sum())
    score = (total - misses) / total if total else float("nan")
    return score, misses


with open(EXPORT_PATH) as f:
    export = json.load(f)

# Index instead of .get() so a missing key fails here with a clear KeyError
# rather than later with an AttributeError on None.
d = export["location_reports"]  # device name -> {report key -> report record}

# Print one coverage report per device.
for device, device_reports in d.items():
    data = pd.DataFrame.from_records(list(device_reports.values()))

    if "message" in data.columns:
        # Parse the leading timestamp portion of every message.
        data["timestamp"] = pd.to_datetime(
            data["message"].str[0:TIMESTAMP_WIDTH].str.strip(),
            format=TIMESTAMP_FORMAT,
        )
        # Rows without a message have no timestamp and cannot be binned.
        data = data[["message", "timestamp"]].dropna(subset=["timestamp"])
    else:
        # No record carries a message (this also covers a device with zero records).
        data = data.iloc[0:0]

    if data.empty:
        print("No timestamped messages for", device)
        print()
        continue

    # min()/max() instead of iloc[0]/iloc[-1]: the exported records are not
    # guaranteed to be in chronological order.
    print("Printing scores for", device, "from", data["timestamp"].min(), "to", data["timestamp"].max())
    for label, freq in INTERVALS:
        score, misses = interval_coverage(data, freq)
        print(label, "{0:.2%}".format(score), "missing", misses, "intervals")
    print()

    # TODO: optionally restrict the analysis to messages containing "(alarmreceiver)".

Printing scores for Google-Pixel 2 XL from 2020-10-04 23:25:17.047000 to 2020-10-06 02:55:40.701000
1 min intervals: 98.49% missing 25 intervals
5 min intervals: 100.00% missing 0 intervals

Printing scores for HUAWEI-MHA-L29 from 2020-10-04 23:23:20.725000 to 2020-10-06 08:12:46.841000
1 min intervals: 99.59% missing 8 intervals
5 min intervals: 99.75% missing 1 intervals

Printing scores for Samsung-SM-J730GM from 2020-10-06 21:49:46.794000 to 2020-10-07 19:50:36.590000
1 min intervals: 99.92% missing 1 intervals
5 min intervals: 100.00% missing 0 intervals

Printing scores for Vivo 1820 from 2020-10-06 21:54:02.547000 to 2020-10-07 19:48:08.597000
1 min intervals: 39.85% missing 791 intervals
5 min intervals: 100.00% missing 0 intervals

Printing scores for Xiaomi-Redmi Note 3 from 2020-10-04 23:28:18.199000 to 2020-10-07 19:50:48.382000
1 min intervals: 49.57% missing 2069 intervals
5 min intervals: 50.12% missing 410 intervals

