In [179]:
import pandas as pd
import matplotlib.pyplot as plt

In [180]:
# read and clean data by removing unwanted text and spaces

# use a separator that will never appear in the txt file.
data = pd.read_csv("data/12_20201001_vivo", header=None, sep="~")

# return only the rows with alarmreceiver
# data["method"] = data[data[0].str.contains("(alarmreceiver)", na=False)]
# data = data[data["method"].notnull()]

# take only the first 24 characters, which is the timestamp portion 
# e.g. 22 Sep 2020 16:15:54:625 in 22 Sep 2020 16:17:12:674: (1.3210017, 103.8602953)
data["timestamp"] = data[0].str[0:24].str.strip()
data["timestamp"] = pd.to_datetime(data["timestamp"], format="%d %b %Y %H:%M:%S:%f")

data

Unnamed: 0,0,timestamp
0,01 Oct 2020 16:09:24:098 onCreate MainActivity,2020-10-01 16:09:24.098
1,01 Oct 2020 16:09:24:420 onCreate Service,2020-10-01 16:09:24.420
2,01 Oct 2020 16:09:48:097 onCreate MainActivity,2020-10-01 16:09:48.097
3,01 Oct 2020 16:09:48:271 onCreate Service,2020-10-01 16:09:48.271
4,01 Oct 2020 16:09:54:301 onDestroy Service,2020-10-01 16:09:54.301
...,...,...
374,02 Oct 2020 10:52:07:570 (alarmreceiver): (1.3...,2020-10-02 10:52:07.570
375,02 Oct 2020 10:56:07:635 (alarmreceiver): (1.3...,2020-10-02 10:56:07.635
376,02 Oct 2020 11:00:07:644 (alarmreceiver): (1.3...,2020-10-02 11:00:07.644
377,02 Oct 2020 11:03:00:948 (alarmreceiver): (1.3...,2020-10-02 11:03:00.948


In [181]:
group1min = data.groupby(pd.Grouper(key="timestamp",freq="1min")).count()
group5min = data.groupby(pd.Grouper(key="timestamp",freq="5min")).count()
# rename the column name to count
group1min.rename( columns={0: "count"}, inplace=True)
group5min.rename( columns={0: "count"}, inplace=True)

len(group1min["count"].value_counts())
len(group5min["count"].value_counts())

6

In [182]:
# calculate results for 1 minute intervals
g1_total_rows = len(group1min)
g1_misses = group1min["count"].value_counts().get(0,0) # get value for key 0, if it doesnt exist we return 0 which means 
g1_score = (g1_total_rows - g1_misses)/g1_total_rows
# print(g1_score)
group1min.loc[group1min["count"] == 0]

Unnamed: 0_level_0,count
timestamp,Unnamed: 1_level_1
2020-10-01 16:11:00,0
2020-10-01 16:12:00,0
2020-10-01 16:13:00,0
2020-10-01 16:14:00,0
2020-10-01 16:16:00,0
...,...
2020-10-02 10:57:00,0
2020-10-02 10:58:00,0
2020-10-02 10:59:00,0
2020-10-02 11:01:00,0


In [183]:
# calculate results for 5 minute intervals
g5_total_rows = len(group5min)
g5_misses = group5min["count"].value_counts().get(0, 0) # get value for key 0, if it doesnt exist we return 0 which means no misses.
g5_score = (g5_total_rows - g5_misses)/g5_total_rows
# print(g5_score)
group5min.loc[group5min["count"] == 0]

Unnamed: 0_level_0,count
timestamp,Unnamed: 1_level_1


In [184]:
print("Looking at data from", data[0].iloc[0] , "to" , data[0].iloc[-1])
print("1 min intervals:", "{0:.2%}".format(g1_score) , "missing" , g1_misses , "intervals")
print("5 min intervals:", "{0:.2%}".format(g5_score) , "missing" , g5_misses , "intervals")

Looking at data from 01 Oct 2020 16:09:24:098 onCreate MainActivity to 02 Oct 2020 11:04:01:014 (alarmreceiver): (1.3212542, 103.8603324)
1 min intervals: 31.51% missing 778 intervals
5 min intervals: 100.00% missing 0 intervals
