In [22]:
import pandas as pd
import matplotlib.pyplot as plt

In [23]:
# read and clean data by removing unwanted text and spaces

# use a separator that will never appear in the txt file.
data = pd.read_csv("data/20201117-pra-lx2-timestamp.csv", header=None, sep="~")

# return only the rows with alarmreceiver
# data["method"] = data[data[0].str.contains("(alarmreceiver)", na=False)]
# data = data[data["method"].notnull()]

# take only the first 24 characters, which is the timestamp portion 
# e.g. 22 Sep 2020 16:15:54:625 in 22 Sep 2020 16:17:12:674: (1.3210017, 103.8602953)
data["timestamp"] = data[0].str[0:24].str.strip()
data["timestamp"] = pd.to_datetime(data["timestamp"], format="%d %b %Y %H:%M:%S:%f")

# below two lines used for Homer exports
# data["timestamp"] = data[0].str[0:23].str.strip()
# data["timestamp"] = pd.to_datetime(data["timestamp"], format="%Y-%m-%d %H:%M:%S.%f")

data

Unnamed: 0,0,timestamp
0,2020-11-17 06:13:04.537,2020-11-17 06:13:04.537
1,2020-11-17 06:07:04.051,2020-11-17 06:07:04.051
2,2020-11-17 06:00:59.837,2020-11-17 06:00:59.837
3,2020-11-17 05:55:25.594,2020-11-17 05:55:25.594
4,2020-11-17 05:54:58.244,2020-11-17 05:54:58.244
...,...,...
153,2020-11-16 17:25:59.148,2020-11-16 17:25:59.148
154,2020-11-16 17:21:17.993,2020-11-16 17:21:17.993
155,2020-11-16 17:15:25.516,2020-11-16 17:15:25.516
156,2020-11-16 17:14:40.798,2020-11-16 17:14:40.798


In [24]:
group1min = data.groupby(pd.Grouper(key="timestamp",freq="1min")).count()
group5min = data.groupby(pd.Grouper(key="timestamp",freq="5min")).count()
# rename the column name to count
group1min.rename( columns={0: "count"}, inplace=True)
group5min.rename( columns={0: "count"}, inplace=True)

len(group1min["count"].value_counts())
len(group5min["count"].value_counts())

3

In [25]:
# calculate results for 1 minute intervals
g1_total_rows = len(group1min)
g1_misses = group1min["count"].value_counts().get(0,0) # get value for key 0, if it doesnt exist we return 0 which means 
g1_score = (g1_total_rows - g1_misses)/g1_total_rows
# print(g1_score)
group1min.loc[group1min["count"] == 0]

Unnamed: 0_level_0,count
timestamp,Unnamed: 1_level_1
2020-11-16 17:06:00,0
2020-11-16 17:07:00,0
2020-11-16 17:08:00,0
2020-11-16 17:09:00,0
2020-11-16 17:10:00,0
...,...
2020-11-17 06:08:00,0
2020-11-17 06:09:00,0
2020-11-17 06:10:00,0
2020-11-17 06:11:00,0


In [26]:
# calculate results for 5 minute intervals
g5_total_rows = len(group5min)
g5_misses = group5min["count"].value_counts().get(0, 0) # get value for key 0, if it doesnt exist we return 0 which means no misses.
g5_score = (g5_total_rows - g5_misses)/g5_total_rows
# print(g5_score)
group5min.loc[group5min["count"] == 0]

Unnamed: 0_level_0,count
timestamp,Unnamed: 1_level_1
2020-11-16 18:45:00,0
2020-11-16 20:10:00,0
2020-11-16 21:55:00,0
2020-11-16 23:10:00,0
2020-11-16 23:40:00,0
2020-11-17 01:35:00,0
2020-11-17 02:05:00,0
2020-11-17 02:35:00,0
2020-11-17 04:30:00,0
2020-11-17 05:00:00,0


In [27]:
print("Looking at data from", data[0].iloc[0] , "to" , data[0].iloc[-1])
# print("1 min intervals:", "{0:.2%}".format(g1_score) , "missing" , g1_misses , "intervals")
print("5 min intervals:", "{0:.2%}".format(g5_score) , "missing" , g5_misses , "intervals")

Looking at data from 2020-11-17 06:13:04.537 to 2020-11-16 17:05:25.478
5 min intervals: 93.67% missing 10 intervals
