This notebook identifies which data gaps appear in the timestamp record but not in the message record.

- Iterate over the timestamp gaps.
- For each one, print the nearest message gap to see whether it falls near or within a message gap.

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import WP19_analysis as wpa


# Build one summary record per village listed in wpa.raw_file_data, comparing
# the gaps found in the timestamp record with the gaps found in the messages.
data = {}
for rdf in wpa.raw_file_data:
    energy_data = wpa.load_timeseries_file(rdf['village_name'] + '-clean.csv')
    messages = wpa.load_message_file(rdf['village_name'] + '-messages.csv')
    total_duration = wpa.get_total_duration(energy_data)
    data[rdf['village_name']] = {
        'total_duration': total_duration,
        'ts num gaps': wpa.num_gaps_timestamp(energy_data),
        # Divided by 3600 -- presumably seconds to hours; confirm against wpa.
        'ts down hours': wpa.get_downtime_timestamps(energy_data) / 3600,
        'mess num gaps': wpa.num_gaps_messages(messages),
        # The first entry of the summed result is taken by position; `.iloc[0]`
        # makes that explicit, since plain `[0]` on a label-indexed Series is
        # deprecated in recent pandas.
        # NOTE(review): assumes get_durations_messages returns a DataFrame -- confirm.
        'mess down hours': wpa.get_durations_messages(messages).sum().iloc[0],
    }

# Transpose so that each village is a row and each metric is a column.
data_table = pd.DataFrame(data).T
# Up time is whatever part of the total duration is not counted as down time.
data_table['ts up hrs'] = data_table['total_duration'] - data_table['ts down hours']
data_table['mess up hrs'] = data_table['total_duration'] - data_table['mess down hours']
data_table

Unnamed: 0,mess down hours,mess num gaps,total_duration,ts down hours,ts num gaps,ts up hrs,mess up hrs
ajau,102.056111,112.0,3083.533333,134.866667,778.0,2948.666667,2981.477222
asei,101.501389,119.0,1888.533333,163.216667,215.0,1725.316667,1787.031944
atamali,2003.236389,98.0,2977.566667,2414.533333,233.0,563.033333,974.330278
ayapo,2055.485278,243.0,3058.383333,2351.683333,262.0,706.7,1002.898056
kensio,2053.530833,48.0,2452.666667,2234.316667,46.0,218.35,399.135833


In [2]:
# place all events into a single data frame

# Village whose two gap records are compared in this cell.
filename = 'ayapo'

# Both input files share the village name as a prefix.
timeseries_csv = filename + '-clean.csv'
messages_csv = filename + '-messages.csv'

energy_data = wpa.load_timeseries_file(timeseries_csv)
messages = wpa.load_message_file(messages_csv)

def wpa_get_gaps_timestamp(energy_data, min_gap=np.timedelta64(1, 'm')):
    """Return the gaps between consecutive index entries that exceed `min_gap`.

    Parameters
    ----------
    energy_data : pandas object with a datetime-like index
        Only the index is read; consecutive entries are differenced in the
        order in which they appear.
    min_gap : numpy.timedelta64 or compatible, default one minute
        Differences strictly greater than this value are reported as gaps.

    Returns
    -------
    pandas.Series
        Gap durations, indexed by the timestamp at which each gap starts
        (the earlier of the two entries that bracket the gap).
    """
    timestamps = energy_data.index
    differences = np.diff(timestamps.values)
    # There is one fewer difference than timestamps, so drop the last
    # timestamp: each difference is labelled with the entry that precedes it.
    gaps = pd.Series(index=timestamps[:-1], data=differences)
    return gaps[gaps > min_gap]

# Gaps derived from the timestamp record, tagged so that they can be told
# apart once both sources are merged.
ts_gaps = wpa_get_gaps_timestamp(energy_data).to_frame(name='duration')
ts_gaps['type'] = 'ts'

# Gaps derived from the message record; the 'date' column is moved to
# 'duration' so that both frames share the same column names.
ms_gaps = pd.DataFrame(wpa.get_gaps_messages(messages))
ms_gaps['duration'] = ms_gaps['date']
ms_gaps['type'] = 'ms'
del ms_gaps['date']

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
all_gaps = pd.concat([ts_gaps, ms_gaps]).sort_index()
all_gaps[all_gaps['duration'] > np.timedelta64(60, 'm')]
# now you can filter by duration and group by type to see the differences

Unnamed: 0,duration,type
2015-04-22 11:54:00,0 days 02:01:00,ts
2015-04-22 14:03:00,0 days 03:49:00,ts
2015-04-22 14:03:50,0 days 03:47:50,ms
2015-04-23 00:36:00,0 days 19:33:00,ts
2015-04-23 00:36:16,0 days 18:43:26,ms
2015-04-23 20:33:00,0 days 17:32:00,ts
2015-04-24 01:27:58,0 days 12:33:07,ms
2015-04-24 14:09:00,0 days 03:37:00,ts
2015-04-24 14:09:45,0 days 03:36:08,ms
2015-04-25 00:47:00,0 days 17:26:00,ts
