In [1]:
import pandas as pd
import time
import numpy as np
from IPython.display import clear_output
import matplotlib.pyplot as plt
import plotly.tools as tls
%matplotlib inline

# Folder holding the exported CSV files. Change this single constant to run the
# notebook against another copy of the dataset.
DATA_DIR = 'C:\\Users\\ruben\\Desktop\\Investigation\\updated_dataset_07_3_2018'

# Device metadata, indexed by device id, with both date columns parsed.
devices_data = pd.read_csv(
    DATA_DIR + '\\devices.csv',
    index_col='id',
    parse_dates=['created_at', 'updated_at'],
)
# Only the four columns used by the battery KPIs are loaded from the samples file.
samples_data = pd.read_csv(
    DATA_DIR + '\\samples.csv',
    usecols=['device_id', 'timestamp', 'battery_state', 'battery_level'],
    parse_dates=['timestamp'],
)

samples_data.head()

Unnamed: 0,device_id,timestamp,battery_state,battery_level
0,1,2017-10-08 12:50:04,Discharging,0.82
1,1,2017-10-08 12:50:04,Discharging,0.82
2,1,2017-10-08 12:55:15,Discharging,0.81
3,1,2017-10-08 12:58:28,Discharging,0.8
4,1,2017-10-08 13:03:16,Discharging,0.79


In [2]:
# Remove samples whose state contradicts their level:
# 'Full' below 100% and 'Charging' at exactly 100%.
inconsistent_samples = (
    ((samples_data['battery_state'] == 'Full') & (samples_data['battery_level'] < 1))
    | ((samples_data['battery_state'] == 'Charging') & (samples_data['battery_level'] == 1))
)
samples_data.drop(samples_data.loc[inconsistent_samples].index, inplace=True)

samples_data.head()

Unnamed: 0,device_id,timestamp,battery_state,battery_level
0,1,2017-10-08 12:50:04,Discharging,0.82
1,1,2017-10-08 12:50:04,Discharging,0.82
2,1,2017-10-08 12:55:15,Discharging,0.81
3,1,2017-10-08 12:58:28,Discharging,0.8
4,1,2017-10-08 13:03:16,Discharging,0.79


In [None]:
# device_charge_samples = samples_data.loc[samples_data['device_id'] == 1 & samples_data['battery_state'].isin(['Charging', 'Full'])].drop_duplicates(subset=['timestamp']).sort_values(by='timestamp').reset_index(drop=True)
# device_discharge_samples = samples_data.loc[(samples_data['device_id'] == 1) & (samples_data['battery_state'] == 'Discharging')].drop_duplicates(subset=['timestamp']).sort_values(by='timestamp').reset_index(drop=True)
# device_not_charging_samples = samples_data.loc[(samples_data['device_id'] == 1) & (samples_data['battery_state'] == 'Not charging')].drop_duplicates(subset=['timestamp']).sort_values(by='timestamp').reset_index(drop=True)

#time to charge 1% (seconds) of given charging sample
def computeChargeKPI(charge_subset, max_gap_seconds=600, max_step_percent=4):
    """Average number of seconds needed to gain 1% of battery within one charging run.

    Consecutive rows are compared pairwise, in row order. A pair contributes
    only when the time gap is in ``(0, max_gap_seconds]`` and the level rise
    is in ``(0, max_step_percent]`` percentage points.

    Parameters
    ----------
    charge_subset : pandas.DataFrame
        Rows of a single run with a datetime ``timestamp`` column and a
        ``battery_level`` column expressed as a fraction (it is scaled by 100).
    max_gap_seconds : float, default 600
        Largest accepted gap between two consecutive samples (10 minutes).
    max_step_percent : float, default 4
        Largest accepted level rise between two consecutive samples.

    Returns
    -------
    float or int
        Mean seconds per 1%, or ``-1`` when no pair qualifies.
    """
    # Plain lists give positional access regardless of the frame's index labels.
    timestamps = charge_subset['timestamp'].tolist()
    levels = charge_subset['battery_level'].tolist()

    seconds_per_percent = []
    for pos in range(1, len(levels)):
        # total_seconds() keeps the days component; `.seconds` would report a
        # gap of one day and five minutes as just 300 seconds.
        elapsed = (timestamps[pos] - timestamps[pos - 1]).total_seconds()
        gained_percent = (levels[pos] - levels[pos - 1]) * 100

        # Requiring a strictly positive rise avoids dividing by zero and keeps
        # falling levels from producing negative charge times.
        if 0 < elapsed <= max_gap_seconds and 0 < gained_percent <= max_step_percent:
            seconds_per_percent.append(elapsed / gained_percent)

    if not seconds_per_percent:
        return -1
    return sum(seconds_per_percent) / len(seconds_per_percent)

#get valid subsets from all charging/full samples and calls a function to compute the kpis on those subsets
def _split_charge_runs(charge_samples):
    """Split time-ordered charge samples into runs whose battery level never drops."""
    levels = charge_samples['battery_level'].tolist()
    # A new run starts at every sample whose level is below the previous one.
    run_starts = [0] + [pos for pos in range(1, len(levels)) if levels[pos] < levels[pos - 1]]
    run_stops = run_starts[1:] + [len(levels)]
    return [charge_samples.iloc[start:stop] for start, stop in zip(run_starts, run_stops)]


def _charge_run_kpi(run, percentage):
    """Return computeChargeKPI for one run, or -1 when the run is too short or too shallow."""
    # The minimum sample count assumes steps of at most 4% between samples.
    min_samples = round(percentage * 100 / 4)
    level_gain = run['battery_level'].iloc[len(run) - 1] - run['battery_level'].iloc[0]
    if len(run) >= min_samples and level_gain >= percentage:
        distinct_levels = run.drop_duplicates(subset=['battery_level']).sort_values(by='battery_level').reset_index(drop=True)
        return computeChargeKPI(distinct_levels)
    return -1


def chargeKPIs(dev_id, percentage):
    """Charging KPIs for one device, computed from the global ``samples_data``.

    The device's 'Charging'/'Full' samples are de-duplicated by timestamp,
    ordered by time and split into runs of non-decreasing battery level. Each
    run is counted as reaching 'Full' or not. Runs that gain at least
    ``percentage`` (a fraction of ``battery_level``) feed the seconds-per-1% average.

    Parameters
    ----------
    dev_id : value compared against ``samples_data['device_id']``
    percentage : float
        Minimum rise in ``battery_level`` a run must show to be used.

    Returns
    -------
    int or tuple
        ``-1`` when the device has no charging samples. Otherwise
        ``(kpi, times_reached_full, times_not_reached_full)``, where ``kpi`` is
        the average formatted as ``"MM:SS"``, or ``-1`` when no run was usable.
    """
    is_device_charge = (samples_data['device_id'] == dev_id) & (samples_data['battery_state'].isin(['Charging', 'Full']))
    device_charge_samples = samples_data.loc[is_device_charge].drop_duplicates(subset=['timestamp']).sort_values(by='timestamp').reset_index(drop=True)

    if device_charge_samples.empty:
        return -1

    each_subset_charge_results = []
    times_reached_full = 0
    times_not_reached_full = 0

    # Every run is handled by the same code path, including the last one and a
    # device that only has a single sample.
    for run in _split_charge_runs(device_charge_samples):
        # Keep only the first 'Full' sample of the run; this is a no-op when
        # the run has at most one.
        repeated_fulls = run.loc[run['battery_state'] == 'Full'].index[1:]
        run = run.drop(repeated_fulls).reset_index(drop=True)

        #count number times the device charge to full
        if 'Full' in run['battery_state'].unique():
            times_reached_full += 1
        else:
            times_not_reached_full += 1

        res = _charge_run_kpi(run, percentage)
        if res != -1:
            each_subset_charge_results.append(res)

    if len(each_subset_charge_results) == 0:
        return (-1, times_reached_full, times_not_reached_full)

    mean_seconds = sum(each_subset_charge_results) / len(each_subset_charge_results)
    return (time.strftime("%M:%S", time.gmtime(mean_seconds)), times_reached_full, times_not_reached_full)
    

    
# for devID in devices_data.index:
#     print(devID)
    
#     results = chargeKPIs(devID, 0.4)

#     if results != -1:

#         if results[0] != -1:
#             lst = list(results)
#             lst[0] = time.strftime("%M:%S", time.gmtime(lst[0]))
#             print(lst)

#         else:
#             print(results)
#     else:
#         print('No Samples!')

# Try the charge KPI on device 1, requiring each run to gain at least 0.4 of battery_level.
chargeKPIs(dev_id=1, percentage=0.4)

In [None]:
#time to discharge 1% (seconds) and battery drop per hour of given discharging sample
def computeDischargeKPI(discharge_subset):
    """Discharge KPIs for one run of samples, compared in row order.

    Two independent measurements are taken:

    * seconds per 1%: from consecutive rows whose gap is in ``(0, 900]``
      seconds and whose absolute level change is in ``(0, 4]`` percentage points;
    * level change per hour: the absolute ``battery_level`` difference between
      an anchor row and the first later row that is 3600-5400 seconds after
      it. That row then becomes the new anchor.

    Parameters
    ----------
    discharge_subset : pandas.DataFrame
        Rows with a datetime ``timestamp`` column and a ``battery_level``
        column expressed as a fraction (it is scaled by 100).

    Returns
    -------
    tuple
        ``(mean_seconds_per_percent, mean_level_change_per_hour)``; either
        element is ``-1`` when no measurement of that kind was possible.
    """
    # Plain lists give positional access regardless of the frame's index labels.
    timestamps = discharge_subset['timestamp'].tolist()
    levels = discharge_subset['battery_level'].tolist()

    subset_discharge_result = []
    battery_per_hour = []
    anchor = 0

    for pos in range(1, len(levels)):
        # total_seconds() keeps the days component; `.seconds` would make a
        # gap of a day plus one hour look like exactly one hour.
        step_seconds = (timestamps[pos] - timestamps[pos - 1]).total_seconds()
        step_percent = abs(levels[pos] - levels[pos - 1]) * 100
        # A strictly positive step avoids dividing by zero on repeated levels.
        if 0 < step_seconds <= 900 and 0 < step_percent <= 4:
            subset_discharge_result.append(step_seconds / step_percent)

        since_anchor = (timestamps[pos] - timestamps[anchor]).total_seconds()
        if 3600 <= since_anchor <= 5400:
            battery_per_hour.append(abs(levels[pos] - levels[anchor]))
            anchor = pos
        elif since_anchor > 5400:
            # The window was overshot. Restart from this row so the anchor does
            # not stay stuck for the rest of a time-ordered run.
            anchor = pos

    per_percent = sum(subset_discharge_result) / len(subset_discharge_result) if subset_discharge_result else -1
    per_hour = sum(battery_per_hour) / len(battery_per_hour) if battery_per_hour else -1
    return (per_percent, per_hour)

#get valid subsets from all discharging samples and calls a function to compute the kpis on those subsets
def dischargeKPIs(dev_id, percentage):
    """Discharge KPIs for one device, computed from the global ``samples_data``.

    The device's 'Discharging' samples are de-duplicated by timestamp, ordered
    by time and cut wherever the battery level rises. Each piece that is long
    enough and drops by at least ``percentage`` is passed to
    ``computeDischargeKPI``, and the per-piece results are averaged.

    Returns ``-1`` when the device has no discharging samples. Otherwise
    returns ``(time_per_percent, level_per_hour)``: a ``"MM:SS"`` string and a
    float rounded to two decimals, each replaced by ``-1`` when unavailable.
    """
    wanted_rows = (samples_data['device_id'] == dev_id) & (samples_data['battery_state'] == 'Discharging')
    device_discharge_samples = (
        samples_data.loc[wanted_rows]
        .drop_duplicates(subset=['timestamp'])
        .sort_values(by='timestamp')
        .reset_index(drop=True)
    )
    per_percentage_values = []
    per_hour_values = []

    if device_discharge_samples.empty:
        return -1

    def evaluate(piece):
        """Validate one piece and store whichever KPIs it yields."""
        enough_rows = len(piece) >= round(percentage * 100/4)
        if not (enough_rows and abs(piece['battery_level'].iloc[len(piece) - 1] - piece['battery_level'].iloc[0]) >= percentage):
            return
        ordered = piece.drop_duplicates(subset=['battery_level']).sort_values(by='battery_level', ascending=False).reset_index(drop=True)
        per_percent, per_hour = computeDischargeKPI(ordered)
        if per_percent != -1:
            per_percentage_values.append(per_percent)
        if per_hour != -1:
            per_hour_values.append(per_hour)

    levels = device_discharge_samples['battery_level']
    final_position = len(device_discharge_samples) - 1
    piece_start = 0
    for position in range(1, final_position + 1):
        # A rising level closes the current piece just before this row.
        if levels.iloc[position] > levels.iloc[position - 1]:
            evaluate(device_discharge_samples[piece_start:position])
            piece_start = position

        # The last row closes whatever piece is still open.
        if position == final_position:
            evaluate(device_discharge_samples[piece_start:position + 1])

    formatted_per_percentage = -1
    if len(per_percentage_values) != 0:
        formatted_per_percentage = time.strftime("%M:%S", time.gmtime(sum(per_percentage_values)/len(per_percentage_values)))

    formatted_per_hour = -1
    if len(per_hour_values) != 0:
        formatted_per_hour = float("{0:.2f}".format(round(sum(per_hour_values)/len(per_hour_values), 2)))

    return (formatted_per_percentage, formatted_per_hour)

# df = pd.DataFrame(columns=['device_id','charge_per_unit','discharge_per_unit','discharge_per_hour','reach_full','does_not_reach_full', 'number_of_samples'])
# print('start')
# for devID in devices_data.index:
#     print(devID)

#     if charge_results != -1 and discharge_results != -1:
#         df.loc[-1] = [devID, charge_results[0], discharge_results[0], discharge_results[1], charge_results[1], charge_results[2], len(samples_data.loc[samples_data['device_id'] == devID])]
#         df.index = df.index + 1
#         df = df.sort_index()
#     elif  charge_results != -1:
#         df.loc[-1] = [devID, charge_results[0], -1, -1, charge_results[1], charge_results[2], len(samples_data.loc[samples_data['device_id'] == devID])]
#         df.index = df.index + 1
#         df = df.sort_index()
#     elif discharge_results != -1:
#         df.loc[-1] = [devID, -1, discharge_results[0], discharge_results[1], -1, -1, len(samples_data.loc[samples_data['device_id'] == devID])]
#         df.index = df.index + 1
#         df = df.sort_index()
#     else:
#         df.loc[-1] = [devID, -1, -1, -1, -1, -1, len(samples_data.loc[samples_data['device_id'] == devID])]
#         df.index = df.index + 1
#         df = df.sort_index()

    
# df.head()    

In [None]:
# Order the rows of df by device_id and renumber the index from 0.
# NOTE(review): relies on `df` already existing in the kernel — confirm the cell that builds it has been run first.
final = df.sort_values(by='device_id').reset_index(drop=True)

In [None]:
# Preview the first 20 rows of the sorted frame.
final.head(20)

In [None]:
# Write the sorted frame to a comma-separated UTF-8 file without the index column.
# NOTE(review): the destination is an absolute, user-specific path — confirm it exists on the machine running this.
final.to_csv('C:\\Users\\ruben\\Desktop\\devices_stats.csv', index = False, sep=',', encoding='utf-8')

In [7]:
#time to charge 1% (seconds) of given charging sample
def computeChargeKPI(charge_subset, max_gap_seconds=600, max_step_percent=4):
    """Average number of seconds needed to gain 1% of battery within one charging run.

    Consecutive rows are compared pairwise, in row order. A pair contributes
    only when the time gap is in ``(0, max_gap_seconds]`` and the level rise
    is in ``(0, max_step_percent]`` percentage points.

    Parameters
    ----------
    charge_subset : pandas.DataFrame
        Rows of a single run with a datetime ``timestamp`` column and a
        ``battery_level`` column expressed as a fraction (it is scaled by 100).
    max_gap_seconds : float, default 600
        Largest accepted gap between two consecutive samples (10 minutes).
    max_step_percent : float, default 4
        Largest accepted level rise between two consecutive samples.

    Returns
    -------
    float or int
        Mean seconds per 1%, or ``-1`` when no pair qualifies.
    """
    # Plain lists give positional access regardless of the frame's index labels.
    timestamps = charge_subset['timestamp'].tolist()
    levels = charge_subset['battery_level'].tolist()

    seconds_per_percent = []
    for pos in range(1, len(levels)):
        # total_seconds() keeps the days component; `.seconds` would report a
        # gap of one day and five minutes as just 300 seconds.
        elapsed = (timestamps[pos] - timestamps[pos - 1]).total_seconds()
        gained_percent = (levels[pos] - levels[pos - 1]) * 100

        # Requiring a strictly positive rise avoids dividing by zero and keeps
        # falling levels from producing negative charge times.
        if 0 < elapsed <= max_gap_seconds and 0 < gained_percent <= max_step_percent:
            seconds_per_percent.append(elapsed / gained_percent)

    if not seconds_per_percent:
        return -1
    return sum(seconds_per_percent) / len(seconds_per_percent)

#get valid subsets from all charging/full samples and calls a function to compute the kpis on those subsets
def _split_charge_runs(charge_samples):
    """Split time-ordered charge samples into runs whose battery level never drops."""
    levels = charge_samples['battery_level'].tolist()
    # A new run starts at every sample whose level is below the previous one.
    run_starts = [0] + [pos for pos in range(1, len(levels)) if levels[pos] < levels[pos - 1]]
    run_stops = run_starts[1:] + [len(levels)]
    return [charge_samples.iloc[start:stop] for start, stop in zip(run_starts, run_stops)]


def _charge_run_kpi(run, percentage):
    """Return computeChargeKPI for one run, or -1 when the run is too short or too shallow."""
    # The minimum sample count assumes steps of at most 4% between samples.
    min_samples = round(percentage * 100 / 4)
    level_gain = run['battery_level'].iloc[len(run) - 1] - run['battery_level'].iloc[0]
    if len(run) >= min_samples and level_gain >= percentage:
        distinct_levels = run.drop_duplicates(subset=['battery_level']).sort_values(by='battery_level').reset_index(drop=True)
        return computeChargeKPI(distinct_levels)
    return -1


def chargeKPIs(dev_id, percentage):
    """Charging KPIs for one device, computed from the global ``samples_data``.

    The device's 'Charging'/'Full' samples are de-duplicated by timestamp,
    ordered by time and split into runs of non-decreasing battery level. Each
    run is counted as reaching 'Full' or not. Runs that gain at least
    ``percentage`` (a fraction of ``battery_level``) feed the seconds-per-1% average.

    Parameters
    ----------
    dev_id : value compared against ``samples_data['device_id']``
    percentage : float
        Minimum rise in ``battery_level`` a run must show to be used.

    Returns
    -------
    int or tuple
        ``-1`` when the device has no charging samples. Otherwise
        ``(kpi, times_reached_full, times_not_reached_full)``, where ``kpi`` is
        the average formatted as ``"MM:SS"``, or ``-1`` when no run was usable.
    """
    is_device_charge = (samples_data['device_id'] == dev_id) & (samples_data['battery_state'].isin(['Charging', 'Full']))
    device_charge_samples = samples_data.loc[is_device_charge].drop_duplicates(subset=['timestamp']).sort_values(by='timestamp').reset_index(drop=True)

    if device_charge_samples.empty:
        return -1

    each_subset_charge_results = []
    times_reached_full = 0
    times_not_reached_full = 0

    # Every run is handled by the same code path, including the last one and a
    # device that only has a single sample.
    for run in _split_charge_runs(device_charge_samples):
        # Keep only the first 'Full' sample of the run; this is a no-op when
        # the run has at most one.
        repeated_fulls = run.loc[run['battery_state'] == 'Full'].index[1:]
        run = run.drop(repeated_fulls).reset_index(drop=True)

        #count number times the device charge to full
        if 'Full' in run['battery_state'].unique():
            times_reached_full += 1
        else:
            times_not_reached_full += 1

        res = _charge_run_kpi(run, percentage)
        if res != -1:
            each_subset_charge_results.append(res)

    if len(each_subset_charge_results) == 0:
        return (-1, times_reached_full, times_not_reached_full)

    mean_seconds = sum(each_subset_charge_results) / len(each_subset_charge_results)
    return (time.strftime("%M:%S", time.gmtime(mean_seconds)), times_reached_full, times_not_reached_full)
 
#time to discharge 1% (seconds) and battery drop per hour of given discharging sample
def computeDischargeKPI(discharge_subset):
    """Discharge KPIs for one run of samples, compared in row order.

    Two independent measurements are taken:

    * seconds per 1%: from consecutive rows whose gap is in ``(0, 900]``
      seconds and whose absolute level change is in ``(0, 4]`` percentage points;
    * level change per hour: the absolute ``battery_level`` difference between
      an anchor row and the first later row that is 3600-5400 seconds after
      it. That row then becomes the new anchor.

    Parameters
    ----------
    discharge_subset : pandas.DataFrame
        Rows with a datetime ``timestamp`` column and a ``battery_level``
        column expressed as a fraction (it is scaled by 100).

    Returns
    -------
    tuple
        ``(mean_seconds_per_percent, mean_level_change_per_hour)``; either
        element is ``-1`` when no measurement of that kind was possible.
    """
    # Plain lists give positional access regardless of the frame's index labels.
    timestamps = discharge_subset['timestamp'].tolist()
    levels = discharge_subset['battery_level'].tolist()

    subset_discharge_result = []
    battery_per_hour = []
    anchor = 0

    for pos in range(1, len(levels)):
        # total_seconds() keeps the days component; `.seconds` would make a
        # gap of a day plus one hour look like exactly one hour.
        step_seconds = (timestamps[pos] - timestamps[pos - 1]).total_seconds()
        step_percent = abs(levels[pos] - levels[pos - 1]) * 100
        # A strictly positive step avoids dividing by zero on repeated levels.
        if 0 < step_seconds <= 900 and 0 < step_percent <= 4:
            subset_discharge_result.append(step_seconds / step_percent)

        since_anchor = (timestamps[pos] - timestamps[anchor]).total_seconds()
        if 3600 <= since_anchor <= 5400:
            battery_per_hour.append(abs(levels[pos] - levels[anchor]))
            anchor = pos
        elif since_anchor > 5400:
            # The window was overshot. Restart from this row so the anchor does
            # not stay stuck for the rest of a time-ordered run.
            anchor = pos

    per_percent = sum(subset_discharge_result) / len(subset_discharge_result) if subset_discharge_result else -1
    per_hour = sum(battery_per_hour) / len(battery_per_hour) if battery_per_hour else -1
    return (per_percent, per_hour)

#get valid subsets from all discharging samples and calls a function to compute the kpis on those subsets
def dischargeKPIs(dev_id, percentage):
    """Discharge KPIs for one device, computed from the global ``samples_data``.

    The device's 'Discharging' samples are de-duplicated by timestamp, ordered
    by time and cut wherever the battery level rises. Each piece that is long
    enough and drops by at least ``percentage`` is passed to
    ``computeDischargeKPI``, and the per-piece results are averaged.

    Returns ``-1`` when the device has no discharging samples. Otherwise
    returns ``(time_per_percent, level_per_hour)``: a ``"MM:SS"`` string and a
    float rounded to two decimals, each replaced by ``-1`` when unavailable.
    """
    wanted_rows = (samples_data['device_id'] == dev_id) & (samples_data['battery_state'] == 'Discharging')
    device_discharge_samples = (
        samples_data.loc[wanted_rows]
        .drop_duplicates(subset=['timestamp'])
        .sort_values(by='timestamp')
        .reset_index(drop=True)
    )
    per_percentage_values = []
    per_hour_values = []

    if device_discharge_samples.empty:
        return -1

    def evaluate(piece):
        """Validate one piece and store whichever KPIs it yields."""
        enough_rows = len(piece) >= round(percentage * 100/4)
        if not (enough_rows and abs(piece['battery_level'].iloc[len(piece) - 1] - piece['battery_level'].iloc[0]) >= percentage):
            return
        ordered = piece.drop_duplicates(subset=['battery_level']).sort_values(by='battery_level', ascending=False).reset_index(drop=True)
        per_percent, per_hour = computeDischargeKPI(ordered)
        if per_percent != -1:
            per_percentage_values.append(per_percent)
        if per_hour != -1:
            per_hour_values.append(per_hour)

    levels = device_discharge_samples['battery_level']
    final_position = len(device_discharge_samples) - 1
    piece_start = 0
    for position in range(1, final_position + 1):
        # A rising level closes the current piece just before this row.
        if levels.iloc[position] > levels.iloc[position - 1]:
            evaluate(device_discharge_samples[piece_start:position])
            piece_start = position

        # The last row closes whatever piece is still open.
        if position == final_position:
            evaluate(device_discharge_samples[piece_start:position + 1])

    formatted_per_percentage = -1
    if len(per_percentage_values) != 0:
        formatted_per_percentage = time.strftime("%M:%S", time.gmtime(sum(per_percentage_values)/len(per_percentage_values)))

    formatted_per_hour = -1
    if len(per_hour_values) != 0:
        formatted_per_hour = float("{0:.2f}".format(round(sum(per_hour_values)/len(per_hour_values), 2)))

    return (formatted_per_percentage, formatted_per_hour)










# Run both KPI pipelines for every device and keep one summary row per device,
# instead of overwriting the results on each iteration.
total_samples_processed = 0
samples_valid = 0  # NOTE(review): never incremented in this cell — confirm whether it is still needed
device_kpi_rows = []

# These masks do not depend on the device, so they are built once.
is_discharging = samples_data['battery_state'] == 'Discharging'
is_charging_or_full = samples_data['battery_state'].isin(['Charging', 'Full'])

print('start')
for devID in devices_data.index:
    print(devID)
    is_device = samples_data['device_id'] == devID

    # Only row counts are needed here, so sorting and re-indexing are skipped.
    total_samples_processed += len(samples_data.loc[is_device & is_discharging].drop_duplicates(subset=['timestamp']))
    total_samples_processed += len(samples_data.loc[is_device & is_charging_or_full].drop_duplicates(subset=['timestamp']))

    charge_results = chargeKPIs(devID, 0.4)
    discharge_results = dischargeKPIs(devID, 0.4)

    # Both functions return a bare -1 (not a tuple) when the device has no
    # samples of the relevant kind.
    if charge_results == -1:
        charge_per_unit, reach_full, does_not_reach_full = -1, -1, -1
    else:
        charge_per_unit, reach_full, does_not_reach_full = charge_results
    if discharge_results == -1:
        discharge_per_unit, discharge_per_hour = -1, -1
    else:
        discharge_per_unit, discharge_per_hour = discharge_results

    device_kpi_rows.append({
        'device_id': devID,
        'charge_per_unit': charge_per_unit,
        'discharge_per_unit': discharge_per_unit,
        'discharge_per_hour': discharge_per_hour,
        'reach_full': reach_full,
        'does_not_reach_full': does_not_reach_full,
        'number_of_samples': int(is_device.sum()),
    })

# Build the frame once from the collected rows rather than growing it per device.
df = pd.DataFrame(
    device_kpi_rows,
    columns=['device_id', 'charge_per_unit', 'discharge_per_unit', 'discharge_per_hour',
             'reach_full', 'does_not_reach_full', 'number_of_samples'],
)
    

Unnamed: 0,device_id,timestamp,battery_state,battery_level
50,56,2017-10-10 14:42:43,Not charging,0.64
51,56,2017-10-10 14:58:12,Not charging,0.63
52,56,2017-10-10 15:25:10,Not charging,0.62
53,56,2017-10-10 15:52:40,Not charging,0.61
54,56,2017-10-10 16:20:41,Not charging,0.6
55,56,2017-10-10 16:23:19,Not charging,0.59
56,56,2017-10-10 16:47:49,Not charging,0.58
57,56,2017-10-10 17:08:44,Not charging,0.57
58,56,2017-10-10 18:08:34,Not charging,0.56
59,56,2017-10-10 18:16:59,Not charging,0.55


In [6]:
# Time-ordered 'Not charging' samples of device 56, one row per timestamp.
device_not_charging_samples = (
    samples_data.loc[(samples_data['device_id'] == 56) & (samples_data['battery_state'] == 'Not charging')]
    .drop_duplicates(subset=['timestamp'])
    .sort_values(by='timestamp')
    .reset_index(drop=True)
)

# Show rows 50 to 99 (positional slice).
device_not_charging_samples.iloc[50:100]

Unnamed: 0,device_id,timestamp,battery_state,battery_level
50,56,2017-10-10 14:42:43,Not charging,0.64
51,56,2017-10-10 14:58:12,Not charging,0.63
52,56,2017-10-10 15:25:10,Not charging,0.62
53,56,2017-10-10 15:52:40,Not charging,0.61
54,56,2017-10-10 16:20:41,Not charging,0.6
55,56,2017-10-10 16:23:19,Not charging,0.59
56,56,2017-10-10 16:47:49,Not charging,0.58
57,56,2017-10-10 17:08:44,Not charging,0.57
58,56,2017-10-10 18:08:34,Not charging,0.56
59,56,2017-10-10 18:16:59,Not charging,0.55
