In [32]:
import pandas as pd
import ast
import pytz
import os
import json
import yaml
from dateutil import tz
from datetime import datetime
from IPython.display import display

pd.set_option('display.max_columns', None)

class BasePipeline:
    def __init__(self, config, base_file_path):
        self.config = config
        self.base_file_path = base_file_path
        self.map = {}
        self._data = pd.DataFrame({})
    
    def construct_file_path(self):
        #TODO: add type to prefix mapping
        file_prefix = 'per_outage' if self.config['type'] == 'o' else 'per_county'
        file_path = f"{self.base_file_path}/{self.config['state']}/layout_{self.config['layout']}/{file_prefix}_{self.config['name']}.csv"
        return file_path.replace('//', '/')

    def load_data(self):
        # Implementation for loading data
        try:
            file_path = self.construct_file_path()
            print(file_path)
            self._data = pd.read_csv(file_path)
            with open(f"{self.config['state']}_mapping.json", 'r') as json_file:
                self.map = json.load(json_file)
        except Exception as e:
            print(f"An error occurred during file loading: {e}")
            
    def transform(self):
        # Base transformation method
        raise NotImplementedError

    def standardize(self):
        # Base transformation method
        self.load_data()
        self.transform()
        grouped = self._data.groupby('outage_id').apply(self._compute_metrics).reset_index().round(2)
        self._data = pd.merge(grouped, self._data, on=['outage_id', 'timestamp'], how='inner')
        
        self._data['utility_provider'] = self.config['name']
        self._data['state'] = self.config['state']
        self._data['county'] = self._data['zipcode'].map(self.map)
        
        self._data = self._data[[
            'utility_provider', 'state', 'county', 'zipcode',
            'outage_id', 'start_time', 'end_time', 'lat', 'lng', 
            'duration', 'duration_max', 'duration_mean', 'customer_affected_mean', 'total_customer_outage_time', 'total_customer_outage_time_max', 'total_customer_outage_time_mean'
        ]]
        
        return self._data
    
    def output_data(self, standard_data):
        # TODO: Output unified data
        pass
    
    def get_dataframe(self):
        return self._data
    
    def _compute_metrics(self, group):
        duration = (group['end_time'] - group['start_time']).dt.total_seconds() / 60
        duration_max = duration + 15
        duration_mean = (duration + duration_max) / 2
        customer_affected_mean = group['customer_affected'].mean()
        
        total_customer_outage_time = 15 * (group['customer_affected'].sum() - group['customer_affected'].iloc[0]) + (group['timestamp'].iloc[0] - group['start_time'].iloc[0]).total_seconds() / 60 * group['customer_affected'].iloc[0]
        total_customer_outage_time_max = total_customer_outage_time + 15 * group['customer_affected'].iloc[-1]
        total_customer_outage_time_mean = (total_customer_outage_time + total_customer_outage_time_max) / 2

        return pd.Series({
            'timestamp': group['end_time'].iloc[-1],
            'duration': duration.iloc[-1],
            'duration_max': duration_max.iloc[-1],
            'duration_mean': duration_mean.iloc[-1],
            'customer_affected_mean': customer_affected_mean,
            'total_customer_outage_time': total_customer_outage_time,
            'total_customer_outage_time_max': total_customer_outage_time_max,
            'total_customer_outage_time_mean': total_customer_outage_time_mean
        })
        
    def _check_other_vars(self):
        # TODO: Check other useful variables
        pass


class GA1TX8(BasePipeline):
    def transform(self):
        try:
            # Convert timestamps
            eastern = tz.gettz('US/Eastern')
            utc = tz.gettz('UTC')
            self._data['timestamp'] = pd.to_datetime(self._data['timestamp'], utc=True).dt.tz_convert(eastern)
            self._data['outageStartTime'] = pd.to_datetime(self._data['outageStartTime'], utc=True).dt.tz_convert(eastern)
            self._data['end_time'] = self._data.groupby('outageRecID')['timestamp'].transform('max')
            
            # extract lat and long
            self._data['outagePoint'] = self._data['outagePoint'].apply(lambda x: json.loads(x.replace("'", '"')))
            self._data[['lat', 'lng']] = self._data['outagePoint'].apply(lambda x: pd.Series([x['lat'], x['lng']]))
            self._data = self._data.rename(columns={
                'outageRecID':'outage_id',
                'outageStartTime': 'start_time',
                'customersOutNow':'customer_affected',
                'zip':'zipcode'
            })
        except Exception as e:
            print(f"An error occurred during transformation: {e}")

In [37]:
with open('/Users/xuanedx1/github/outage-data-scraper/app/pipeline/config.yaml', 'r') as file:
    config = yaml.safe_load(file)
    base_file_path = config['globals']['local_base_file_path']

# Instantiate a BasePipeline object for each provider in the configuration
for provider in config['providers']:
    pipeline = GA1TX8(provider, base_file_path)
    pipeline.standardize()
    display(pipeline._data)


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Bluebonnet Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Bluebonnet Electric Coop, Inc.",tx,,77633,2023-04-06-0284,2023-04-06 01:49:40-04:00,2023-04-06 10:54:48-04:00,30.111487,-96.422005,545.13,560.13,552.63,3.0,1635.40,1680.40,1657.90
1,"Bluebonnet Electric Coop, Inc.",tx,,77426,2023-04-06-0315,2023-04-06 02:55:33-04:00,2023-04-06 10:54:48-04:00,30.129897,-96.210223,479.25,494.25,486.75,2.0,958.50,988.50,973.50
2,"Bluebonnet Electric Coop, Inc.",tx,,77426,2023-04-06-0316,2023-04-06 02:51:17-04:00,2023-04-06 10:54:48-04:00,30.130576,-96.207740,483.52,498.52,491.02,1.0,483.52,498.52,491.02
3,"Bluebonnet Electric Coop, Inc.",tx,,77833,2023-04-06-0364,2023-04-06 05:13:24-04:00,2023-04-06 10:54:48-04:00,30.074719,-96.389438,341.40,356.40,348.90,1.0,341.40,356.40,348.90
4,"Bluebonnet Electric Coop, Inc.",tx,,77426,2023-04-06-0385,2023-04-06 06:08:23-04:00,2023-04-06 10:54:48-04:00,30.141756,-96.283367,286.42,301.42,293.92,1.0,286.42,301.42,293.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10929,"Bluebonnet Electric Coop, Inc.",tx,,78953,2024-02-02-0103,2024-02-02 21:39:20-05:00,2024-02-02 22:25:10-05:00,29.890107,-97.350345,45.83,60.83,53.33,1.0,45.83,60.83,53.33
10930,"Bluebonnet Electric Coop, Inc.",tx,,78616,2024-02-02-0104,2024-02-02 21:41:16-05:00,2024-02-02 22:25:10-05:00,29.903496,-97.505146,43.90,58.90,51.40,1.0,43.90,58.90,51.40
10931,"Bluebonnet Electric Coop, Inc.",tx,,78662,2024-02-02-0106,2024-02-02 21:50:09-05:00,2024-02-02 21:55:10-05:00,29.872914,-97.488201,5.02,20.02,12.52,1.0,5.02,20.02,12.52
10932,"Bluebonnet Electric Coop, Inc.",tx,,78616,2024-02-02-0108,2024-02-02 21:52:49-05:00,2024-02-02 22:25:10-05:00,29.903196,-97.505283,32.35,47.35,39.85,1.0,32.40,47.40,39.90


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Bryan Texas Utilities.csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


  self._data = pd.read_csv(file_path)


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,Bryan Texas Utilities,tx,,unknown,2023-04-05-0088,2023-04-05 20:26:49-04:00,2023-04-05 21:27:21-04:00,30.923442,-96.227286,60.53,75.53,68.03,1.0,60.53,75.53,68.03
1,Bryan Texas Utilities,tx,,77859,2023-04-13-0269,2023-04-13 08:45:53-04:00,2023-04-13 09:39:44-04:00,30.835858,-96.343101,53.85,68.85,61.35,1.0,53.85,68.85,61.35
2,Bryan Texas Utilities,tx,,77801,2023-04-13-0270,2023-04-13 08:54:48-04:00,2023-04-13 15:17:21-04:00,30.641701,-96.352474,382.55,397.55,390.05,1.0,809.95,824.95,817.45
3,Bryan Texas Utilities,tx,,77845,2023-04-13-0271,2023-04-13 09:30:22-04:00,2023-04-13 09:39:44-04:00,30.573765,-96.336857,9.37,24.37,16.87,1.0,9.37,24.37,16.87
4,Bryan Texas Utilities,tx,,77802,2023-04-13-0272,2023-04-13 09:35:15-04:00,2023-04-13 10:54:45-04:00,30.649198,-96.339935,79.50,94.50,87.00,1.0,79.48,94.48,86.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6456,Bryan Texas Utilities,tx,,77803,2024-02-02-0038,2024-02-02 15:10:19-05:00,2024-02-02 15:24:45-05:00,30.691140,-96.382729,14.43,29.43,21.93,1.0,14.43,29.43,21.93
6457,Bryan Texas Utilities,tx,,77845,2024-02-02-0039,2024-02-02 15:55:03-05:00,2024-02-02 17:09:46-05:00,30.478119,-96.240822,74.72,89.72,82.22,1.0,74.72,89.72,82.22
6458,Bryan Texas Utilities,tx,,77845,2024-02-02-0040,2024-02-02 17:13:42-05:00,2024-02-02 20:09:46-05:00,30.585755,-96.263427,176.07,191.07,183.57,6.0,1056.20,1146.20,1101.20
6459,Bryan Texas Utilities,tx,,77802,2024-02-02-0041,2024-02-02 18:15:34-05:00,2024-02-02 19:39:46-05:00,30.643411,-96.336992,84.20,99.20,91.70,3.0,252.45,297.45,274.95


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Bowie-Cass Electric Coop, Inc..csv


  self._data = pd.read_csv(file_path)


An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Bowie-Cass Electric Coop, Inc.",tx,,75551,2023-04-05-0232,2023-04-05 10:04:00-04:00,2023-04-05 21:27:23-04:00,33.095000,-94.226800,683.38,698.38,690.88,1.0,683.38,698.38,690.88
1,"Bowie-Cass Electric Coop, Inc.",tx,,75551,2023-04-05-0235,2023-04-05 10:07:00-04:00,2023-04-05 21:27:23-04:00,33.029500,-94.185800,680.38,695.38,687.88,15.0,10205.75,10430.75,10318.25
2,"Bowie-Cass Electric Coop, Inc.",tx,,unknown,2023-04-05-0260,2023-04-05 10:17:31-04:00,2023-04-05 21:27:23-04:00,32.998569,-94.258361,669.87,684.87,677.37,3.0,2009.60,2054.60,2032.10
3,"Bowie-Cass Electric Coop, Inc.",tx,,75551,2023-04-05-0273,2023-04-05 10:21:37-04:00,2023-04-05 21:27:23-04:00,33.025400,-94.181000,665.77,680.77,673.27,1.0,665.77,680.77,673.27
4,"Bowie-Cass Electric Coop, Inc.",tx,,75551,2023-04-05-0280,2023-04-05 10:38:00-04:00,2023-04-05 21:27:23-04:00,33.010500,-94.162200,649.38,664.38,656.88,1.0,649.38,664.38,656.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8001,"Bowie-Cass Electric Coop, Inc.",tx,,75501,2024-02-02-0058,2024-02-02 18:40:47-05:00,2024-02-02 21:24:52-05:00,33.411900,-94.104400,164.08,179.08,171.58,1.0,164.08,179.08,171.58
8002,"Bowie-Cass Electric Coop, Inc.",tx,,75455,2024-02-02-0059,2024-02-02 19:24:38-05:00,2024-02-02 21:24:52-05:00,33.128596,-94.856394,120.23,135.23,127.73,1.0,120.23,135.23,127.73
8003,"Bowie-Cass Electric Coop, Inc.",tx,,75668,2024-02-02-0060,2024-02-02 19:31:32-05:00,2024-02-02 21:24:52-05:00,32.942926,-94.706891,113.33,128.33,120.83,1.0,113.33,128.33,120.83
8004,"Bowie-Cass Electric Coop, Inc.",tx,,75563,2024-02-02-0061,2024-02-02 20:36:56-05:00,2024-02-02 21:24:52-05:00,33.151300,-94.522900,47.93,62.93,55.43,1.0,47.93,62.93,55.43


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_College Station Utilities.csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,College Station Utilities,tx,,77802,2023-04-11-0067,2023-04-11 01:23:43-04:00,2023-04-11 01:24:45-04:00,30.644772,-96.294298,1.03,16.03,8.53,46.0,47.53,737.53,392.53
1,College Station Utilities,tx,,77845,2023-04-13-0093,2023-04-13 09:23:27-04:00,2023-04-13 09:39:46-04:00,30.596959,-96.305818,16.32,31.32,23.82,1.0,16.32,31.32,23.82
2,College Station Utilities,tx,,77845,2023-04-13-0094,2023-04-13 09:26:19-04:00,2023-04-13 09:39:46-04:00,30.589064,-96.294904,13.45,28.45,20.95,8.0,107.60,227.60,167.60
3,College Station Utilities,tx,,77845,2023-04-13-0095,2023-04-13 10:51:53-04:00,2023-04-13 11:09:47-04:00,30.590618,-96.291173,17.90,32.90,25.40,9.0,52.50,52.50,52.50
4,College Station Utilities,tx,,77845,2023-04-13-0096,2023-04-13 10:51:53-04:00,2023-04-13 12:15:29-04:00,30.590874,-96.290764,83.60,98.60,91.10,10.0,1529.00,1679.00,1604.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1212,College Station Utilities,tx,,77845,2024-02-01-0001,2024-02-01 07:36:59-05:00,2024-02-01 08:09:47-05:00,30.584113,-96.311309,32.80,47.80,40.30,8.0,262.40,382.40,322.40
1213,College Station Utilities,tx,,77840,2024-02-01-0002,2024-02-01 09:47:40-05:00,2024-02-01 10:24:50-05:00,30.604895,-96.315026,37.17,52.17,44.67,3.0,111.35,156.35,133.85
1214,College Station Utilities,tx,,77845,2024-02-02-0005,2024-02-02 10:50:07-05:00,2024-02-02 10:54:50-05:00,30.559510,-96.227885,4.72,19.72,12.22,8.0,37.73,157.73,97.73
1215,College Station Utilities,tx,,77840,2024-02-02-0007,2024-02-02 15:45:21-05:00,2024-02-02 16:54:49-05:00,30.601891,-96.317185,69.47,84.47,76.97,3.2,217.87,217.87,217.87


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Farmers Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


  self._data = pd.read_csv(file_path)


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Farmers Electric Coop, Inc.",tx,,75126,2023-04-05-0187,2023-04-05 15:08:55-04:00,2023-04-05 21:27:34-04:00,32.776863,-96.426858,378.65,393.65,386.15,48.00,18175.20,18895.20,18535.20
1,"Farmers Electric Coop, Inc.",tx,,75431,2023-04-13-0399,2023-04-13 07:11:44-04:00,2023-04-13 09:09:48-04:00,33.067477,-95.447090,118.07,133.07,125.57,1.00,118.03,133.03,125.53
2,"Farmers Electric Coop, Inc.",tx,,unknown,2023-04-13-0401,2023-04-13 09:46:38-04:00,2023-04-13 10:09:51-04:00,33.250415,-95.756933,23.22,38.22,30.72,2.00,46.40,76.40,61.40
3,"Farmers Electric Coop, Inc.",tx,,75440,2023-04-13-0406,2023-04-13 11:05:36-04:00,2023-04-13 12:43:19-04:00,32.878502,-95.830348,97.72,112.72,105.22,4.00,857.20,917.20,887.20
4,"Farmers Electric Coop, Inc.",tx,,unknown,2023-04-13-0407,2023-04-13 11:15:18-04:00,2023-04-13 11:24:55-04:00,32.774461,-96.491332,9.62,24.62,17.12,6.00,57.70,147.70,102.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7908,"Farmers Electric Coop, Inc.",tx,,75402,2024-02-02-0089,2024-02-02 16:21:02-05:00,2024-02-02 16:57:05-05:00,33.052449,-96.015269,36.05,51.05,43.55,2.00,72.03,102.03,87.03
7909,"Farmers Electric Coop, Inc.",tx,,75442,2024-02-02-0091,2024-02-02 16:44:08-05:00,2024-02-02 17:57:03-05:00,33.133711,-96.377714,72.92,87.92,80.42,1.00,72.95,87.95,80.45
7910,"Farmers Electric Coop, Inc.",tx,,75098,2024-02-02-0094,2024-02-02 18:23:18-05:00,2024-02-02 20:42:03-05:00,33.046531,-96.567695,138.75,153.75,146.25,78.78,10964.15,11159.15,11061.65
7911,"Farmers Electric Coop, Inc.",tx,,75098,2024-02-02-0097,2024-02-02 18:23:18-05:00,2024-02-02 21:25:07-05:00,33.046452,-96.566463,181.82,196.82,189.32,89.00,16181.68,17516.68,16849.18


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Guadalupe Valley Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


  self._data = pd.read_csv(file_path)


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Guadalupe Valley Electric Coop, Inc.",tx,,77995,2023-04-05-0220,2023-04-05 20:03:53-04:00,2023-04-05 21:27:35-04:00,29.208372,-97.117103,83.70,98.70,91.20,17.0,1422.90,1677.90,1550.40
1,"Guadalupe Valley Electric Coop, Inc.",tx,,77995,2023-04-05-0221,2023-04-05 20:05:28-04:00,2023-04-05 21:27:35-04:00,29.228264,-97.116361,82.12,97.12,89.62,3.0,246.35,291.35,268.85
2,"Guadalupe Valley Electric Coop, Inc.",tx,,77995,2023-04-05-0223,2023-04-05 20:07:40-04:00,2023-04-05 21:27:35-04:00,29.191775,-97.068438,79.92,94.92,87.42,2.0,159.83,189.83,174.83
3,"Guadalupe Valley Electric Coop, Inc.",tx,,78154,2023-04-05-0224,2023-04-05 20:20:36-04:00,2023-04-05 21:27:35-04:00,29.591483,-98.270095,66.98,81.98,74.48,1.0,66.98,81.98,74.48
4,"Guadalupe Valley Electric Coop, Inc.",tx,,unknown,2023-04-05-0226,2023-04-05 20:20:54-04:00,2023-04-05 21:27:35-04:00,29.230669,-97.111006,66.68,81.68,74.18,1.0,66.68,81.68,74.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4569,"Guadalupe Valley Electric Coop, Inc.",tx,,unknown,2024-02-01-0052,2024-02-01 15:55:31-05:00,2024-02-01 17:09:56-05:00,29.069146,-97.045621,74.42,89.42,81.92,37.0,2754.65,3309.65,3032.15
4570,"Guadalupe Valley Electric Coop, Inc.",tx,,78164,2024-02-02-0077,2024-02-02 08:37:56-05:00,2024-02-02 09:09:53-05:00,29.045089,-97.485442,31.95,46.95,39.45,6.0,191.70,281.70,236.70
4571,"Guadalupe Valley Electric Coop, Inc.",tx,,unknown,2024-02-02-0101,2024-02-02 14:31:27-05:00,2024-02-02 16:27:10-05:00,28.930609,-97.588939,115.72,130.72,123.22,1.0,115.67,130.67,123.17
4572,"Guadalupe Valley Electric Coop, Inc.",tx,,78108,2024-02-02-0106,2024-02-02 16:35:38-05:00,2024-02-02 18:27:08-05:00,29.603157,-98.241892,111.50,126.50,119.00,3.0,334.55,379.55,357.05


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Mid-South Electric Coop Association.csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


  self._data = pd.read_csv(file_path)


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,Mid-South Electric Coop Association,tx,,unknown,2023-04-06-0384,2023-04-06 13:35:03-04:00,2023-04-06 21:25:17-04:00,30.426111,-95.933718,470.23,485.23,477.73,3.0,1410.70,1455.70,1433.20
1,Mid-South Electric Coop Association,tx,,unknown,2023-04-06-0402,2023-04-06 15:12:37-04:00,2023-04-06 21:25:17-04:00,30.406909,-95.943087,372.67,387.67,380.17,7.0,2608.67,2713.67,2661.17
2,Mid-South Electric Coop Association,tx,,unknown,2023-04-06-0417,2023-04-06 16:57:43-04:00,2023-04-06 21:25:17-04:00,30.500683,-95.634975,267.57,282.57,275.07,61.0,16321.57,17236.57,16779.07
3,Mid-South Electric Coop Association,tx,,77868,2023-04-06-0434,2023-04-06 17:49:38-04:00,2023-04-06 21:25:17-04:00,30.372556,-96.034908,215.65,230.65,223.15,1.0,215.65,230.65,223.15
4,Mid-South Electric Coop Association,tx,,77320,2023-04-06-0463,2023-04-06 19:51:40-04:00,2023-04-06 21:25:17-04:00,30.760431,-95.574893,93.62,108.62,101.12,3.0,280.85,325.85,303.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5072,Mid-South Electric Coop Association,tx,,77830,2024-02-02-0031,2024-02-02 10:26:04-05:00,2024-02-02 14:57:10-05:00,30.433436,-95.899287,271.10,286.10,278.60,1.0,269.08,284.08,276.58
5073,Mid-South Electric Coop Association,tx,,77304,2024-02-02-0034,2024-02-02 13:19:47-05:00,2024-02-02 16:27:14-05:00,30.336648,-95.553184,187.45,202.45,194.95,1.0,187.42,202.42,194.92
5074,Mid-South Electric Coop Association,tx,,77356,2024-02-02-0037,2024-02-02 17:50:13-05:00,2024-02-02 17:57:11-05:00,30.372450,-95.628873,6.97,21.97,14.47,3.0,20.90,65.90,43.40
5075,Mid-South Electric Coop Association,tx,,77320,2024-02-02-0038,2024-02-02 18:58:38-05:00,2024-02-02 19:42:12-05:00,30.784514,-95.572183,43.57,58.57,51.07,2.0,87.10,117.10,102.10


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_New Braunfels Utilities.csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,New Braunfels Utilities,tx,,78130,2023-04-10-0088,2023-04-10 12:00:58-04:00,2023-04-10 15:09:47-04:00,29.677340,-98.046842,188.82,203.82,196.32,1.0,188.82,203.82,196.32
1,New Braunfels Utilities,tx,,unknown,2023-04-13-0132,2023-04-13 09:49:14-04:00,2023-04-13 10:24:51-04:00,29.663844,-98.050521,35.62,50.62,43.12,1197.0,42593.25,60548.25,51570.75
2,New Braunfels Utilities,tx,,unknown,2023-04-13-0133,2023-04-13 09:49:14-04:00,2023-04-13 10:24:51-04:00,29.699468,-98.022335,35.62,50.62,43.12,968.0,34444.67,48964.67,41704.67
3,New Braunfels Utilities,tx,,78130,2023-04-13-0134,2023-04-13 09:49:14-04:00,2023-04-13 10:24:51-04:00,29.684255,-98.059658,35.62,50.62,43.12,1970.0,70099.17,99649.17,84874.17
4,New Braunfels Utilities,tx,,78130,2023-04-13-0144,2023-04-13 10:09:39-04:00,2023-04-13 12:32:24-04:00,29.702254,-98.101071,142.75,157.75,150.25,2.0,480.40,510.40,495.40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
860,New Braunfels Utilities,tx,,78132,2024-02-01-0011,2024-02-01 15:21:33-05:00,2024-02-01 16:09:48-05:00,29.764354,-98.211396,48.25,63.25,55.75,2.0,96.50,126.50,111.50
861,New Braunfels Utilities,tx,,78130,2024-02-01-0012,2024-02-01 22:12:13-05:00,2024-02-01 22:54:49-05:00,29.706131,-98.139039,42.60,57.60,50.10,4.0,170.40,230.40,200.40
862,New Braunfels Utilities,tx,,78130,2024-02-02-0018,2024-02-02 09:54:10-05:00,2024-02-02 14:54:51-05:00,29.736173,-98.108692,300.68,315.68,308.18,1.0,300.68,315.68,308.18
863,New Braunfels Utilities,tx,,78132,2024-02-02-0020,2024-02-02 10:29:10-05:00,2024-02-02 10:54:52-05:00,29.741628,-98.112126,25.70,40.70,33.20,1.0,25.70,40.70,33.20


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_North Plains Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


  self._data = pd.read_csv(file_path)


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"North Plains Electric Coop, Inc.",tx,,79093,2023-04-05-0005,2023-04-05 19:27:00-04:00,2023-04-05 21:27:39-04:00,36.130189,-101.054489,120.65,135.65,128.15,1.00,120.65,135.65,128.15
1,"North Plains Electric Coop, Inc.",tx,,unknown,2023-04-14-0009,2023-04-14 17:20:00-04:00,2023-04-17 08:09:49-04:00,35.974462,-101.285244,3769.82,3784.82,3777.32,0.07,259.87,259.87,259.87
2,"North Plains Electric Coop, Inc.",tx,,79081,2023-04-14-0010,2023-04-14 17:29:00-04:00,2023-04-17 08:24:49-04:00,36.242258,-101.336603,3775.82,3790.82,3783.32,0.02,70.92,70.92,70.92
3,"North Plains Electric Coop, Inc.",tx,,unknown,2023-04-15-0011,2023-04-15 10:30:00-04:00,2023-04-17 08:39:51-04:00,36.240510,-100.980723,2769.85,2784.85,2777.35,0.03,69.87,69.87,69.87
4,"North Plains Electric Coop, Inc.",tx,,unknown,2023-04-16-0012,2023-04-16 20:09:00-04:00,2023-04-17 08:39:51-04:00,36.034593,-100.288466,750.85,765.85,758.35,0.30,225.83,225.83,225.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
808,"North Plains Electric Coop, Inc.",tx,,79033,2024-01-29-0074,2024-01-29 05:51:00-05:00,2024-01-29 06:09:55-05:00,36.317772,-100.894180,18.92,33.92,26.42,1.00,18.95,33.95,26.45
809,"North Plains Electric Coop, Inc.",tx,,unknown,2024-01-29-0075,2024-01-29 05:53:00-05:00,2024-01-29 10:40:02-05:00,36.101762,-100.707937,287.03,302.03,294.53,1.68,487.67,487.67,487.67
810,"North Plains Electric Coop, Inc.",tx,,unknown,2024-01-29-0076,2024-01-29 06:48:00-05:00,2024-01-29 10:55:12-05:00,36.249363,-101.552342,247.20,262.20,254.70,0.53,126.93,126.93,126.93
811,"North Plains Electric Coop, Inc.",tx,,unknown,2024-01-31-0077,2024-01-31 15:03:56-05:00,2024-01-31 15:10:18-05:00,36.290955,-100.751730,6.37,21.37,13.87,2.00,12.73,42.73,27.73


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Nueces Electric Coop, Inc..csv


  self._data = pd.read_csv(file_path)


An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Nueces Electric Coop, Inc.",tx,,78379,2023-04-07-0462,2023-04-07 16:31:39-04:00,2023-04-07 20:54:50-04:00,27.296416,-97.813700,263.18,278.18,270.68,1.0,263.18,278.18,270.68
1,"Nueces Electric Coop, Inc.",tx,,78332,2023-04-07-0468,2023-04-07 17:35:45-04:00,2023-04-07 20:54:50-04:00,27.746860,-97.950783,199.08,214.08,206.58,6.0,1194.50,1284.50,1239.50
2,"Nueces Electric Coop, Inc.",tx,,78363,2023-04-07-0471,2023-04-07 18:00:40-04:00,2023-04-07 20:54:50-04:00,27.417603,-97.858746,174.17,189.17,181.67,6.0,1045.00,1135.00,1090.00
3,"Nueces Electric Coop, Inc.",tx,,78341,2023-04-07-0479,2023-04-07 20:01:56-04:00,2023-04-07 20:54:50-04:00,27.695209,-98.494693,52.90,67.90,60.40,84.0,4443.60,5703.60,5073.60
4,"Nueces Electric Coop, Inc.",tx,,unknown,2023-04-07-0483,2023-04-07 20:13:58-04:00,2023-04-07 20:54:50-04:00,27.996953,-98.357433,40.87,55.87,48.37,1.0,40.87,55.87,48.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3747,"Nueces Electric Coop, Inc.",tx,,78380,2024-02-01-0014,2024-02-01 14:41:52-05:00,2024-02-01 17:25:04-05:00,27.772587,-97.812809,163.20,178.20,170.70,1.0,163.27,178.27,170.77
3748,"Nueces Electric Coop, Inc.",tx,,unknown,2024-02-01-0017,2024-02-01 17:23:28-05:00,2024-02-02 08:40:01-05:00,28.063337,-98.139595,916.55,931.55,924.05,2.8,2594.77,2594.77,2594.77
3749,"Nueces Electric Coop, Inc.",tx,,unknown,2024-02-02-0018,2024-02-02 09:31:05-05:00,2024-02-02 12:57:22-05:00,27.960272,-98.047275,206.28,221.28,213.78,1.0,204.03,219.03,211.53
3750,"Nueces Electric Coop, Inc.",tx,,78384,2024-02-02-0023,2024-02-02 16:09:04-05:00,2024-02-02 16:42:14-05:00,27.769132,-98.254747,33.17,48.17,40.67,2.0,66.33,96.33,81.33


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Taylor Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Taylor Electric Coop, Inc.",tx,,79536,2023-04-12-0042,2023-04-12 08:38:37-04:00,2023-04-12 08:39:51-04:00,32.500945,-99.965394,1.23,16.23,8.73,1.0,1.23,16.23,8.73
1,"Taylor Electric Coop, Inc.",tx,,79536,2023-04-13-0044,2023-04-13 15:28:08-04:00,2023-04-13 15:39:56-04:00,32.500945,-99.965394,11.80,26.80,19.30,1.0,19.35,34.35,26.85
2,"Taylor Electric Coop, Inc.",tx,,79536,2023-04-13-0045,2023-04-13 15:50:59-04:00,2023-04-13 16:02:55-04:00,32.500945,-99.965394,11.93,26.93,19.43,1.0,33.93,48.93,41.43
3,"Taylor Electric Coop, Inc.",tx,,79536,2023-04-13-0046,2023-04-13 15:53:10-04:00,2023-04-13 16:02:55-04:00,32.521595,-99.970467,9.75,24.75,17.25,2.0,63.50,93.50,78.50
4,"Taylor Electric Coop, Inc.",tx,,79536,2023-04-13-0047,2023-04-13 16:50:32-04:00,2023-04-13 17:13:00-04:00,32.500945,-99.965394,22.47,37.47,29.97,1.0,64.28,79.28,71.78
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1699,"Taylor Electric Coop, Inc.",tx,,79602,2024-02-02-0014,2024-02-02 20:26:00-05:00,2024-02-02 20:57:33-05:00,32.273665,-99.573499,31.55,46.55,39.05,25.0,790.42,1165.42,977.92
1700,"Taylor Electric Coop, Inc.",tx,,unknown,2024-02-02-0015,2024-02-02 20:27:00-05:00,2024-02-02 22:25:29-05:00,32.266347,-99.471563,118.48,133.48,125.98,1.0,120.62,135.62,128.12
1701,"Taylor Electric Coop, Inc.",tx,,unknown,2024-02-02-0016,2024-02-02 20:32:00-05:00,2024-02-02 21:25:43-05:00,32.151788,-99.512645,53.72,68.72,61.22,37.0,2057.82,2612.82,2335.32
1702,"Taylor Electric Coop, Inc.",tx,,79602,2024-02-02-0017,2024-02-02 20:32:00-05:00,2024-02-02 20:57:33-05:00,32.251032,-99.585578,25.55,40.55,33.05,9.0,230.55,365.55,298.05


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Trinity Valley Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Trinity Valley Electric Coop, Inc.",tx,,75763,2023-04-11-0401,2023-04-11 17:58:59-04:00,2023-04-11 18:09:52-04:00,32.076880,-95.511193,10.88,25.88,18.38,2.00,21.77,51.77,36.77
1,"Trinity Valley Electric Coop, Inc.",tx,,75758,2023-04-13-0453,2023-04-13 11:05:52-04:00,2023-04-13 11:09:59-04:00,32.217910,-95.505367,4.12,19.12,11.62,3.00,12.35,57.35,34.85
2,"Trinity Valley Electric Coop, Inc.",tx,,75169,2023-04-13-0454,2023-04-13 11:05:00-04:00,2023-04-13 11:09:59-04:00,32.862322,-96.044870,4.98,19.98,12.48,2.00,9.97,39.97,24.97
3,"Trinity Valley Electric Coop, Inc.",tx,,unknown,2023-04-13-0455,2023-04-13 11:13:00-04:00,2023-04-13 11:25:00-04:00,31.861947,-95.831848,12.00,27.00,19.50,1.00,12.00,27.00,19.50
4,"Trinity Valley Electric Coop, Inc.",tx,,75159,2023-10-18-0676,2023-10-18 21:27:10-04:00,2023-10-18 21:54:58-04:00,32.596015,-96.544277,27.80,42.80,35.30,2.00,55.63,85.63,70.63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1988,"Trinity Valley Electric Coop, Inc.",tx,,unknown,2024-02-02-0010,2024-02-02 14:04:43-05:00,2024-02-02 14:12:39-05:00,32.477779,-95.842087,7.93,22.93,15.43,728.00,5775.47,16695.47,11235.47
1989,"Trinity Valley Electric Coop, Inc.",tx,,75103,2024-02-02-0021,2024-02-02 14:42:00-05:00,2024-02-02 14:57:38-05:00,32.527764,-95.860944,15.63,30.63,23.13,5.00,78.17,153.17,115.67
1990,"Trinity Valley Electric Coop, Inc.",tx,,unknown,2024-02-02-0022,2024-02-02 19:19:08-05:00,2024-02-02 20:12:38-05:00,31.662225,-95.716628,53.50,68.50,61.00,2.00,107.07,137.07,122.07
1991,"Trinity Valley Electric Coop, Inc.",tx,,75117,2024-02-02-0026,2024-02-02 20:54:08-05:00,2024-02-02 21:25:49-05:00,32.610176,-95.872362,31.68,46.68,39.18,2.33,93.53,138.53,116.03


/Users/xuanedx1/github/outage-data-scraper/data/s3/tx/layout_8/per_outage_Wood County Electric Coop, Inc..csv
An error occurred during file loading: [Errno 2] No such file or directory: 'tx_mapping.json'


Unnamed: 0,utility_provider,state,county,zipcode,outage_id,start_time,end_time,lat,lng,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,"Wood County Electric Coop, Inc.",tx,,75497,2023-04-05-0239,2023-04-05 23:30:04-04:00,2023-04-06 01:40:10-04:00,32.895604,-95.510205,130.10,145.10,137.60,2.0,260.20,290.20,275.20
1,"Wood County Electric Coop, Inc.",tx,,75457,2023-04-13-0455,2023-04-13 09:26:28-04:00,2023-04-13 09:54:55-04:00,33.151040,-95.152653,28.45,43.45,35.95,1.0,28.42,43.42,35.92
2,"Wood County Electric Coop, Inc.",tx,,75773,2023-04-13-0456,2023-04-13 10:07:09-04:00,2023-04-13 10:39:59-04:00,32.715167,-95.366578,32.83,47.83,40.33,2.0,65.60,95.60,80.60
3,"Wood County Electric Coop, Inc.",tx,,75706,2023-04-13-0459,2023-04-13 10:37:22-04:00,2023-04-13 10:39:59-04:00,32.513062,-95.261032,2.62,17.62,10.12,1.0,2.62,17.62,10.12
4,"Wood County Electric Coop, Inc.",tx,,75790,2023-04-13-0461,2023-04-13 11:13:39-04:00,2023-04-13 11:25:01-04:00,32.497511,-95.644615,11.37,26.37,18.87,1.0,11.37,26.37,18.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6537,"Wood County Electric Coop, Inc.",tx,,unknown,2024-02-02-0035,2024-02-02 13:27:54-05:00,2024-02-02 13:42:40-05:00,32.505452,-95.560784,14.77,29.77,22.27,1.0,14.77,29.77,22.27
6538,"Wood County Electric Coop, Inc.",tx,,75773,2024-02-02-0036,2024-02-02 13:37:48-05:00,2024-02-02 13:42:40-05:00,32.715294,-95.562928,4.87,19.87,12.37,1.0,4.87,19.87,12.37
6539,"Wood County Electric Coop, Inc.",tx,,75773,2024-02-02-0037,2024-02-02 13:37:48-05:00,2024-02-02 14:12:41-05:00,32.715294,-95.562928,34.88,49.88,42.38,1.0,34.85,49.85,42.35
6540,"Wood County Electric Coop, Inc.",tx,,unknown,2024-02-02-0038,2024-02-02 14:57:14-05:00,2024-02-02 15:12:39-05:00,32.532015,-95.550032,15.42,30.42,22.92,13.0,200.42,395.42,297.92


------
**dev**

In [173]:
df = pd.read_csv('/Users/xuanedx1/github/outage-data-scraper/data/s3/ga/layout_1/per_outage_Amicalola EMC.csv')

  df = pd.read_csv('/Users/xuanedx1/github/outage-data-scraper/data/s3/ga/layout_1/per_outage_Amicalola EMC.csv')


In [174]:
df.head()

Unnamed: 0,outageRecID,outageName,outagePoint,outageStartTime,estimatedTimeOfRestoral,outageEndTime,verified,cause,crewAssigned,customersOutInitially,customersOutNow,customersRestored,streetsAffected,outageModifiedTime,outageWorkStatus,timestamp,zip,EMC
0,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13T15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,04-13-2023 19:50:53,30705,Amicalola EMC
1,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13T15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,04-13-2023 19:54:01,30705,Amicalola EMC
2,2023-04-13-1041,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13T15:47:48-04:00,,,False,,False,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:48:10-04:00,,04-13-2023 19:54:01,30536,Amicalola EMC
3,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13T15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,04-13-2023 19:58:02,30705,Amicalola EMC
4,2023-04-13-1041,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13T15:47:48-04:00,,,False,,False,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:48:10-04:00,,04-13-2023 19:58:02,30536,Amicalola EMC


In [175]:
df.isna().sum()

outageRecID                     0
outageName                      0
outagePoint                     0
outageStartTime                 0
estimatedTimeOfRestoral    247410
outageEndTime              244075
verified                        0
cause                      246589
crewAssigned                    0
customersOutInitially           0
customersOutNow                 0
customersRestored               0
streetsAffected                 0
outageModifiedTime              0
outageWorkStatus           247379
timestamp                       0
zip                             0
EMC                             0
dtype: int64

In [176]:
len(df[df['outageRecID'] != df['outageName']])

0

In [177]:
len(df[df['customersOutInitially'] < df['customersOutNow']])

13

In [178]:
len(df[df['customersOutInitially'] == df['customersRestored']])

2673

In [179]:
def transform(df):
    # Convert timestamps
    eastern = tz.gettz('US/Eastern')
    utc = tz.gettz('UTC')
    df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True).dt.tz_convert(eastern)
    df['outageStartTime'] = pd.to_datetime(df['outageStartTime'], utc=True).dt.tz_convert(eastern)
    df['end_time'] = df.groupby('outageRecID')['timestamp'].transform('max')
    
    # extract lat and long
    df['outagePoint'] = df['outagePoint'].apply(lambda x: json.loads(x.replace("'", '"')))
    df[['lat', 'lng']] = df['outagePoint'].apply(lambda x: pd.Series([x['lat'], x['lng']]))
    
    return df

In [180]:
df = transform(df)

In [181]:
df

Unnamed: 0,outageRecID,outageName,outagePoint,outageStartTime,estimatedTimeOfRestoral,outageEndTime,verified,cause,crewAssigned,customersOutInitially,customersOutNow,customersRestored,streetsAffected,outageModifiedTime,outageWorkStatus,timestamp,zip,EMC,end_time,lat,lng
0,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13 15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,2023-04-13 15:50:53-04:00,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.812996,-84.602918
1,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13 15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,2023-04-13 15:54:01-04:00,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.812996,-84.602918
2,2023-04-13-1041,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13 15:47:48-04:00,,,False,,False,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:48:10-04:00,,2023-04-13 15:54:01-04:00,30536,Amicalola EMC,2023-04-13 17:24:17-04:00,34.655041,-84.258477
3,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13 15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,2023-04-13 15:58:02-04:00,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.812996,-84.602918
4,2023-04-13-1041,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13 15:47:48-04:00,,,False,,False,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:48:10-04:00,,2023-04-13 15:58:02-04:00,30536,Amicalola EMC,2023-04-13 17:24:17-04:00,34.655041,-84.258477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
247405,2024-02-02-0025,2024-02-02-0025,"{'lat': 34.374641655676704, 'lng': -84.4433162...",2024-02-02 18:36:10-05:00,,,False,,True,3,3,0,"['LOWER DOWDA MILL RD 1052', 'LOWER DOWDA MILL...",2024-02-02T18:40:53.5800000-05:00,,2024-02-02 19:36:12-05:00,30143,Amicalola EMC,2024-02-02 20:36:11-05:00,34.374642,-84.443316
247406,2024-02-02-0025,2024-02-02-0025,"{'lat': 34.374641655676704, 'lng': -84.4433162...",2024-02-02 18:36:10-05:00,,,False,,True,3,3,0,"['LOWER DOWDA MILL RD 1052', 'LOWER DOWDA MILL...",2024-02-02T18:40:53.5800000-05:00,,2024-02-02 19:51:11-05:00,30143,Amicalola EMC,2024-02-02 20:36:11-05:00,34.374642,-84.443316
247407,2024-02-02-0025,2024-02-02-0025,"{'lat': 34.374641655676704, 'lng': -84.4433162...",2024-02-02 18:36:10-05:00,,,False,,True,3,3,0,"['LOWER DOWDA MILL RD 1052', 'LOWER DOWDA MILL...",2024-02-02T18:40:53.5800000-05:00,,2024-02-02 20:06:11-05:00,30143,Amicalola EMC,2024-02-02 20:36:11-05:00,34.374642,-84.443316
247408,2024-02-02-0025,2024-02-02-0025,"{'lat': 34.374641655676704, 'lng': -84.4433162...",2024-02-02 18:36:10-05:00,,,False,,True,3,3,0,"['LOWER DOWDA MILL RD 1052', 'LOWER DOWDA MILL...",2024-02-02T18:40:53.5800000-05:00,,2024-02-02 20:21:12-05:00,30143,Amicalola EMC,2024-02-02 20:36:11-05:00,34.374642,-84.443316


In [96]:
# len(t[t['outageStartTime'] > df['timestamp']])

0

In [164]:
def compute_metrics(group):
    duration = (group['end_time'] - group['outageStartTime']).dt.total_seconds() / 60
    # duration_min_est = 15 * (len(group) - 1) + 7.5
    # duration_max_est = 15 * (len(group) + 1)
    # duration_mean_est = (duration_|min_est + duration_max_est) / 2
    duration_max = duration + 15
    duration_mean = (duration + duration_max) / 2
    customer_affected_mean = group['customersOutNow'].mean()
    
    total_customer_outage_time = 15 * (group['customersOutNow'].sum() - group['customersOutNow'].iloc[0]) + (group['timestamp'].iloc[0] - group['outageStartTime'].iloc[0]).total_seconds() / 60 * group['customersOutNow'].iloc[0]
    total_customer_outage_time_max = total_customer_outage_time + 15 * group['customersOutNow'].iloc[-1]
    total_customer_outage_time_mean = (total_customer_outage_time + total_customer_outage_time_max) / 2
    
    # total_customer_outage_time_a1 = 15 * (group['customersOutNow'].sum() - group['customersOutNow'].iloc[0]) + 1 * group['customersOutNow'].iloc[0]
    # total_customer_outage_time_a2 = 15 * (group['customersOutNow'].sum() - group['customersOutNow'].iloc[0]) + 7.5 * group['customersOutNow'].iloc[0]
    # total_customer_outage_time_b = 15 * group['customersOutNow'].sum()
    # total_customer_outage_time_c = 15 * group['customersOutNow'].sum() + 15 * group['customersOutNow'].iloc[0] + 15 * group['customersOutNow'].iloc[-1]
    # total_customer_outage_time_d = (cust_out_time_est_a1 + cust_out_time_est_a2 + cust_out_time_est_b + cust_out_time_est_c) / 4

    return pd.Series({
        'timestamp': group['end_time'].iloc[-1],
        'duration': duration.iloc[-1],
        'duration_max': duration_max.iloc[-1],
        'duration_mean': duration_mean.iloc[-1],
        'customer_affected_mean': customer_affected_mean,
        'total_customer_outage_time': total_customer_outage_time,
        'total_customer_outage_time_max': total_customer_outage_time_max,
        'total_customer_outage_time_mean': total_customer_outage_time_mean
    })

In [165]:
df.head()

Unnamed: 0,outageRecID,outageName,outagePoint,outageStartTime,estimatedTimeOfRestoral,outageEndTime,verified,cause,crewAssigned,customersOutInitially,customersOutNow,customersRestored,streetsAffected,outageModifiedTime,outageWorkStatus,timestamp,zip,EMC,end_time,lat,lng
0,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13 15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,2023-04-13 15:50:53-04:00,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.812996,-84.602918
1,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13 15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,2023-04-13 15:54:01-04:00,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.812996,-84.602918
2,2023-04-13-1041,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13 15:47:48-04:00,,,False,,False,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:48:10-04:00,,2023-04-13 15:54:01-04:00,30536,Amicalola EMC,2023-04-13 17:24:17-04:00,34.655041,-84.258477
3,2023-04-13-1040,2023-04-13-1040,"{'lat': 34.812996172514474, 'lng': -84.6029181...",2023-04-13 15:44:42-04:00,,,False,,False,2,2,0,"['COHUTTA FOREST RD 145', 'OLD CCC CAMP RD 112...",2023-04-13T15:44:59-04:00,,2023-04-13 15:58:02-04:00,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.812996,-84.602918
4,2023-04-13-1041,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13 15:47:48-04:00,,,False,,False,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:48:10-04:00,,2023-04-13 15:58:02-04:00,30536,Amicalola EMC,2023-04-13 17:24:17-04:00,34.655041,-84.258477


In [166]:
df_grouped = df.groupby('outageRecID').apply(compute_metrics).reset_index().round(2)

In [167]:
df_grouped

Unnamed: 0,outageRecID,timestamp,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean
0,2023-04-13-1040,2023-04-13 19:58:21-04:00,253.65,268.65,261.15,16.50,12357.37,12762.37,12559.87
1,2023-04-13-1041,2023-04-13 17:24:17-04:00,96.48,111.48,103.98,1.00,276.22,291.22,283.72
2,2023-04-13-1042,2023-04-13 22:39:07-04:00,402.12,417.12,409.62,1.00,1178.90,1193.90,1186.40
3,2023-04-13-1043,2023-04-13 18:24:19-04:00,133.30,148.30,140.80,5.81,2252.33,2342.33,2297.33
4,2023-04-13-1044,2023-04-13 16:54:13-04:00,12.58,27.58,20.08,2.00,63.17,93.17,78.17
...,...,...,...,...,...,...,...,...,...
9046,2024-02-02-0018,2024-02-02 14:36:23-05:00,68.38,83.38,75.88,5.00,341.67,416.67,379.17
9047,2024-02-02-0019,2024-02-02 14:51:14-05:00,72.38,87.38,79.88,31.00,2246.47,2711.47,2478.97
9048,2024-02-02-0022,2024-02-02 16:51:15-05:00,51.57,66.57,59.07,1.00,51.53,66.53,59.03
9049,2024-02-02-0024,2024-02-02 17:36:26-05:00,47.37,62.37,54.87,1.00,47.18,62.18,54.68


In [168]:
df_merged = pd.merge(df_grouped, df, on=['outageRecID', 'timestamp'], how='inner')

In [169]:
df_merged

Unnamed: 0,outageRecID,timestamp,duration,duration_max,duration_mean,customer_affected_mean,total_customer_outage_time,total_customer_outage_time_max,total_customer_outage_time_mean,outageName,outagePoint,outageStartTime,estimatedTimeOfRestoral,outageEndTime,verified,cause,crewAssigned,customersOutInitially,customersOutNow,customersRestored,streetsAffected,outageModifiedTime,outageWorkStatus,zip,EMC,end_time,lat,lng
0,2023-04-13-1040,2023-04-13 19:58:21-04:00,253.65,268.65,261.15,16.50,12357.37,12762.37,12559.87,2023-04-13-1040,"{'lat': 34.815493607300034, 'lng': -84.5984642...",2023-04-13 15:44:42-04:00,,,False,400 Decay/age of material/equipment,True,27,27,0,"['CHATSWORTH HWY', 'COHUTTA FOREST 401', 'COHU...",2023-04-13T19:28:09-04:00,,30705,Amicalola EMC,2023-04-13 19:58:21-04:00,34.815494,-84.598464
1,2023-04-13-1041,2023-04-13 17:24:17-04:00,96.48,111.48,103.98,1.00,276.22,291.22,283.72,2023-04-13-1041,"{'lat': 34.65504147851155, 'lng': -84.25847692...",2023-04-13 15:47:48-04:00,,,False,,True,1,1,0,['ROCKWATER RD 215'],2023-04-13T15:57:30.3270000-04:00,,30536,Amicalola EMC,2023-04-13 17:24:17-04:00,34.655041,-84.258477
2,2023-04-13-1042,2023-04-13 22:39:07-04:00,402.12,417.12,409.62,1.00,1178.90,1193.90,1186.40,2023-04-13-1042,"{'lat': 34.76022275601547, 'lng': -84.43656079...",2023-04-13 15:57:00-04:00,,,False,,True,1,1,0,['POCASET DR 150'],2023-04-13T20:08:15-04:00,,30540,Amicalola EMC,2023-04-13 22:39:07-04:00,34.760223,-84.436561
3,2023-04-13-1043,2023-04-13 18:24:19-04:00,133.30,148.30,140.80,5.81,2252.33,2342.33,2297.33,2023-04-13-1043,"{'lat': 34.64094744509421, 'lng': -84.27010785...",2023-04-13 16:11:01-04:00,,,False,,True,6,6,0,"['NEWBERRY DR 364', 'NEWBERRY DR 626', 'NEWBER...",2023-04-13T17:24:18.9530000-04:00,,unknown,Amicalola EMC,2023-04-13 18:24:19-04:00,34.640947,-84.270108
4,2023-04-13-1044,2023-04-13 16:54:13-04:00,12.58,27.58,20.08,2.00,63.17,93.17,78.17,2023-04-13-1044,"{'lat': 34.400304776099645, 'lng': -84.2707052...",2023-04-13 16:41:38-04:00,,,False,,False,2,2,0,"['LOON LN 25', 'LOVELADY RD 505']",2023-04-13T16:42:03-04:00,,30107,Amicalola EMC,2023-04-13 16:54:13-04:00,34.400305,-84.270705
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9046,2024-02-02-0018,2024-02-02 14:36:23-05:00,68.38,83.38,75.88,5.00,341.67,416.67,379.17,2024-02-02-0018,"{'lat': 34.714668737578464, 'lng': -84.4774361...",2024-02-02 13:28:00-05:00,,,False,,True,5,5,0,"['BOARDTOWN RD 1064', 'BOARDTOWN RD 1066', 'VA...",2024-02-02T13:33:18.5070000-05:00,,30540,Amicalola EMC,2024-02-02 14:36:23-05:00,34.714669,-84.477436
9047,2024-02-02-0019,2024-02-02 14:51:14-05:00,72.38,87.38,79.88,31.00,2246.47,2711.47,2478.97,2024-02-02-0019,"{'lat': 34.54793254252141, 'lng': -84.04290642...",2024-02-02 13:38:51-05:00,,,False,,True,31,31,0,"['HOMER EDWARDS RD 385', 'HOMER EDWARDS RD 425...",2024-02-02T13:47:42-05:00,,30533,Amicalola EMC,2024-02-02 14:51:14-05:00,34.547933,-84.042906
9048,2024-02-02-0022,2024-02-02 16:51:15-05:00,51.57,66.57,59.07,1.00,51.53,66.53,59.03,2024-02-02-0022,"{'lat': 34.612873511729674, 'lng': -84.0811178...",2024-02-02 15:59:41-05:00,,,False,,True,1,1,0,['SUNRISE RIDGE/ETOWAH TRL'],2024-02-02T16:03:33.6600000-05:00,,unknown,Amicalola EMC,2024-02-02 16:51:15-05:00,34.612874,-84.081118
9049,2024-02-02-0024,2024-02-02 17:36:26-05:00,47.37,62.37,54.87,1.00,47.18,62.18,54.68,2024-02-02-0024,"{'lat': 34.54980869474821, 'lng': -84.04115022...",2024-02-02 16:49:04-05:00,,,False,,True,1,1,0,['HORTON RD 90'],2024-02-02T16:54:59.6570000-05:00,,30533,Amicalola EMC,2024-02-02 17:36:26-05:00,34.549809,-84.041150


In [38]:
pd.read_csv('uszips.csv')

Unnamed: 0,zip,lat,lng,city,state_id,state_name,zcta,parent_zcta,population,density,county_fips,county_name,county_weights,county_names_all,county_fips_all,imprecise,military,timezone
0,601,18.18027,-66.75266,Adjuntas,PR,Puerto Rico,True,,16834.0,100.9,72001,Adjuntas,"{""72001"": 98.73, ""72141"": 1.27}",Adjuntas|Utuado,72001|72141,False,False,America/Puerto_Rico
1,602,18.36075,-67.17541,Aguada,PR,Puerto Rico,True,,37642.0,479.2,72003,Aguada,"{""72003"": 100}",Aguada,72003,False,False,America/Puerto_Rico
2,603,18.45744,-67.12225,Aguadilla,PR,Puerto Rico,True,,49075.0,551.7,72005,Aguadilla,"{""72005"": 99.76, ""72099"": 0.24}",Aguadilla|Moca,72005|72099,False,False,America/Puerto_Rico
3,606,18.16585,-66.93716,Maricao,PR,Puerto Rico,True,,5590.0,48.7,72093,Maricao,"{""72093"": 82.27, ""72153"": 11.66, ""72121"": 6.06}",Maricao|Yauco|Sabana Grande,72093|72153|72121,False,False,America/Puerto_Rico
4,610,18.29110,-67.12243,Anasco,PR,Puerto Rico,True,,25542.0,265.7,72011,Añasco,"{""72011"": 96.7, ""72099"": 2.81, ""72083"": 0.37, ...",Añasco|Moca|Las Marías|Aguada,72011|72099|72083|72003,False,False,America/Puerto_Rico
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33783,99923,55.98043,-130.03803,Hyder,AK,Alaska,True,,25.0,0.6,2198,Prince of Wales-Hyder,"{""02198"": 100}",Prince of Wales-Hyder,02198,False,False,America/Sitka
33784,99925,55.55398,-132.96276,Klawock,AK,Alaska,True,,920.0,6.6,2198,Prince of Wales-Hyder,"{""02198"": 100}",Prince of Wales-Hyder,02198,False,False,America/Sitka
33785,99926,55.12617,-131.48928,Metlakatla,AK,Alaska,True,,1465.0,4.3,2198,Prince of Wales-Hyder,"{""02198"": 100}",Prince of Wales-Hyder,02198,False,False,America/Metlakatla
33786,99927,56.33305,-133.60044,Point Baker,AK,Alaska,True,,14.0,1.2,2198,Prince of Wales-Hyder,"{""02198"": 100}",Prince of Wales-Hyder,02198,False,False,America/Sitka
