## Converting California to S2 Cells

In [1]:
# Widen the notebook container so wide DataFrame outputs are not clipped.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import gc
import os
import re
import time
from datetime import date, timedelta

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_profiling as pp
import s2_py as s2
import shapefile as shp
from shapely.geometry import Polygon, mapping, box

### Part 1. Read in CA coordinates

In [3]:
# Load the California state boundary (TIGER 2016). Change the path to wherever
# the CA_State folder was downloaded (it is also in the EDA/Data folder on GitHub).
ca_df = gpd.read_file("./Data/CA_State/CA_State_TIGER2016.shp")
# Reproject to EPSG:4326 (lon/lat in degrees): the S2 conversion below builds
# S2LatLng values from degrees. The `epsg=` keyword replaces the deprecated
# `{'init': 'epsg:4326'}` PROJ dict.
ca_df = ca_df.to_crs(epsg=4326)
ca_df

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,4,9,6,1779778,6,CA,California,0,G4000,A,403501101370,20466718403,37.1551773,-119.5434183,(POLYGON ((-119.6347313537315 33.2654466936341...


In [4]:
# ca_counties_df = gpd.read_file("./Data/CA_Counties/CA_Counties_TIGER2016.shp")
# ca_counties_df = ca_counties_df.to_crs({'init': 'epsg:4326'})
# ca_counties_df.shape

In [5]:
# ca_counties_df.head()

In [6]:
# ca_counties_df.COUNTYFP.nunique()

In [7]:
def extract_max_polygon(fire_poly):
    """Return the exterior ring of the largest polygon of a (multi)polygon.

    "Largest" is approximated by vertex count: among the member polygons the
    one whose exterior ring has the most vertices wins (the first one on ties).
    Holes (interior rings) are ignored.

    Arguments:
        fire_poly: geometry accepted by shapely's ``mapping`` -- a Polygon or
            MultiPolygon, or a feature collection whose first feature holds one
            (only that first feature is looked at).

    Returns:
        The exterior ring as a sequence of (x, y) coordinate pairs.

    Raises:
        ValueError: if the geometry is empty or is neither a Polygon nor a
            MultiPolygon.
    """
    fire_map = mapping(fire_poly)
    if 'coordinates' in fire_map:
        geometry = fire_map
    elif 'features' in fire_map:
        geometry = fire_map['features'][0]['geometry']
    else:
        raise ValueError("geometry mapping has neither 'coordinates' nor 'features'")

    # Dispatch on the declared geometry type rather than on len(coordinates):
    # a Polygon with holes has several rings, and a MultiPolygon may have a
    # single member, so the length alone cannot tell the two apart.
    geom_type = geometry.get('type')
    coords = geometry['coordinates']
    if geom_type == 'Polygon':
        polygons = [coords]
    elif geom_type == 'MultiPolygon':
        polygons = coords
    else:
        raise ValueError("expected Polygon or MultiPolygon, got %r" % (geom_type,))

    # The first ring of every polygon is its exterior; later rings are holes.
    exteriors = [rings[0] for rings in polygons if len(rings) > 0]
    if not exteriors:
        raise ValueError("geometry is empty")
    # max() keeps the first maximal element, matching a strict ">" scan.
    return max(exteriors, key=len)

In [8]:
# Reduce each row's geometry to a single ring of coordinates; the S2 loop in
# Part 2 is built from this ring.
ca_df['Largest_polygon'] = ca_df.geometry.apply(extract_max_polygon)
ca_df.head()

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,Largest_polygon
0,4,9,6,1779778,6,CA,California,0,G4000,A,403501101370,20466718403,37.1551773,-119.5434183,(POLYGON ((-119.6347313537315 33.2654466936341...,"((-124.13657496576488, 41.46445707660226), (-1..."


In [9]:
# ca_counties_df['Largest_polygon'] = ca_counties_df.geometry.apply(extract_max_polygon)
# ca_counties_df.head()

### Part 2. Create S2 Cells

In [10]:
def create_S2_loop(max_poly):
    """Converts a polygon ring into an S2 Loop.

    Arguments:
        max_poly: sequence of (longitude, latitude) pairs in degrees, e.g. the
            ring returned by extract_max_polygon.

    Returns:
        s2.S2Loop built from the ring's vertices in reversed order.
    """
    ring = list(max_poly)
    # Shapely/GeoJSON rings repeat the first vertex at the end, whereas an
    # S2Loop closes implicitly and must not contain duplicate vertices.
    if len(ring) > 1 and tuple(ring[0]) == tuple(ring[-1]):
        ring = ring[:-1]
    # The vertex order is reversed because S2 expects counter-clockwise loops.
    # NOTE(review): this assumes the incoming ring is clockwise -- confirm for
    # data sources other than the TIGER shapefile used in this notebook.
    points = [
        s2.S2LatLng.FromDegrees(lat, lng).ToPoint()
        for lng, lat in reversed(ring)
    ]
    return s2.S2Loop(points)

In [11]:
# Build one S2Loop per row from the ring stored in `Largest_polygon`.
ca_df['S2_Loop'] = ca_df.Largest_polygon.apply(create_S2_loop)
ca_df

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,Largest_polygon,S2_Loop
0,4,9,6,1779778,6,CA,California,0,G4000,A,403501101370,20466718403,37.1551773,-119.5434183,(POLYGON ((-119.6347313537315 33.2654466936341...,"((-124.13657496576488, 41.46445707660226), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...


In [12]:
# ca_counties_df['S2_Loop'] = ca_counties_df.Largest_polygon.apply(create_S2_loop)
# ca_counties_df.head()

In [13]:
def create_S2_coverer(region, lvl):
    """Cover `region` with S2 cells of one fixed level.

    Arguments:
        region: S2 region object handed to S2RegionCoverer.GetCovering
        lvl: cell level, used as both the minimum and the maximum level

    Returns:
        Whatever GetCovering returns for the region (the covering cells)
    """
    fixed_level_coverer = s2.S2RegionCoverer()
    # Setting min and max to the same value pins every cell to that level.
    for set_bound in (fixed_level_coverer.set_min_level,
                      fixed_level_coverer.set_max_level):
        set_bound(lvl)
    covering = fixed_level_coverer.GetCovering(region)
    return covering

In [14]:
# Cover each loop with S2 cells; 13 is passed to create_S2_coverer as `lvl`,
# which it uses as both the minimum and the maximum cell level.
ca_df['S2_Cells'] = ca_df.S2_Loop.apply(create_S2_coverer, args=[13])
ca_df

Unnamed: 0,REGION,DIVISION,STATEFP,STATENS,GEOID,STUSPS,NAME,LSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,Largest_polygon,S2_Loop,S2_Cells
0,4,9,6,1779778,6,CA,California,0,G4000,A,403501101370,20466718403,37.1551773,-119.5434183,(POLYGON ((-119.6347313537315 33.2654466936341...,"((-124.13657496576488, 41.46445707660226), (-1...",<s2_py.pywraps2.S2Loop; proxy of <Swig Object ...,"(2/2212102122120�, 2/2212102122121�, 2/2212102..."


In [15]:
print(ca_df['S2_Cells'][0][0])

2/2212102122120 


In [16]:
ca_df['S2_Cells'][0][0].id()

6109472385923022848

In [17]:
ca_df['S2_Cells'][0][0].ToToken()

'54c934c4'

### Part 3. Create dataframe with one row per S2 Cell

In [18]:
def split_data_frame_list(df, target_column, row_id):
    """
    Splits a column with lists into rows

    Every item of every list becomes its own output row, paired with the
    `row_id` value of the row it came from. Rows keep their original order and
    the result gets a fresh 0..n-1 index. Missing items (None / NaN) and rows
    whose cell is missing or empty produce no output rows.

    The pairing is positional, so the result does not depend on the index of
    `df`, and item values keep their original type.

    Arguments:
        df: dataframe
        target_column: name of column that contains lists
        row_id: column to merge back on (values may repeat)

    Returns:
        Dataframe with exactly two columns: `row_id` and `target_column`
    """
    is_scalar = pd.api.types.is_scalar
    ids = []
    items = []
    for rid, cell in zip(df[row_id], df[target_column]):
        # a missing cell (None / NaN instead of a list) contributes nothing
        if is_scalar(cell) and pd.isnull(cell):
            continue
        for item in cell:
            # only scalars are tested for missingness; objects and nested
            # sequences are always kept
            if is_scalar(item) and pd.isnull(item):
                continue
            ids.append(rid)
            items.append(item)

    return pd.DataFrame({row_id: ids, target_column: items},
                        columns=[row_id, target_column])

In [19]:
# Turn the per-state collection of cells into one row per S2 cell, keyed by NAME.
ca_s2_df = ca_df[['NAME', 'S2_Cells']]
ca_s2_df = split_data_frame_list(ca_s2_df, 'S2_Cells', 'NAME')
# Store each cell's token (the string returned by ToToken(), e.g. '54c934c4').
ca_s2_df['S2_Cells_ID'] = ca_s2_df.S2_Cells.apply(lambda x: x.ToToken())
ca_s2_df.shape

(377631, 3)

In [20]:
ca_s2_df.head()

Unnamed: 0,NAME,S2_Cells,S2_Cells_ID
0,California,2/2212102122120�,54c934c4
1,California,2/2212102122121�,54c934cc
2,California,2/2212102122122�,54c934d4
3,California,2/2212102122123�,54c934dc
4,California,2/2212102122130�,54c934e4


In [22]:
len(set(ca_s2_df.S2_Cells_ID))

377631

### Part 4. Convert to Shapefile

In [39]:
def S2Cells_To_Poly(s2_cell):
    """Build a shapely Polygon from the four corners of an S2 cell.

    Arguments:
        s2_cell: value accepted by the ``s2.S2Cell`` constructor (this notebook
            passes the cell ids produced by the region covering)

    Returns:
        shapely Polygon whose vertices are (longitude, latitude) pairs in
        degrees, in the order GetVertex(0) .. GetVertex(3) returns them
    """
    cell = s2.S2Cell(s2_cell)
    corners = []
    for k in range(4):
        corner = s2.S2LatLng(cell.GetVertex(k))
        # shapely expects (x, y), i.e. (longitude, latitude)
        corners.append((corner.lng().degrees(), corner.lat().degrees()))
    return Polygon(corners)

In [158]:
# Attach a shapely Polygon (corner vertices as (lng, lat) degrees) to every S2 cell.
ca_s2_df['geometry'] = ca_s2_df.S2_Cells.apply(S2Cells_To_Poly)
ca_s2_df.head()

Unnamed: 0,NAME,S2_Cells,S2_Cells_ID,geometry
0,California,2/2212102122120�,54c934c4,POLYGON ((-121.5216871390368 42.00668446866296...
1,California,2/2212102122121�,54c934cc,POLYGON ((-121.5335407453959 42.00306844180258...
2,California,2/2212102122122�,54c934d4,POLYGON ((-121.5249364503428 41.99609443564806...
3,California,2/2212102122123�,54c934dc,POLYGON ((-121.5130846540147 41.99970860001073...
4,California,2/2212102122130�,54c934e4,POLYGON ((-121.5012315987987 42.00332151857898...


In [159]:
ca_s2_df.shape

(377631, 4)

In [160]:
# Keep only the token and polygon columns and wrap them in a GeoDataFrame tagged as EPSG:4326.
# NOTE(review): the {'init': 'epsg:4326'} dict form is deprecated in recent pyproj/geopandas
# releases; consider crs='EPSG:4326' once the minimum supported geopandas version is confirmed.
ca_s2_geo_df = gpd.GeoDataFrame(ca_s2_df[['S2_Cells_ID', 'geometry']], crs={'init': 'epsg:4326'}, geometry='geometry')
ca_s2_geo_df.head()

Unnamed: 0,S2_Cells_ID,geometry
0,54c934c4,POLYGON ((-121.5216871390368 42.00668446866296...
1,54c934cc,POLYGON ((-121.5335407453959 42.00306844180258...
2,54c934d4,POLYGON ((-121.5249364503428 41.99609443564806...
3,54c934dc,POLYGON ((-121.5130846540147 41.99970860001073...
4,54c934e4,POLYGON ((-121.5012315987987 42.00332151857898...


In [37]:
# ca_s2_geo_df['S2_Cells_ID'] = ca_s2_geo_df['S2_Cells_ID'].astype('str')
# ca_s2_geo_df['S2_Cells_ID'].head()

0    6109472381628055552
1    6109472390217990144
2    6109472407397859328
3    6109472415987793920
4    6109472424577728512
Name: S2_Cells_ID, dtype: object

In [161]:
ca_s2_geo_df['S2_Cells_ID'].tail()

377626    80eebd24
377627    80eebd2c
377628    80eebd34
377629    80eebd3c
377630    80eebd4c
Name: S2_Cells_ID, dtype: object

In [34]:
ls Data/Processed

WildFire_S2Cells_nogeom.csv  WildFire_S2Cells_nogeom.zip


In [162]:
# Write the cell polygons as an ESRI Shapefile. `to_file` does not create
# missing folders, so make sure the output directory exists first.
ca_s2_shp_path = './Data/Processed/CA_S2Cells13_Shapely/CA_S2Cells.shp'
os.makedirs(os.path.dirname(ca_s2_shp_path), exist_ok=True)
ca_s2_geo_df.to_file(ca_s2_shp_path, driver='ESRI Shapefile')

### Part 5. Check overlap with WildFire data

In [124]:
# Read the wildfire rows keyed by S2 cell token (level 13, no geometry column) from the Processed folder.
cal_fire_s2id_df = pd.read_csv('./Data/Processed/WildFire_S2Cells13_nogeom.csv')
cal_fire_s2id_df.shape

(75642, 15)

In [125]:
cal_fire_s2id_df.head()

Unnamed: 0,FIRE_CUSTOM_ID,S2_Cells_ID,AGENCY,UNIT_ID,CAUSE,GIS_ACRES,C_METHOD,OBJECTIVE,Shape_Length,Shape_Area,YEAR,ALARM_DATE_DT_DT,CONT_DATE_DT_DT,FIRE_DUR,ALARM_DATE_MONTH
0,OCTOBER_2007_10_21_0,80c28604,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
1,OCTOBER_2007_10_21_0,80c2860c,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
2,OCTOBER_2007_10_21_0,80c28614,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
3,OCTOBER_2007_10_21_0,80c2861c,CCO,LAC,14.0,25.736713,8.0,1.0,1902.439051,104152.8,2007,2007-10-21,2007-10-23,3.0,10
4,MAGIC_2007_10_22_0,80c27fd4,CCO,LAC,14.0,2824.877197,8.0,1.0,20407.965662,11431870.0,2007,2007-10-22,2007-10-25,4.0,10


In [126]:
ca_s2_df.head()

Unnamed: 0,NAME,S2_Cells,S2_Cells_ID,geometry
0,California,2/2212102122120�,54c934c4,POLYGON ((-121.5216871390368 42.00668446866296...
1,California,2/2212102122121�,54c934cc,POLYGON ((-121.5335407453959 42.00306844180258...
2,California,2/2212102122122�,54c934d4,POLYGON ((-121.5249364503428 41.99609443564806...
3,California,2/2212102122123�,54c934dc,POLYGON ((-121.5130846540147 41.99970860001073...
4,California,2/2212102122130�,54c934e4,POLYGON ((-121.5012315987987 42.00332151857898...


In [127]:
ca_s2_df[['NAME', 'S2_Cells_ID']].head()

Unnamed: 0,NAME,S2_Cells_ID
0,California,54c934c4
1,California,54c934cc
2,California,54c934d4
3,California,54c934dc
4,California,54c934e4


In [128]:
# One row per distinct S2 cell that appears in the wildfire table, flagged with WildFire = 1.
wf_cell_tokens = cal_fire_s2id_df.S2_Cells_ID.unique()
wf_s2_df = pd.DataFrame({'S2_Cells_ID': wf_cell_tokens})
wf_s2_df['WildFire'] = 1
wf_s2_df.shape

(63603, 2)

In [129]:
wf_s2_df.head()

Unnamed: 0,S2_Cells_ID,WildFire
0,80c28604,1
1,80c2860c,1
2,80c28614,1
3,80c2861c,1
4,80c27fd4,1


In [130]:
# Left-join so that every California cell is kept; cells without a wildfire match get NaN,
# which fillna(0) turns into 0. Because of the NaN step the WildFire flag ends up as float (0.0 / 1.0).
ca_s2_wf_df = pd.merge(ca_s2_df[['NAME', 'S2_Cells_ID']], wf_s2_df, on='S2_Cells_ID', how='left').fillna(0)
ca_s2_wf_df.shape

(377631, 3)

In [55]:
377631*3300

1246182300

In [131]:
ca_s2_wf_df.shape[0] - ca_s2_df.shape[0]

0

In [132]:
ca_s2_wf_df.head()

Unnamed: 0,NAME,S2_Cells_ID,WildFire
0,California,54c934c4,0.0
1,California,54c934cc,0.0
2,California,54c934d4,0.0
3,California,54c934dc,0.0
4,California,54c934e4,0.0


In [133]:
# some of the wildfires were primarily outside California
ca_s2_wf_df.WildFire.sum(), ca_s2_wf_df.WildFire.mean()

(60668.0, 0.16065418358132674)

### Part 6. Random sample of S2 Cells without wildfires

In [134]:
# Down-sample the cells that never had a wildfire: cells with WildFire == 1 are
# always kept, every other cell is kept with probability 0.3.
# A fixed seed makes the draw (and every count derived from it) reproducible
# after a kernel restart; one vectorised draw replaces one RNG call per row.
SAMPLE_SEED = 42
sample_rng = np.random.RandomState(SAMPLE_SEED)
no_fire_draw = sample_rng.choice(2, size=len(ca_s2_wf_df), p=[0.7, 0.3])
ca_s2_wf_df['WildFire_0_Sample'] = np.where(ca_s2_wf_df.WildFire == 1,
                                            ca_s2_wf_df.WildFire,
                                            no_fire_draw)
ca_s2_wf_df.head()

Unnamed: 0,NAME,S2_Cells_ID,WildFire,WildFire_0_Sample
0,California,54c934c4,0.0,0.0
1,California,54c934cc,0.0,0.0
2,California,54c934d4,0.0,0.0
3,California,54c934dc,0.0,0.0
4,California,54c934e4,0.0,0.0


In [135]:
ca_s2_wf_df.groupby(['WildFire']).WildFire_0_Sample.sum()

WildFire
0.0    95260.0
1.0    60668.0
Name: WildFire_0_Sample, dtype: float64

In [136]:
ca_s2_wf_df.WildFire_0_Sample.sum()

155928.0

In [142]:
# Keep the rows flagged 1 in WildFire_0_Sample: every wildfire cell plus the randomly retained no-fire cells.
ca_s2_wf_sample_df = ca_s2_wf_df[ca_s2_wf_df.WildFire_0_Sample==1]
ca_s2_wf_sample_df.shape

(155928, 4)

### Part 6b. Random sample of Dates without wildfires

In [140]:
# Build the list of every calendar day from 2010-01-01 through 2018-12-31.
d1 = date(2010, 1, 1)  
d2 = date(2018, 12, 31)  

delta = d2 - d1  
# +1 so that the end date itself is included
dates = [d1+timedelta(days=i) for i in range(delta.days+1)]
len(dates)

3287

In [23]:
dates[:5]

[datetime.date(2010, 1, 1),
 datetime.date(2010, 1, 2),
 datetime.date(2010, 1, 3),
 datetime.date(2010, 1, 4),
 datetime.date(2010, 1, 5)]

In [24]:
dates[-5:]

[datetime.date(2018, 12, 27),
 datetime.date(2018, 12, 28),
 datetime.date(2018, 12, 29),
 datetime.date(2018, 12, 30),
 datetime.date(2018, 12, 31)]

In [137]:
cal_fire_s2id_df.ALARM_DATE_DT_DT.nunique()

2510

In [143]:
ca_s2_wf_sample_df.shape[0]*cal_fire_s2id_df.ALARM_DATE_DT_DT.nunique()

391379280

In [144]:
ca_s2_wf_sample_df.shape[0]*len(dates)

512535336

In [145]:
ca_s2_wf_sample_df.head()

Unnamed: 0,NAME,S2_Cells_ID,WildFire,WildFire_0_Sample
11,California,54c9352c,0.0,1.0
16,California,54c93554,0.0,1.0
18,California,54c93564,0.0,1.0
19,California,54c9356c,0.0,1.0
26,California,54c935a4,0.0,1.0


In [157]:
# Write one tab-separated "<cell token>\t<date>" line for every (sampled cell, date) pair.
# The date strings are formatted once up front instead of once per pair, and
# each cell's lines are handed to the file in a single writelines() call.
date_strs = [str(day) for day in dates]
with open('./Data/Processed/CA_S2_lvl13_WF.txt', 'w') as out_file:
    for cell_token in ca_s2_wf_sample_df['S2_Cells_ID']:
        out_file.writelines(cell_token + '\t' + day_str + '\n' for day_str in date_strs)

### Part 7. Transmission Lines to S2 Cells

In [23]:
# Load the transmission-line shapefile.
# NOTE(review): unlike the state boundary, no to_crs() is applied here -- the coordinates in the
# output look like lon/lat degrees, but confirm that the file's CRS is EPSG:4326.
tl_df = gpd.read_file("./Data/Transmission_Line/Transmission_Line.shp")
tl_df.head()

Unnamed: 0,OBJECTID,Name,kV,kV_Sort,Owner,Status,Circuit,Type,Legend,Length_Mil,Length_Fee,Comments,Shape__Len,geometry
0,2001,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523,LINESTRING (-122.025217180817 39.7560586198882...
1,2002,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,2.0,11459.33011218,,0.032482,LINESTRING (-122.010209405169 39.5827809214517...
2,2003,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,11.0,56984.89118592,,0.159223,LINESTRING (-122.003238381708 39.5533528274813...
3,2004,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,15.0,81003.57166241,,0.250896,LINESTRING (-122.013588113745 39.4023799821862...
4,2005,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,20.0,105641.41726686,,0.300362,"LINESTRING (-122.153286201514 39.276960876995,..."


In [24]:
tl_df.shape

(6841, 14)

In [35]:
# Summarise every attribute column. The geometry column (and the S2_PolyLine column, if it has
# already been added) is excluded by name rather than by position, so the result does not depend
# on the order in which the cells were executed.
pd.DataFrame(tl_df.drop(columns=['geometry', 'S2_PolyLine'], errors='ignore')).describe(include = 'all')

Unnamed: 0,OBJECTID,Name,kV,kV_Sort,Owner,Status,Circuit,Type,Legend,Length_Mil,Length_Fee,Comments,Shape__Len
count,6841.0,6841,6817.0,6817.0,6817,6817,6817,6817,6817,6793.0,6804.0,581,6841.0
unique,,99,21.0,,36,3,6,4,35,,6782.0,156,
top,,SCE 66kV,66.0,,PG&E,Operational,Single,OH,SCE_33_69kV,,379674.82497541,Partially underground,
freq,,1463,1498.0,,2826,6802,5437,6696,1646,,2.0,181,
mean,3421.0,,,116.437949,,,,,,5.156485,,,0.08511326
std,1974.970928,,,84.262934,,,,,,12.254171,,,0.1948325
min,1.0,,,33.0,,,,,,0.0,,,2.802626e-08
25%,1711.0,,,66.0,,,,,,0.0,,,0.005894882
50%,3421.0,,,69.0,,,,,,2.0,,,0.02769896
75%,5131.0,,,115.0,,,,,,5.0,,,0.08373453


In [36]:
# Profile the attribute columns only. geometry / S2_PolyLine are excluded by name rather than
# by position, so re-running this cell after S2_PolyLine has been added gives the same report.
pp.ProfileReport(pd.DataFrame(tl_df.drop(columns=['geometry', 'S2_PolyLine'], errors='ignore')))

0,1
Number of variables,13
Number of observations,6841
Total Missing (%),7.3%
Total size in memory,694.9 KiB
Average record size in memory,104.0 B

0,1
Numeric,3
Categorical,9
Boolean,0
Date,0
Text (Unique),0
Rejected,1
Unsupported,0

0,1
Distinct count,7
Unique (%),0.1%
Missing (%),0.4%
Missing (n),24

0,1
Single,5437
Double,1329
Many,46
Other values (3),5
(Missing),24

Value,Count,Frequency (%),Unnamed: 3
Single,5437,79.5%,
Double,1329,19.4%,
Many,46,0.7%,
Liberty Energy,2,0.0%,
Duble,2,0.0%,
Quad,1,0.0%,
(Missing),24,0.4%,

0,1
Distinct count,157
Unique (%),2.3%
Missing (%),91.5%
Missing (n),6260

0,1
Partially underground,181
Multiple lines,24
Obanion - elverta #2 (obnelv2),18
Other values (153),358
(Missing),6260

Value,Count,Frequency (%),Unnamed: 3
Partially underground,181,2.6%,
Multiple lines,24,0.4%,
Obanion - elverta #2 (obnelv2),18,0.3%,
Line eventually goes underground,15,0.2%,
Unsured the path for lines as the aerial photos are very uncleared and don't have any other good res,14,0.2%,
Changed kv from 115 to 60kv,13,0.2%,
Double circuit. changed kv from 230 to 115,12,0.2%,
Elverta - hurley #1 (elvhur1); double circuit,12,0.2%,
Multiple lines on towers,12,0.2%,
Fiddyment - elverta (fiyelv),11,0.2%,

0,1
Distinct count,36
Unique (%),0.5%
Missing (%),0.4%
Missing (n),24

0,1
SCE_33_69kV,1646
PG&E_60_70kV,1097
PG&E_115kV,1030
Other values (32),3044

Value,Count,Frequency (%),Unnamed: 3
SCE_33_69kV,1646,24.1%,
PG&E_60_70kV,1097,16.0%,
PG&E_115kV,1030,15.1%,
PG&E_230kV,648,9.5%,
SMUD_60kV,335,4.9%,
Other_33_92kV,311,4.5%,
SCE_220_230kV,264,3.9%,
SCE_115_161kV,244,3.6%,
SDG&E_69kV,208,3.0%,
IID_34.5_92kV,164,2.4%,

0,1
Distinct count,6783
Unique (%),99.2%
Missing (%),0.5%
Missing (n),37

0,1
34352.70396286,2
18043.09963567,2
237374.30241136,2
Other values (6779),6798
(Missing),37

Value,Count,Frequency (%),Unnamed: 3
34352.70396286,2,0.0%,
18043.09963567,2,0.0%,
237374.30241136,2,0.0%,
26899.46236894,2,0.0%,
223262.07676911,2,0.0%,
50502.60321939,2,0.0%,
478.71883579,2,0.0%,
2432.03565343,2,0.0%,
182052.01948559,2,0.0%,
155045.08789472,2,0.0%,

0,1
Distinct count,95
Unique (%),1.4%
Missing (%),0.7%
Missing (n),48
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,5.1565
Minimum,0
Maximum,294
Zeros (%),29.3%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,2
Q3,5
95-th percentile,20
Maximum,294
Range,294
Interquartile range,5

0,1
Standard deviation,12.254
Coef of variation,2.3765
Kurtosis,120.41
Mean,5.1565
MAD,5.717
Skewness,8.5357
Sum,35028
Variance,150.16
Memory size,53.5 KiB

Value,Count,Frequency (%),Unnamed: 3
0.0,2006,29.3%,
1.0,1196,17.5%,
2.0,791,11.6%,
3.0,517,7.6%,
4.0,385,5.6%,
5.0,292,4.3%,
6.0,225,3.3%,
7.0,173,2.5%,
8.0,155,2.3%,
9.0,142,2.1%,

Value,Count,Frequency (%),Unnamed: 3
0.0,2006,29.3%,
1.0,1196,17.5%,
2.0,791,11.6%,
3.0,517,7.6%,
4.0,385,5.6%,

Value,Count,Frequency (%),Unnamed: 3
150.0,1,0.0%,
166.0,1,0.0%,
173.0,1,0.0%,
236.0,2,0.0%,
294.0,1,0.0%,

0,1
Distinct count,99
Unique (%),1.4%
Missing (%),0.0%
Missing (n),0

0,1
SCE 66kV,1463
PG&E 115kV,1030
PG&E 60kV,992
Other values (96),3356

Value,Count,Frequency (%),Unnamed: 3
SCE 66kV,1463,21.4%,
PG&E 115kV,1030,15.1%,
PG&E 60kV,992,14.5%,
PG&E 230kV,648,9.5%,
SMUD 60kV,335,4.9%,
SCE 220kV,259,3.8%,
SCE 115kV,243,3.6%,
SDG&E 69kV,208,3.0%,
WAPA 230kV,120,1.8%,
PG&E 70kV,104,1.5%,

0,1
Distinct count,6841
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3421
Minimum,1
Maximum,6841
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,343
Q1,1711
Median,3421
Q3,5131
95-th percentile,6499
Maximum,6841
Range,6840
Interquartile range,3420

0,1
Standard deviation,1975
Coef of variation,0.57731
Kurtosis,-1.2
Mean,3421
MAD,1710.2
Skewness,0
Sum,23403061
Variance,3900500
Memory size,53.5 KiB

Value,Count,Frequency (%),Unnamed: 3
2047,1,0.0%,
5360,1,0.0%,
3315,1,0.0%,
5368,1,0.0%,
1274,1,0.0%,
3323,1,0.0%,
5376,1,0.0%,
1282,1,0.0%,
3331,1,0.0%,
5384,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,1,0.0%,
2,1,0.0%,
3,1,0.0%,
4,1,0.0%,
5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
6837,1,0.0%,
6838,1,0.0%,
6839,1,0.0%,
6840,1,0.0%,
6841,1,0.0%,

0,1
Distinct count,37
Unique (%),0.5%
Missing (%),0.4%
Missing (n),24

0,1
PG&E,2826
SCE,2212
SMUD,419
Other values (33),1360

Value,Count,Frequency (%),Unnamed: 3
PG&E,2826,41.3%,
SCE,2212,32.3%,
SMUD,419,6.1%,
SDG&E,363,5.3%,
IID,187,2.7%,
WAPA,151,2.2%,
LADWP,135,2.0%,
PCORP,93,1.4%,
MID,67,1.0%,
TID,67,1.0%,

0,1
Correlation,0.9674

0,1
Distinct count,4
Unique (%),0.1%
Missing (%),0.4%
Missing (n),24

0,1
Operational,6802
Proposed,14
Closed,1
(Missing),24

Value,Count,Frequency (%),Unnamed: 3
Operational,6802,99.4%,
Proposed,14,0.2%,
Closed,1,0.0%,
(Missing),24,0.4%,

0,1
Distinct count,5
Unique (%),0.1%
Missing (%),0.4%
Missing (n),24

0,1
OH,6696
UG,117
UW,3
(Missing),24

Value,Count,Frequency (%),Unnamed: 3
OH,6696,97.9%,
UG,117,1.7%,
UW,3,0.0%,
ug,1,0.0%,
(Missing),24,0.4%,

0,1
Distinct count,22
Unique (%),0.3%
Missing (%),0.4%
Missing (n),24

0,1
66,1498
60,1474
115,1382
Other values (18),2463

Value,Count,Frequency (%),Unnamed: 3
66,1498,21.9%,
60,1474,21.5%,
115,1382,20.2%,
230,1064,15.6%,
69,476,7.0%,
220,263,3.8%,
500,131,1.9%,
70,104,1.5%,
92,104,1.5%,
33,87,1.3%,

0,1
Distinct count,21
Unique (%),0.3%
Missing (%),0.4%
Missing (n),24
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,116.44
Minimum,33
Maximum,500
Zeros (%),0.0%

0,1
Minimum,33
5-th percentile,60
Q1,66
Median,69
Q3,115
95-th percentile,230
Maximum,500
Range,467
Interquartile range,49

0,1
Standard deviation,84.263
Coef of variation,0.72367
Kurtosis,6.4469
Mean,116.44
MAD,60.405
Skewness,2.2299
Sum,793760
Variance,7100.2
Memory size,53.5 KiB

Value,Count,Frequency (%),Unnamed: 3
66.0,1498,21.9%,
60.0,1474,21.5%,
115.0,1382,20.2%,
230.0,1064,15.6%,
69.0,476,7.0%,
220.0,263,3.8%,
500.0,138,2.0%,
70.0,104,1.5%,
92.0,104,1.5%,
33.0,87,1.3%,

Value,Count,Frequency (%),Unnamed: 3
33.0,87,1.3%,
34.0,18,0.3%,
34.5,63,0.9%,
55.0,20,0.3%,
60.0,1474,21.5%,

Value,Count,Frequency (%),Unnamed: 3
230.0,1064,15.6%,
250.0,1,0.0%,
287.0,6,0.1%,
345.0,3,0.0%,
500.0,138,2.0%,

Unnamed: 0,OBJECTID,Name,kV,kV_Sort,Owner,Status,Circuit,Type,Legend,Length_Mil,Length_Fee,Comments,Shape__Len
0,2001,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523
1,2002,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,2.0,11459.33011218,,0.032482
2,2003,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,11.0,56984.89118592,,0.159223
3,2004,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,15.0,81003.57166241,,0.250896
4,2005,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,20.0,105641.41726686,,0.300362


In [24]:
len(mapping(tl_df.geometry[121])['coordinates'])

3

In [37]:
def create_S2_polyline(linestring):
    """Convert a (multi)line geometry into a list of S2Polyline objects.

    Arguments:
        linestring: geometry accepted by shapely's ``mapping``; its coordinates
            are read as (longitude, latitude) pairs in degrees

    Returns:
        List with one S2Polyline for a LineString, one per member line for a
        MultiLineString, and an empty list for any other geometry type
    """
    geo = mapping(linestring)
    raw_coords = geo['coordinates']
    geom_kind = geo['type']

    # Normalise both supported types to "a sequence of paths".
    if geom_kind == 'LineString':
        paths = [raw_coords]
    elif geom_kind == 'MultiLineString':
        paths = raw_coords
    else:
        paths = []

    return [
        s2.S2Polyline([s2.S2LatLng.FromDegrees(lat, lng) for lng, lat in path])
        for path in paths
    ]

In [38]:
# Convert each line geometry into a list of S2Polyline objects (one per line part).
tl_df['S2_PolyLine'] = tl_df.geometry.apply(create_S2_polyline)
tl_df.head()

Unnamed: 0,OBJECTID,Name,kV,kV_Sort,Owner,Status,Circuit,Type,Legend,Length_Mil,Length_Fee,Comments,Shape__Len,geometry,S2_PolyLine
0,2001,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523,LINESTRING (-122.025217180817 39.7560586198882...,[<s2_py.pywraps2.S2Polyline; proxy of <Swig Ob...
1,2002,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,2.0,11459.33011218,,0.032482,LINESTRING (-122.010209405169 39.5827809214517...,[<s2_py.pywraps2.S2Polyline; proxy of <Swig Ob...
2,2003,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,11.0,56984.89118592,,0.159223,LINESTRING (-122.003238381708 39.5533528274813...,[<s2_py.pywraps2.S2Polyline; proxy of <Swig Ob...
3,2004,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,15.0,81003.57166241,,0.250896,LINESTRING (-122.013588113745 39.4023799821862...,[<s2_py.pywraps2.S2Polyline; proxy of <Swig Ob...
4,2005,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,20.0,105641.41726686,,0.300362,"LINESTRING (-122.153286201514 39.276960876995,...",[<s2_py.pywraps2.S2Polyline; proxy of <Swig Ob...


In [39]:
tl_df.OBJECTID.nunique()

6841

In [40]:
# One row per S2Polyline, keyed by OBJECTID (multi-part lines yield several rows per OBJECTID).
tl2_df = tl_df[['OBJECTID', 'S2_PolyLine']]
tl2_df = split_data_frame_list(tl2_df, 'S2_PolyLine', 'OBJECTID')
tl2_df.shape

(6957, 2)

In [41]:
tl2_df.head()

Unnamed: 0,OBJECTID,S2_PolyLine
0,2001,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...
1,2002,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...
2,2003,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...
3,2004,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...
4,2005,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...


In [42]:
# Cover every polyline with level-13 S2 cells, the same level used for the state covering.
tl2_df['S2_Cells'] = tl2_df.S2_PolyLine.apply(create_S2_coverer, args=[13])
tl2_df.shape

(6957, 3)

In [43]:
tl2_df.head()

Unnamed: 0,OBJECTID,S2_PolyLine,S2_Cells
0,2001,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...,"(4/0010011300101�, 4/0010011300102�, 4/0010011..."
1,2002,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...,"(4/0010012023332�, 4/0010012023333�, 4/0010012..."
2,2003,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...,"(4/0010012022220�, 4/0010012022221�, 4/0010012..."
3,2004,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...,"(4/0010012310022�, 4/0010012310122�, 4/0010012..."
4,2005,<s2_py.pywraps2.S2Polyline; proxy of <Swig Obj...,"(4/0010012002210�, 4/0010012002211�, 4/0010012..."


In [44]:
# One row per (OBJECTID, S2 cell); the cell token is stored as S2_Cells_ID.
tl3_df = tl2_df[['OBJECTID', 'S2_Cells']]
tl3_df = split_data_frame_list(tl3_df, 'S2_Cells', 'OBJECTID')
tl3_df['S2_Cells_ID'] = tl3_df.S2_Cells.apply(lambda x: x.ToToken())
tl3_df.shape

(75121, 3)

In [45]:
tl3_df.head()

Unnamed: 0,OBJECTID,S2_Cells,S2_Cells_ID
0,2001,4/0010011300101�,8082e08c
1,2001,4/0010011300102�,8082e094
2,2001,4/0010011300120�,8082e0c4
3,2001,4/0010011300131�,8082e0ec
4,2002,4/0010012023332�,808317f4


In [46]:
tl3_df.S2_Cells_ID.nunique()

43069

In [51]:
# Attach the line attributes to every (OBJECTID, cell token) row. The geometry and S2_PolyLine
# columns are dropped by name rather than by position ([:-2]), so the merge does not silently
# pick up the wrong columns if tl_df gains or loses a column.
tl3_df = pd.merge(tl3_df[['OBJECTID', 'S2_Cells_ID']],
                  pd.DataFrame(tl_df.drop(columns=['geometry', 'S2_PolyLine'])),
                  on='OBJECTID', how='left')
tl3_df.shape

(75121, 14)

In [52]:
tl3_df.head()

Unnamed: 0,OBJECTID,S2_Cells_ID,Name,kV,kV_Sort,Owner,Status,Circuit,Type,Legend,Length_Mil,Length_Fee,Comments,Shape__Len
0,2001,8082e08c,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523
1,2001,8082e094,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523
2,2001,8082e0c4,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523
3,2001,8082e0ec,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,1.0,5813.64639807,,0.018523
4,2002,808317f4,PG&E 60kV,60,60.0,PG&E,Operational,Single,OH,PG&E_60_70kV,2.0,11459.33011218,,0.032482


In [53]:
# Persist the per-cell transmission-line attributes; index=False leaves the row index out of the CSV.
tl3_df.to_csv('./Data/Processed/Transmission_Lines_S2Cells.csv', index=False)