-
Notifications
You must be signed in to change notification settings - Fork 2
/
state2nerc.py
114 lines (91 loc) · 3.91 KB
/
state2nerc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import pandas as pd
from util.utils import getParentDir, rename_cols
from os.path import join, abspath, normpath, dirname, split
def fraction_state2nerc(df, state, region_col='nerc', fuel_col='fuel category'):
"""Return the percent of gen & consumption by fuel type in each region
for a state
inputs:
df (dataframe): a dataframe with data from the most recent EIA-923
final release. It should either contain a column with reporting
frequency ('A' or 'M' for annual/monthly), or only include annual
facilities.
What if I want to do something other than NERC regions? What is the
appropriate column name?
What sort of data is this function supposed to accept?
- State of each facility
- Reporting frequency
- Year
- NERC region (or other region code?)
- plant id
- fuel category
- generation, total fuel, elec fuel
output:
result (df): The percent of generation, toal fuel, and electric fuel
from annual reporting facilities in each (NERC) region from the
state
"""
# Not sure if I'll pass through all facilities or just annual.
# Drop monthly if all facilities are passed through
if 'reporting frequency' in df.columns:
a = df.loc[(df.state == state) &
(df['reporting frequency'] == 'A')].copy()
else:
a = df.loc[df.state == state].copy()
# Group by region and fuel category
a.drop(['plant id', 'year'], axis=1, inplace=True)
a = a.groupby([region_col, fuel_col]).sum()
# Unique list of fuels
fuels = set(a.index.get_level_values(fuel_col))
# Loop through the fuels and create a df for each fuel. The main df has
# a multi-index, so use .xs to take a cross-slice. Each new df will have
# the percent of generation, total fuel, and elec fuel in each region.
temp_list = []
for fuel in fuels:
temp = (a.xs(fuel, level=fuel_col)
/ a.xs(fuel, level=fuel_col).sum())
temp[fuel_col] = fuel
temp_list.append(temp)
result = pd.concat(temp_list)
result.reset_index(inplace=True)
result['state'] = state
rename_cols = {'generation (mwh)': '% generation',
'total fuel (mmbtu)': '% total fuel',
'elec fuel (mmbtu)': '% elec fuel'}
result.rename(columns=rename_cols, inplace=True)
keep_cols = (['state', region_col, fuel_col]
+ list(rename_cols.values()))
result = result.loc[:, keep_cols]
return result
def add_region(df, regions, region_col='nerc'):
"""
Add the NERC (or other) region as a column based on lat/lon data in a
dataframe
inputs:
df (dataframe): Should have 'lat' and 'lon' columns
regions (dataframe): a geopandas df with region shapes
region_col (str): name of column with regional labels
outputs:
df (dataframe): modified version of the original dataframe with region
column
"""
import geopandas as gpd
from shapely.geometry import Point
from geopandas import GeoDataFrame
# ap = abspath(__file__)
# top_path = getParentDir(dirname(ap), level=2)
# Only do a spatial join on points when necessary
cols = ['lat', 'lon', 'plant id']
small_facility = df.loc[:, cols].drop_duplicates()
small_facility = small_facility.dropna(subset=['lat', 'lon'])
geometry = [Point(xy) for xy in zip(small_facility.lon, small_facility.lat)]
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(small_facility, crs=crs, geometry=geometry)
# Spatial join of the facilities with the NERC regions
facility_nerc = gpd.sjoin(geo_df, regions, how='inner', op='within')
# lowercase column names
rename_cols(facility_nerc)
# Merge the NERC labels back into the main dataframe
cols = ['plant id', region_col]
df = df.merge(facility_nerc.loc[:, cols],
on=['plant id'], how='left')
return df