Revert to fully specifying pudl.helpers for clarity

catalyst-cooperative · Jun 18, 2019 · 97f8035 · 97f8035
1 parent 209e8d5
commit 97f8035
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 32 deletions.
diff --git a/pudl/transform/eia923.py b/pudl/transform/eia923.py
@@ -6,8 +6,6 @@
 import numpy as np
 import pudl
 import pudl.constants as pc
-from pudl.helpers import fix_int_na, fix_eia_na, cleanstrings, strip_lower
-from pudl.helpers import convert_to_date, cleanstrings_series
 
 logger = logging.getLogger(__name__)
 ###############################################################################
@@ -112,7 +110,7 @@ def _coalmine_cleanup(cmi_df):
 
     # Transform coalmine names to a canonical form to reduce duplicates:
     # No leading or trailing whitespace:
-    cmi_df = strip_lower(cmi_df, columns=['mine_name'])
+    cmi_df = pudl.helpers.strip_lower(cmi_df, columns=['mine_name'])
     # remove all internal non-alphanumeric characters:
     cmi_df['mine_name'] = \
         cmi_df['mine_name'].replace('[^a-zA-Z0-9 -]', '', regex=True)
@@ -227,17 +225,17 @@ def generation_fuel(eia923_dfs, eia923_transformed_dfs):
     # Convert the EIA923 DataFrame from yearly to monthly records.
     gf_df = _yearly_to_monthly_records(gf_df, pc.month_dict_eia923)
     # Replace the EIA923 NA value ('.') with a real NA value.
-    gf_df = fix_eia_na(gf_df)
+    gf_df = pudl.helpers.fix_eia_na(gf_df)
     # Remove "State fuel-level increment" records... which don't pertain to
     # any particular plant (they have plant_id_eia == operator_id == 99999)
     gf_df = gf_df[gf_df.plant_id_eia != 99999]
 
     gf_df['fuel_type_code_pudl'] = \
-        cleanstrings_series(gf_df.fuel_type,
-                            pc.fuel_type_eia923_gen_fuel_simple_map)
+        pudl.helpers.cleanstrings_series(gf_df.fuel_type,
+                                         pc.fuel_type_eia923_gen_fuel_simple_map)
 
     # Convert Year/Month columns into a single Date column...
-    gf_df = convert_to_date(gf_df)
+    gf_df = pudl.helpers.convert_to_date(gf_df)
 
     eia923_transformed_dfs['generation_fuel_eia923'] = gf_df
 
@@ -279,14 +277,14 @@ def boiler_fuel(eia923_dfs, eia923_transformed_dfs):
     bf_df = _yearly_to_monthly_records(
         bf_df, pc.month_dict_eia923)
     bf_df['fuel_type_code_pudl'] = \
-        cleanstrings_series(
+        pudl.helpers.cleanstrings_series(
             bf_df.fuel_type_code,
             pc.fuel_type_eia923_boiler_fuel_simple_map)
     # Replace the EIA923 NA value ('.') with a real NA value.
-    bf_df = fix_eia_na(bf_df)
+    bf_df = pudl.helpers.fix_eia_na(bf_df)
 
     # Convert Year/Month columns into a single Date column...
-    bf_df = convert_to_date(bf_df)
+    bf_df = pudl.helpers.convert_to_date(bf_df)
 
     eia923_transformed_dfs['boiler_fuel_eia923'] = bf_df
 
@@ -330,10 +328,10 @@ def generation(eia923_dfs, eia923_transformed_dfs):
     generation_df = _yearly_to_monthly_records(
         generation_df, pc.month_dict_eia923)
     # Replace the EIA923 NA value ('.') with a real NA value.
-    generation_df = fix_eia_na(generation_df)
+    generation_df = pudl.helpers.fix_eia_na(generation_df)
 
     # Convert Year/Month columns into a single Date column...
-    generation_df = convert_to_date(generation_df)
+    generation_df = pudl.helpers.convert_to_date(generation_df)
 
     eia923_transformed_dfs['generation_eia923'] = generation_df
 
@@ -500,19 +498,19 @@ def fuel_reciepts_costs(eia923_dfs, eia923_transformed_dfs):
                   'mine_type_code', 'county_id_fips']).
         drop(cols_to_drop, axis=1).
         # Replace the EIA923 NA value ('.') with a real NA value.
-        pipe(fix_eia_na).
+        pipe(pudl.helpers.fix_eia_na).
         # These come in ALL CAPS from EIA...
-        pipe(strip_lower, columns=['supplier_name']).
-        pipe(fix_int_na, columns=['contract_expiration_date', ]).
+        pipe(pudl.helpers.strip_lower, columns=['supplier_name']).
+        pipe(pudl.helpers.fix_int_na, columns=['contract_expiration_date', ]).
         assign(
             # Standardize case on transportation codes -- all upper case!
             primary_transportation_mode_code=lambda x: x.primary_transportation_mode_code.str.upper(),
             secondary_transportation_mode_code=lambda x: x.secondary_transportation_mode_code.str.upper(),
             fuel_cost_per_mmbtu=lambda x: x.fuel_cost_per_mmbtu / 100,
             fuel_group_code=lambda x: x.fuel_group_code.str.lower().str.replace(' ', '_'),
-            fuel_type_code_pudl=lambda x: cleanstrings_series(
+            fuel_type_code_pudl=lambda x: pudl.helpers.cleanstrings_series(
                 x.energy_source_code, pc.energy_source_eia_simple_map),
-            fuel_group_code_simple=lambda x: cleanstrings_series(
+            fuel_group_code_simple=lambda x: pudl.helpers.cleanstrings_series(
                 x.fuel_group_code, pc.fuel_group_eia923_simple_map),
             contract_expiration_month=lambda x: x.contract_expiration_date.apply(
                 lambda y: y[:-2] if y != '' else y)).
@@ -524,12 +522,12 @@ def fuel_reciepts_costs(eia923_dfs, eia923_transformed_dfs):
                 lambda y: '20' + y[-2:] if y != '' else y)).
         # We will create our own real date field, so chuck this one.
         drop('contract_expiration_date', axis=1).
-        pipe(convert_to_date,
+        pipe(pudl.helpers.convert_to_date,
              date_col='contract_expiration_date',
              year_col='contract_expiration_year',
              month_col='contract_expiration_month').
-        pipe(convert_to_date).
-        pipe(cleanstrings,
+        pipe(pudl.helpers.convert_to_date).
+        pipe(pudl.helpers.cleanstrings,
              ['natural_gas_transport_code',
               'natural_gas_delivery_contract_type_code'],
              [{'firm': ['F'], 'interruptible': ['I']},

diff --git a/pudl/transform/ferc1.py b/pudl/transform/ferc1.py
@@ -31,7 +31,6 @@
 import pudl
 import pudl.constants as pc
 from pudl.settings import SETTINGS
-from pudl.helpers import strip_lower, cleanstrings, fix_int_na
 
 logger = logging.getLogger(__name__)
 
@@ -212,11 +211,11 @@ def _plants_steam_clean(ferc1_steam_df):
     ferc1_steam_df = (
         ferc1_steam_df.
         pipe(_clean_cols, 'f1_steam').
-        pipe(strip_lower, ['plant_name']).
+        pipe(pudl.helpers.strip_lower, ['plant_name']).
         # Take the messy free-form construction_type and plant_kind fields, and
         # do our best to map them to some canonical categories... this is
         # necessarily imperfect:
-        pipe(cleanstrings, ['type_const', 'plant_kind'],
+        pipe(pudl.helpers.cleanstrings, ['type_const', 'plant_kind'],
              [pc.ferc1_const_type_strings, pc.ferc1_plant_kind_strings],
              unmapped='')
     )
@@ -292,7 +291,8 @@ def _plants_steam_assign_plant_ids(ferc1_steam_df, ferc1_fuel_df):
     # scikit-learn still doesn't deal well with NA values (this will be fixed
     # eventually) We need to massage the type and missing data for the
     # Classifier to work.
-    ferc1_steam_df = fix_int_na(ferc1_steam_df, columns=['construction_year'])
+    ferc1_steam_df = pudl.helpers.fix_int_na(
+        ferc1_steam_df, columns=['construction_year'])
 
     # Grab fuel consumption proportions for use in assigning plant IDs:
     fuel_fractions = fuel_by_plant_ferc1(ferc1_fuel_df)
@@ -467,11 +467,11 @@ def fuel(ferc1_raw_dfs, ferc1_transformed_dfs):
         _clean_cols(ferc1_raw_dfs['fuel_ferc1'], 'f1_fuel').
         # Standardize plant_name capitalization and remove leading/trailing
         # white space -- necessary b/c plant_name is part of many foreign keys.
-        pipe(strip_lower, ['plant_name']).
+        pipe(pudl.helpers.strip_lower, ['plant_name']).
         # Take the messy free-form fuel & fuel_unit fields, and do our best to
         # map them to some canonical categories... this is necessarily
         # imperfect:
-        pipe(cleanstrings, ['fuel', 'fuel_unit'],
+        pipe(pudl.helpers.cleanstrings, ['fuel', 'fuel_unit'],
              [pc.ferc1_fuel_strings, pc.ferc1_fuel_unit_strings],
              unmapped='').
         # Fuel cost per kWh is a per-unit value that doesn't make sense to
@@ -594,7 +594,7 @@ def plants_small(ferc1_raw_dfs, ferc1_transformed_dfs):
     ferc1_small_df = ferc1_raw_dfs['plants_small_ferc1']
     # Standardize plant_name_raw capitalization and remove leading/trailing
     # white space -- necessary b/c plant_name_raw is part of many foreign keys.
-    ferc1_small_df = strip_lower(
+    ferc1_small_df = pudl.helpers.strip_lower(
         ferc1_small_df, ['plant_name', 'kind_of_fuel']
     )
 
@@ -646,7 +646,8 @@ def plants_small(ferc1_raw_dfs, ferc1_transformed_dfs):
 
     # Standardize plant_name capitalization and remove leading/trailing white
     # space, so that plant_name matches formatting of plant_name_raw
-    ferc1_small_df = strip_lower(ferc1_small_df, ['plant_name_clean'])
+    ferc1_small_df = pudl.helpers.strip_lower(
+        ferc1_small_df, ['plant_name_clean'])
 
     # in order to create one complete column of plant names, we have to use the
     # cleaned plant names when available and the original plant names when the
@@ -708,8 +709,8 @@ def plants_hydro(ferc1_raw_dfs, ferc1_transformed_dfs):
         _clean_cols(ferc1_raw_dfs['plants_hydro_ferc1'], 'f1_hydro').
         # Standardize plant_name capitalization and remove leading/trailing
         # white space -- necessary b/c plant_name is part of many foreign keys.
-        pipe(strip_lower, ['plant_name']).
-        pipe(cleanstrings, ['plant_const'], [pc.ferc1_const_type_strings],
+        pipe(pudl.helpers.strip_lower, ['plant_name']).
+        pipe(pudl.helpers.cleanstrings, ['plant_const'], [pc.ferc1_const_type_strings],
              unmapped='').
         assign(
             # Converting kWh to MWh
@@ -793,9 +794,9 @@ def plants_pumped_storage(ferc1_raw_dfs, ferc1_transformed_dfs):
                     'f1_pumped_storage').
         # Standardize plant_name capitalization and remove leading/trailing
         # white space -- necessary b/c plant_name is part of many foreign keys.
-        pipe(strip_lower, ['plant_name']).
+        pipe(pudl.helpers.strip_lower, ['plant_name']).
         # Clean up the messy plant construction type column:
-        pipe(cleanstrings, ['plant_kind'], [pc.ferc1_const_type_strings],
+        pipe(pudl.helpers.cleanstrings, ['plant_kind'], [pc.ferc1_const_type_strings],
              unmapped='').
         assign(
             # Converting from kW/kWh to MW/MWh