In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import os
import numpy as np
import scipy
import pandas as pd
import sqlalchemy as sa
sys.path.append(os.path.abspath(os.path.join('..','..','..')))
from pudl import init, mcoe, analysis, settings, outputs
import pudl.constants as pc
import pudl.extract.ferc1
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import scale

import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Plot with the ggplot style sheet and make inline figures larger and sharper.
plt.style.use('ggplot')
mpl.rcParams['figure.figsize'] = (10,6)
mpl.rcParams['figure.dpi'] = 150
# Allow pandas to render up to 56 columns before truncating wide frames.
pd.options.display.max_columns = 56

## Pull in the large FERC plants data and simplify it

In [3]:
# Connect to the FERC Form 1 database and read the whole f1_steam table, then
# discard three columns that are not used anywhere else in this notebook.
ferc1_engine = pudl.extract.ferc1.connect_db()
ferc1_steam = pd.read_sql('SELECT * FROM f1_steam;', ferc1_engine).drop(
    ['row_seq', 'row_prvlg', 'report_prd'],
    axis=1,
)

In [4]:
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'diesel' category.
diesel_strings = [
    'DIESEL',
    'Diesel Engine',
    'Diesel Turbine',
]

# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'geothermal' category.
geothermal_strings = [
    'Steam - Geothermal',
]
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'natural_gas'
# category. Spelling, case and spacing variants are listed individually
# (presumably verbatim values of the plant_kind column -- do not "correct" them).
natural_gas_strings = [
    'Combined Cycle',
    'Combustion Turbine',
    'GT',
    'GAS TURBINE',
    'Comb. Turbine',
    'Gas Turbine #1',
    'Combine Cycle Oper',
    'Combustion',
    'Combined',
    'Gas Turbine/Steam',
    'Gas Turbine Peaker',
    'Gas Turbine - Note 1',
    'Resp Share Gas Note3',
    'Gas Turbines',
    'Simple Cycle',
    'Gas / Steam',
    'GasTurbine',
    'Combine Cycle',
    'CTG/Steam-Gas',
    'GTG/Gas',
    'CTG/Steam -Gas',
    'Steam/Gas Turbine',
    'CombustionTurbine',
    'Gas Turbine-Simple',
    'STEAM & GAS TURBINE',
    'Gas & Steam Turbine',
    'Gas',
    'Gas Turbine (2)',
    'COMBUSTION AND GAS',
    'Com Turbine Peaking',
    'Gas Turbine Peaking',
    'Comb Turb Peaking',
    'JET ENGINE',
    'Comb. Cyc',
    'Com. Cyc',
    'Com. Cycle',
    'GAS TURB-COMBINED CY',
    'Gas Turb',
    'Combined Cycle - 40%',
    'IGCC/Gas Turbine',
    'CC',
    'Combined Cycle Oper',
    'Simple Cycle Turbine',
    'Steam and CC',
    'Com Cycle Gas Turb',
    'I.C.E/  Gas Turbine',
    'Combined Cycle CTG',
    'GAS-TURBINE',
    'Gas Expander Turbine',
    'Gas Turbine (Leased)',
    'Gas Turbine # 1',
    'Gas Turbine (Note 1)',
    'COMBUSTINE TURBINE',
    'Gas Turb, Int. Comb.',
    'Combined Turbine',
    'Comb Turb Peak Units',
    'Combustion Tubine',
    'Comb. Cycle',
    'COMB.TURB.PEAK.UNITS',
    'Steam  and  CC',
    'I.C.E. /Gas Turbine',
    'Conbustion Turbine',
    'Gas Turbine/Int Comb',
    'Steam & CC',
    'GAS TURB. & HEAT REC',
    'Gas Turb/Comb. Cyc',
    'Comb. Turine',
]
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'nuclear' category.
nuclear_strings = [
    'Nuclear',
    'Nuclear (3)',
]
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'other' category.
# Besides internal-combustion variants, this list also holds footnote-style and
# retirement entries ('Resp. Share - Note N', 'Retired - 2013').
other_strings = [
    'IC',
    'Internal Combustion',
    'Int Combust - Note 1',
    'Resp. Share - Note 2',
    'Int. Combust - Note1',
    'Resp. Share - Note 4',
    'Resp Share - Note 5',
    'Resp. Share - Note 7',
    'Internal Comb Recip',
    'Reciprocating Engine',
    'Internal Comb',
    'Resp. Share - Note 8',
    'Resp. Share - Note 9',
    'Resp Share - Note 11',
    'Resp. Share - Note 6',
    'INT.COMBUSTINE',
    'Steam (Incl I.C.)',
    'Other',
    'Int Combust (Note 1)',
    'Resp. Share (Note 2)',
    'Int. Combust (Note1)',
    'Resp. Share (Note 8)',
    'Resp. Share (Note 9)',
    'Resp Share (Note 11)',
    'Resp. Share (Note 4)',
    'Resp. Share (Note 6)',
    'Plant retired- 2013',
    'Retired - 2013',
]
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'steam' category.
steam_strings = [
    'Steam',
    'Steam Units 1, 2, 3',
    'Resp Share St Note 3',
    'Steam Turbine',
    'Steam-Internal Comb',
    'IGCC',
    'Steam- 72%',
    'Steam (1)',
    'Steam (1)',  # NOTE(review): exact duplicate of the previous entry -- confirm whether a variant was intended
    'Steam Units 1,2,3',
    'Steam/Fossil',
    'Steams',
    'Steam - 72%',
    'Steam - 100%',
    'Stream',
    'Steam Units 4, 5',
    'Steam - 64%',
    'Common',
    'Steam (A)',
    'Coal',
    'Steam;Retired - 2013',
    'Steam Units 4 & 6',
]
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'wind' category.
wind_strings = [
    'Wind',
    'Wind Turbine',
    'Wind - Turbine',
    'Wind Energy',
]
# Raw plant-kind strings that cpi_plant_kind_map assigns to the 'solar' category.
solar_strings = [
    'Solar Photovoltaic',
    'Solar Thermal',
    'SOLAR PROJECT',
    'Solar',
    'Photovoltaic',
]
# Simplified category -> list of raw plant_kind strings belonging to it.
cpi_plant_kind_map = {
    'natural_gas': natural_gas_strings,
    'diesel': diesel_strings,
    'geothermal': geothermal_strings,
    'nuclear': nuclear_strings,
    'steam': steam_strings,
    'wind': wind_strings,
    'solar': solar_strings,
    'other': other_strings,
}

# Derive a simplified plant kind column from the raw plant_kind strings.
# NOTE(review): cleanstrings is defined outside this notebook; presumably it
# replaces each raw string with its category key and uses `unmapped` for
# anything not listed -- confirm against pudl.transform.pudl.
ferc1_steam['plant_kind_cpi'] = pudl.transform.pudl.cleanstrings(
    ferc1_steam['plant_kind'],
    cpi_plant_kind_map,
    unmapped='',
)


## Create a unique record ID

In [5]:
# Build an integer record ID by concatenating the decimal digits of report year,
# respondent ID, supplement number and row number, in that order.
# NOTE(review): the parts are joined without padding or separators, so two
# different key combinations could produce the same ID -- confirm uniqueness.
ferc1_steam['record_id'] = (
    ferc1_steam['report_year'].astype(str)
    + ferc1_steam['respondent_id'].astype(str)
    + ferc1_steam['spplmnt_num'].astype(str)
    + ferc1_steam['row_number'].astype(str)
).astype(int)

In [6]:
# A record that reports no generation, no fuel expenses and no total production
# expenses (each either zero or missing) carries no usable data, so drop it.
mask_one = (
    ((ferc1_steam.net_generation == 0) | ferc1_steam.net_generation.isnull())
    & ((ferc1_steam.expns_fuel == 0) | ferc1_steam.expns_fuel.isnull())
    & ((ferc1_steam.tot_prdctn_expns == 0) | ferc1_steam.tot_prdctn_expns.isnull())
)
# reset_index() gives the surviving rows a fresh 0..n-1 index (the previous row
# labels are kept in an 'index' column). best_by_year uses index labels as
# positions into the similarity matrix, so this contiguous index is required.
ferc1_steam = ferc1_steam[~mask_one].reset_index()

# Normalize plant names: trim, lowercase, and collapse runs of whitespace.
# The pattern is a raw string and regex=True is explicit: newer pandas treats
# the pattern as a literal by default, which would silently skip the collapse.
ferc1_steam['plant_name'] = (
    ferc1_steam.plant_name
    .str.strip()
    .str.lower()
    .str.replace(r'\s+', ' ', regex=True)
)

# Columns used to identify a record and to match it across report years.
matching_cols = [
    'record_id',
    'report_year',
    'spplmnt_num',
    'row_number',
    'respondent_id',
    'plant_name',
    'plant_kind_cpi',
    'yr_const',
    'tot_capacity'
]

# ferc1_tomatch holds only the matching columns; ferc1_steam_new is the full
# table with the matching columns moved to the front (split, then re-joined on
# the shared row index).
ferc1_tomatch = ferc1_steam[matching_cols]
ferc1_steam_new = ferc1_steam.drop(matching_cols, axis=1)
ferc1_steam_new = pd.merge(ferc1_tomatch, ferc1_steam_new, left_index=True, right_index=True)

In [7]:
def vectorize_plants(plants,
                     ngram_range=(2,5),
                     sup_num_wt=1.0,
                     row_num_wt=1.0,
                     plant_name_wt=1.0,
                     yr_const_wt=1.0,
                     respondent_wt=1.0,
                     plant_kind_wt=1.0,
                     capacity_wt=1.0):
    """
    Turn FERC Form 1 plant records into weighted, normalized feature vectors.

    Each record is described by seven feature groups that are scaled by their
    weight, stacked side by side, and finally passed through sklearn's
    ``normalize`` with its default settings:

    * plant_name -- TF-IDF over character n-grams (``ngram_range``)
    * yr_const, respondent_id, plant_kind_cpi, spplmnt_num, row_number --
      label-binarized categorical columns
    * tot_capacity -- min-max scaled to a single numeric column

    Args:
        plants: DataFrame with the columns plant_name, tot_capacity, yr_const,
            spplmnt_num, row_number, respondent_id and plant_kind_cpi.
        ngram_range: (min_n, max_n) character n-gram sizes for the plant name.
        sup_num_wt, row_num_wt, plant_name_wt, yr_const_wt, respondent_wt,
        plant_kind_wt, capacity_wt: multiplier applied to the corresponding
            feature group before normalization.

    Returns:
        Matrix of normalized feature vectors, one row per input record.
    """

    def _one_hot(column):
        # Label-binarize one categorical column and store it sparsely.
        return scipy.sparse.csr_matrix(LabelBinarizer().fit_transform(column))

    name_tfidf = TfidfVectorizer(
        analyzer='char', ngram_range=ngram_range
    ).fit_transform(plants.plant_name)

    # MinMaxScaler expects a 2-D array, hence the reshape to a single column.
    capacity_scaled = MinMaxScaler().fit_transform(
        plants.tot_capacity.values.reshape(-1,1)
    )

    # Order of the feature groups is kept exactly as before.
    weighted_blocks = [
        name_tfidf * plant_name_wt,
        _one_hot(plants.yr_const) * yr_const_wt,
        _one_hot(plants.respondent_id) * respondent_wt,
        _one_hot(plants.plant_kind_cpi) * plant_kind_wt,
        capacity_scaled * capacity_wt,
        _one_hot(plants.spplmnt_num) * sup_num_wt,
        _one_hot(plants.row_number) * row_num_wt,
    ]
    return normalize(scipy.sparse.hstack(weighted_blocks))

In [8]:
def best_by_year(plants_df, sim_df, min_sim=0.8):
    """
    For every plant record, find its most similar record in each report year.

    Args:
        plants_df: DataFrame of plant records with a ``report_year`` column.
            Its index labels are used as positions into ``sim_df``, so it must
            carry a default 0..n-1 index in the same row order as ``sim_df``.
        sim_df: square DataFrame of pairwise similarities between the records
            (row/column i corresponds to record i of ``plants_df``).
        min_sim: similarities below this threshold are ignored.

    Returns:
        A copy of ``plants_df`` with one extra float column per report year
        (the column label is the year). Each cell holds the ``sim_df`` column
        label of the best match for that record in that year, or NaN when no
        record of that year reaches ``min_sim``.
    """
    out_df = plants_df.copy()
    # Work on a masked copy of the similarity matrix: fits below the
    # threshold become NaN and can never be picked as a best match.
    sim_df = sim_df.where(sim_df >= min_sim)

    years = plants_df.report_year.unique()
    for yr in years:
        # Float NaN placeholder: "no match" stays NaN, matches are filled below.
        out_df[yr] = np.nan

    # seed_yr is the year we are matching *from*
    for seed_yr in years:
        seed_idx = plants_df.index[plants_df.report_year == seed_yr]
        # match_yr runs over every year in which we look for the best match
        for match_yr in years:
            match_idx = plants_df.index[plants_df.report_year == match_yr]
            candidates = sim_df.iloc[seed_idx, match_idx]
            # idxmax is undefined for rows with no candidate above the
            # threshold, so restrict it to rows that have at least one.
            has_match = candidates.notna().any(axis=1)
            if not has_match.any():
                continue
            best_idx = candidates[has_match].idxmax(axis=1)
            # Single .loc assignment on out_df itself; chained
            # out_df[col].iloc[...] = ... may write to a temporary copy.
            out_df.loc[best_idx.index, match_yr] = best_idx.astype(float)
    return out_df

In [9]:
def where_matches(match_idx, bestof_df):
    """
    Return the index labels of all records that picked ``match_idx`` as a best match.

    Args:
        match_idx: index label of the record being looked up.
        bestof_df: DataFrame with a ``report_year`` column and one column per
            report year (labelled by the year) holding best-match index values.

    Returns:
        numpy array of index labels of the rows in which at least one of the
        per-year columns equals ``match_idx``.
    """
    years = bestof_df.report_year.unique()
    # Use the frame that was passed in, not a notebook-level global.
    year_cols = bestof_df[years]
    # Cells that differ from match_idx become NaN; rows left entirely NaN
    # never selected match_idx and are dropped.
    out_idx = year_cols[year_cols == match_idx].dropna(how='all').index.values
    return out_idx

In [10]:
def best_matches(match_idx, bestof_df):
    """
    Return the best-match index values recorded for one record.

    Args:
        match_idx: index label of the record being looked up.
        bestof_df: DataFrame with a ``report_year`` column and one column per
            report year (labelled by the year) holding best-match index values.

    Returns:
        Integer numpy array with the record's non-missing per-year best
        matches, in the order of the year columns.
    """
    year_labels = bestof_df['report_year'].unique()
    per_year = bestof_df.loc[match_idx, year_labels]
    # Years without a match are NaN; remove them before casting to int.
    found = per_year.dropna()
    return found.astype(int).values

In [11]:
# Vectorize the records (plant name weighted up, supplement/row number weighted
# down), compute all pairwise cosine similarities, and pick each record's best
# match per report year among fits of at least 0.9.
fpv = vectorize_plants(
    ferc1_tomatch,
    plant_name_wt=2.0,
    sup_num_wt=0.25,
    row_num_wt=0.25,
)
sim_df = pd.DataFrame(cosine_similarity(fpv))
bestof = best_by_year(ferc1_tomatch, sim_df, min_sim=0.9)

# A record is "bad" when the set of records that chose it as a best match
# differs from the set of records it chose itself.
bad_idx = []
for idx in bestof.index:
    # Progress marker every 1000 records.
    if idx%1000 == 0:
        print(idx)
    if np.array_equiv(where_matches(idx, bestof), best_matches(idx, bestof)):
        continue
    bad_idx.append(idx)
    print('bad record: {}'.format(idx))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


0
bad record: 33
bad record: 34
bad record: 44
bad record: 45
bad record: 388
bad record: 476
bad record: 590
bad record: 739
bad record: 981
1000
bad record: 1236
bad record: 1611
bad record: 1612
bad record: 1659
bad record: 1703
bad record: 1704
bad record: 1714
bad record: 1715
2000
bad record: 2018
bad record: 2344
bad record: 2532
bad record: 2672
bad record: 2759
bad record: 2760
bad record: 2769
3000
bad record: 3007
bad record: 3437
bad record: 3451
bad record: 3455
bad record: 3474
bad record: 3476
bad record: 3481
bad record: 3482
bad record: 3592
bad record: 3907
4000
bad record: 4012
bad record: 4172
bad record: 4176
bad record: 4198
bad record: 4199
bad record: 4204
bad record: 4205
bad record: 4391
bad record: 4675
bad record: 4871
bad record: 4988
5000
bad record: 5110
bad record: 5376
bad record: 5510
bad record: 5514
bad record: 5536
bad record: 5537
bad record: 5542
bad record: 5543
bad record: 5594
6000
bad record: 6053
bad record: 6057
bad record: 6079
bad record: 

In [12]:
# Display the collected bad-record indices as a compact array.
np.array(bad_idx)

array([   33,    34,    44,    45,   388,   476,   590,   739,   981,
        1236,  1611,  1612,  1659,  1703,  1704,  1714,  1715,  2018,
        2344,  2532,  2672,  2759,  2760,  2769,  3007,  3437,  3451,
        3455,  3474,  3476,  3481,  3482,  3592,  3907,  4012,  4172,
        4176,  4198,  4199,  4204,  4205,  4391,  4675,  4871,  4988,
        5110,  5376,  5510,  5514,  5536,  5537,  5542,  5543,  5594,
        6053,  6057,  6079,  6080,  6085,  6086,  6148,  6445,  6756,
        7253,  7630,  7664,  7668,  7690,  7691,  7696,  7697,  7739,
        8061,  8094,  8392,  8898,  8902,  8924,  8925,  8930,  8931,
        9074,  9084,  9531,  9883,  9887,  9909,  9910,  9915,  9916,
       10325, 10529, 10805, 10809, 10831, 10832, 10837, 10838, 11024,
       11028, 11050, 11051, 11056, 11057, 11059, 11165, 12011, 12052,
       12073, 12074, 12080, 12161])

In [13]:
# Number of records flagged as bad.
len(bad_idx)

112

In [14]:
# Inspect one flagged record: show the rows it selected as its best match
# in each report year.
n=476
bestof.loc[best_matches(n, bestof)]

Unnamed: 0,record_id,report_year,spplmnt_num,row_number,respondent_id,plant_name,plant_kind_cpi,yr_const,tot_capacity,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
476,200414102,2004,0,2,141,boardman,steam,1980,390.62,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
981,200514102,2005,0,2,141,boardman,steam,1980,390.62,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
2018,200614102,2006,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
3007,200714102,2007,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
4012,200814102,2008,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
4988,200914102,2009,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
6756,201014102,2010,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
7739,201114102,2011,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
8061,201214102,2012,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
9074,201314102,2013,0,2,141,boardman,steam,1980,513.76,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0


In [18]:
# The reverse view: rows that selected record n as a best match in some year.
bestof.loc[where_matches(n, bestof)]

Unnamed: 0,record_id,report_year,spplmnt_num,row_number,respondent_id,plant_name,plant_kind_cpi,yr_const,tot_capacity,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
476,200414102,2004,0,2,141,boardman,steam,1980,390.62,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
981,200514102,2005,0,2,141,boardman,steam,1980,390.62,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
2018,200614102,2006,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
3007,200714102,2007,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
4012,200814102,2008,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
4988,200914102,2009,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
6756,201014102,2010,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
7739,201114102,2011,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
8061,201214102,2012,0,2,141,boardman,steam,1980,417.43,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0
9074,201314102,2013,0,2,141,boardman,steam,1980,513.76,476.0,981.0,2018.0,3007.0,4012.0,4988.0,6756.0,7739.0,8061.0,9074.0,10325.0,11059.0,12011.0


In [19]:
# All records whose normalized plant name is exactly 'boardman', with every column.
ferc1_steam_new[(ferc1_steam_new.plant_name=='boardman')]

Unnamed: 0,record_id,report_year,spplmnt_num,row_number,respondent_id,plant_name,plant_kind_cpi,yr_const,tot_capacity,index,plant_kind,type_const,yr_installed,peak_demand,plant_hours,plnt_capability,when_not_limited,when_limited,avg_num_of_emp,net_generation,cost_land,cost_structure,cost_equipment,cost_of_plant_to,cost_per_kw,expns_operations,expns_fuel,expns_coolants,expns_steam,expns_steam_othr,expns_transfer,expns_electric,expns_misc_power,expns_rents,expns_allowances,expns_engnr,expns_structures,expns_boiler,expns_plants,expns_misc_steam,tot_prdctn_expns,expns_kwh,asset_retire_cost
475,200414101,2004,0,1,141,boardman,steam,1980,600.96,817,Steam,Conventional,1980,609.0,6449.0,0.0,585.0,585.0,105.0,3540098000.0,1240068.0,149500146.0,452506392.0,604085247.0,1005.2004,6764874.0,44256851.0,0.0,0.0,0.0,0.0,0.0,1192631.0,3618051.0,-7770.0,23500625.0,0.0,0.0,0.0,194192.0,79519454.0,0.0225,838641.0
476,200414102,2004,0,2,141,boardman,steam,1980,390.62,818,Steam,Conventional,1980,0.0,0.0,0.0,0.0,0.0,0.0,2305031000.0,798844.0,98069397.0,289787875.0,389278233.0,996.565,4457560.0,29909037.0,0.0,0.0,0.0,0.0,0.0,789729.0,2688319.0,0.0,15116268.0,0.0,0.0,0.0,119863.0,53080776.0,0.023,622117.0
876,20047002,2004,0,2,70,boardman,steam,1980,56.05,1671,Steam,Conventional,1980,60.0,6448.0,0.0,0.0,0.0,0.0,353543000.0,106610.0,13575473.0,51815464.0,65497547.0,1168.5557,821222.0,4409531.0,0.0,0.0,0.0,0.0,0.0,145173.0,431771.0,0.0,2670682.0,0.0,0.0,0.0,26742.0,8505121.0,0.0241,0.0
980,200514101,2005,0,1,141,boardman,steam,1980,600.96,1891,Steam,Conventional,1980,592.0,6235.0,0.0,585.0,585.0,106.0,3561174000.0,1240068.0,153565948.0,466586460.0,622231117.0,1035.3952,5974221.0,47834482.0,0.0,0.0,0.0,0.0,0.0,2169872.0,1138860.0,-19387.0,19085827.0,0.0,0.0,0.0,259476.0,76443351.0,0.0215,838641.0
981,200514102,2005,0,2,141,boardman,steam,1980,390.62,1892,Steam,Conventional,1980,0.0,0.0,0.0,0.0,0.0,0.0,2309497000.0,798844.0,102118006.0,298819315.0,402358283.0,1030.0504,3818762.0,31124332.0,0.0,0.0,0.0,0.0,0.0,1432334.0,640712.0,0.0,12396430.0,0.0,0.0,0.0,89610.0,49502180.0,0.0214,622118.0
1878,20057002,2005,0,2,70,boardman,steam,1980,56.05,3456,Steam,Conventional,1980,60.0,6233.0,0.0,0.0,0.0,0.0,357180000.0,106610.0,13616489.0,54897896.0,68620995.0,1224.2818,753718.0,4612849.0,0.0,0.0,0.0,0.0,0.0,217308.0,149158.0,0.0,1952145.0,0.0,0.0,0.0,15071.0,7700249.0,0.0216,0.0
2017,200614101,2006,0,1,141,boardman,steam,1980,642.2,3715,Steam,Conventional,1980,590.0,4357.0,0.0,585.0,585.0,109.0,2414553000.0,1240068.0,150839967.0,468952624.0,621871300.0,968.3452,5989289.0,35492843.0,0.0,0.0,0.0,0.0,0.0,2066716.0,257963.0,0.0,18694086.0,0.0,0.0,0.0,108473.0,62609370.0,0.0259,838641.0
2018,200614102,2006,0,2,141,boardman,steam,1980,417.43,3716,Steam,Conventional,1980,0.0,0.0,0.0,0.0,0.0,0.0,1564573000.0,798844.0,99277301.0,301165155.0,401863417.0,962.7085,3742813.0,22218265.0,0.0,0.0,0.0,0.0,0.0,1341837.0,0.0,0.0,11752427.0,0.0,0.0,0.0,67827.0,39123169.0,0.025,622117.0
2851,20067002,2006,0,2,70,boardman,steam,1980,64.22,5226,Steam,Conventional,1980,59.0,4362.0,0.0,0.0,0.0,0.0,241557000.0,106610.0,13664764.0,54705143.0,68476517.0,1066.2802,864657.0,3429448.0,0.0,0.0,0.0,0.0,0.0,236070.0,8426.0,0.0,2439498.0,0.0,0.0,0.0,14663.0,6992762.0,0.0289,0.0
3006,200714101,2007,0,1,141,boardman,steam,1980,642.2,5515,Steam,Conventional,1980,595.0,6686.0,0.0,585.0,585.0,110.0,4354531000.0,1240068.0,151883454.0,474946319.0,628908482.0,979.3031,6763843.0,61041164.0,0.0,0.0,0.0,0.0,0.0,2169128.0,0.0,0.0,19406261.0,0.0,0.0,0.0,163697.0,89544093.0,0.0206,838641.0


In [17]:
# Disabled export: write ferc1_steam_new and plants_to_check to separate sheets of one Excel workbook.
#writer = pd.ExcelWriter('cpi_ferc_plants/test_data.xlsx', engine='xlsxwriter')
#ferc1_steam_new.to_excel(writer, sheet_name='FERC 1 Steam Plants', index=False)
#plants_to_check.to_excel(writer, sheet_name='Candidates for Labeling', index=False)
#writer.save()