# Gas Stoves Analysis

Queries LEAD and ResStock data to estimate, for each county, the percentage of homes (households below 200% of the federal poverty level) whose stoves use gas or propane.

LEAD: https://data.openei.org/submissions/573 

ResStock: https://github.com/NREL/resstock/tree/develop/project_national/housing_characteristics, "Cooking Range.tsv"

In [None]:
# Compile LEAD data for households below 200% FPL by county and main heating fuel type.
# Returns the compiled DataFrame (the file export that used to follow is disabled).
def compileData():
    """Aggregate the per-state LEAD CSV files into county x heating-fuel totals.

    Side effect: changes the working directory to "LEAD Data Extracted" and
    does not change it back, so relative paths used after this call resolve
    inside that folder.

    Every file in that folder whose name matches
    "<any 2 chars> FPL State, Counties, Cities 2018.csv" is read, summed per
    (FIP, LOCATION, FPL15, BLD, TEN, HFL), stacked, restricted to the three
    FPL15 bands below 200%, and summed again per (FIP, LOCATION, HFL).

    Returns:
        pandas.DataFrame with columns FIP (5-character zero-padded string),
        LOCATION, HFL and the summed columns UNITS, ELEP*UNITS, GASP*UNITS,
        FULP*UNITS and HINCP*UNITS.

    Raises:
        FileNotFoundError: if no file in the folder matches the expected
            name (previously this surfaced as an UnboundLocalError).
    """
    income_type = "FPL"  # AMI, SMI, FPL
    index_cols = ['FIP', 'LOCATION', 'FPL15', 'BLD', 'TEN', 'HFL']
    # IMPT: pivot_table defaults to an unweighted mean, so aggregate the
    # pre-multiplied FUEL*UNITS columns and use a sum (not a mean) everywhere.
    # The string "sum" is used instead of np.sum, which pandas 2.x deprecates
    # as an aggregation callable.
    val_cols = {
        'UNITS': 'sum',
        'ELEP*UNITS': 'sum',
        'GASP*UNITS': 'sum',
        'FULP*UNITS': 'sum',
        'HINCP*UNITS': 'sum',
    }

    chdir("LEAD Data Extracted")

    # File names look like "AL FPL State, Counties, Cities 2018.csv".
    # Built once, with the income type and the extension dot taken literally.
    relevant_file_names = re.compile(
        ".. " + re.escape(income_type) + r" State, Counties, Cities 2018\.csv"
    )

    # Collect one aggregated table per state and concatenate once at the end
    # rather than growing a DataFrame inside the loop.
    state_tables = []
    for fil in sorted(listdir()):  # sorted only to make the log order stable
        if not relevant_file_names.search(fil):
            continue
        print(f"Adding file: {fil}")
        # If a dict is passed, the key is the column to aggregate and the
        # value is the function to apply.
        state_tables.append(
            pd.read_csv(fil).pivot_table(index=index_cols, aggfunc=val_cols)
        )

    if not state_tables:
        raise FileNotFoundError(
            "No LEAD files matching '<ST> " + income_type
            + " State, Counties, Cities 2018.csv' found in 'LEAD Data Extracted'"
        )

    df = pd.concat(state_tables)
    df = df.rename_axis(None, axis=1).reset_index()

    # Only keep rows <200FPL and sum over the separate FPL categories
    # (this also collapses BLD and TEN, which are not in the second index).
    df = df[df['FPL15'].isin(["0-100%", "100-150%", "150-200%"])]
    index_cols2 = ['FIP', 'LOCATION', 'HFL']
    df = df.pivot_table(index=index_cols2, aggfunc=val_cols)
    df = df.rename_axis(None, axis=1).reset_index()

    # Only keep rows with 5 digit FIPS, including those whose leading 0 was
    # cut off when the code was read as a number.
    df = df[df['FIP'] <= 99999]
    df = df[df['FIP'] >= 999]

    # Restore the leading zero so every code is a 5-character string.
    df['FIP'] = df['FIP'].astype(str)
    df['FIP'] = df['FIP'].str.zfill(5)

    print("Complete")

    return df

In [None]:
# Prepare LEAD housing counts for <200FPL by [County, Heating Fuel] so they can
# be merged with the ResStock gas/propane stove shares, which are keyed by
# heating fuel and location region.

#---LEAD
LEAD = compileData()

# compileData() already keeps only the <200% FPL bands and sums them, so the
# frame it returns has no FPL15 column. The filter is applied only if that
# column is present, instead of failing with an AttributeError.
FPL = ["0-100%", "100-150%", "150-200%"]
if "FPL15" in LEAD.columns:
    LEAD = LEAD[LEAD["FPL15"].isin(FPL)]

# Rename heating-fuel labels to match ResStock. The mapping is applied to the
# HFL column only, so an identical string in another column (e.g. LOCATION)
# cannot be rewritten by accident.
hfl_to_resstock = {
    "ELECTRICITY": "Electricity",
    "FUEL OIL": "Fuel Oil",
    "UTILITY GAS": "Natural Gas",
    "NONE": "None",
    "BOTTLED GAS": "Propane",
    "WOOD": "Other Fuel",
    "OTHER": "Other Fuel",
    "COAL": "Other Fuel",
    "SOLAR": "Other Fuel",
}
LEAD = LEAD.assign(HFL=LEAD["HFL"].replace(hfl_to_resstock))

# Several LEAD fuels now share the "Other Fuel" label, so sum again per
# county and heating fuel to merge those rows.
key_cols = ['FIP', 'LOCATION', 'HFL']
LEAD = LEAD.groupby(key_cols).sum().reset_index()

print(LEAD)

# group by county only (the county-level total is currently disabled)
cty_cols = ['FIP', 'LOCATION']
#LEAD_cty=LEAD.groupby(cty_cols).sum().reset_index()


In [None]:
# Pull in ResStock data on gas and propane stoves by region and heating fuel,
# and format it for merging with the LEAD data.
Resstock = pd.read_excel("Resstock-Stoves.xlsx")

# Keep only the "Average" usage rows. The explicit copy makes the column
# assignment below act on an independent frame rather than on a slice of the
# original (avoids pandas' SettingWithCopyWarning).
Resstock = Resstock[Resstock.Usage.eq("Average")].copy()

# Combined gas + propane stove value per (heating fuel, region).
# NOTE(review): presumably these two columns are shares of homes - confirm
# their scale (0-1 vs 0-100) against the workbook.
Resstock["GasPropane"] = Resstock["Gas_100_Usage"] + Resstock["Propane_100_Usage"]

Resstock = Resstock[["Dependency=Heating Fuel", "Region", "GasPropane"]]

# Get location names
Locations = pd.read_excel("ResstockLocationRegions.xlsx", sheet_name="Sheet1")

# Build the key used in the stove table by prefixing the location region with "C".
# NOTE(review): assumes "Location Region" is read as text - confirm.
Locations["Region"] = "C" + Locations["Location Region"]

# One row per location and heating fuel, carrying that region's GasPropane value.
Resstock_cty = pd.merge(Locations, Resstock, on="Region", how='left')
Resstock_cty = Resstock_cty.rename(columns={"Dependency=Heating Fuel": "HFL"})


In [None]:
# Merge ResStock and LEAD data

# Left merge keeps every ResStock (location, heating fuel) row; LEAD columns
# are NaN where a county/fuel combination has no LEAD record.
# NOTE(review): compileData() returns FIP as a zero-padded 5-character
# string - confirm the FIP column read from the ResStock locations workbook
# has the same type and padding, otherwise this merge will not match.
Merge = pd.merge(Resstock_cty, LEAD, on=["FIP", "HFL"], how='left')

# Homes with a gas or propane stove = stove value x housing units.
Merge['GasPropaneUnits'] = Merge['GasPropane'] * Merge['UNITS']

# Sum over all HFL to get total gas-propane stove homes by county. Only the
# two columns used below are aggregated; summing the whole frame would also
# try to "sum" text columns such as HFL and LOCATION.
Merge_cty = Merge.groupby("FIP")[["UNITS", "GasPropaneUnits"]].sum().reset_index()

# Divide by homes/cty to get percentage of gas-propane stove homes by county
Merge_cty["GasPropane%"] = Merge_cty["GasPropaneUnits"] / Merge_cty["UNITS"]

# Overall ratio across all counties, as a quick sanity check.
print(Merge_cty["GasPropaneUnits"].sum() / Merge_cty["UNITS"].sum())

Merge_cty = Merge_cty[['FIP', 'UNITS', 'GasPropaneUnits', 'GasPropane%']]
Merge_cty.to_csv("Merge.csv")
