In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
#Read the csv generated by Anthony which shows power plant locations by county
powerplant_data = "../Output/power_us_comp_df.csv"
powerplant_data_df = pd.read_csv(powerplant_data, encoding="ISO-8859-1")

In [3]:
#Only look at the power plant type and county and state
powerplant_data_df = powerplant_data_df[["primary_fuel", "county", "state"]]
powerplant_data_df = powerplant_data_df.dropna()
powerplant_data_df

Unnamed: 0,primary_fuel,county,state
0,Solar,Rockland County,New York
1,Solar,Lee County,North Carolina
2,Solar,Yadkin County,North Carolina
3,Solar,Mercer County,New Jersey
4,Solar,Norfolk County,Massachusetts
...,...,...,...
9828,Waste,Lake County,Illinois
9829,Gas,Jefferson County,Kentucky
9830,Wind,Ontario County,New York
9831,Solar,Goodhue County,Minnesota


In [4]:
#Combine the county and state columns into one column
powerplant_data_df = powerplant_data_df[["primary_fuel", "county", "state"]]
powerplant_data_df["county, state"]= powerplant_data_df["county"] + ', ' + powerplant_data_df["state"]
powerplant_data_df

Unnamed: 0,primary_fuel,county,state,"county, state"
0,Solar,Rockland County,New York,"Rockland County, New York"
1,Solar,Lee County,North Carolina,"Lee County, North Carolina"
2,Solar,Yadkin County,North Carolina,"Yadkin County, North Carolina"
3,Solar,Mercer County,New Jersey,"Mercer County, New Jersey"
4,Solar,Norfolk County,Massachusetts,"Norfolk County, Massachusetts"
...,...,...,...,...
9828,Waste,Lake County,Illinois,"Lake County, Illinois"
9829,Gas,Jefferson County,Kentucky,"Jefferson County, Kentucky"
9830,Wind,Ontario County,New York,"Ontario County, New York"
9831,Solar,Goodhue County,Minnesota,"Goodhue County, Minnesota"


In [5]:
#Ignore all other columns
powerplant_data_df = powerplant_data_df[["county, state", "primary_fuel"]]
powerplant_data_df

Unnamed: 0,"county, state",primary_fuel
0,"Rockland County, New York",Solar
1,"Lee County, North Carolina",Solar
2,"Yadkin County, North Carolina",Solar
3,"Mercer County, New Jersey",Solar
4,"Norfolk County, Massachusetts",Solar
...,...,...
9828,"Lake County, Illinois",Waste
9829,"Jefferson County, Kentucky",Gas
9830,"Ontario County, New York",Wind
9831,"Goodhue County, Minnesota",Solar


In [6]:
#Get a list of all counties in the dataset
counties = powerplant_data_df["county, state"].unique()
counties


array(['Rockland County, New York', 'Lee County, North Carolina',
       'Yadkin County, North Carolina', ..., 'Stephens County, Georgia',
       'York County, Virginia', 'Marshall County, Indiana'], dtype=object)

In [7]:
#Get a list of all fuel types in the data set
fuel_types = powerplant_data_df["primary_fuel"].unique()
fuel_types

array(['Solar', 'Gas', 'Oil', 'Hydro', 'Wind', 'Coal', 'Biomass', 'Waste',
       'Storage', 'Cogeneration', 'Geothermal', 'Petcoke', 'Nuclear',
       'Other'], dtype=object)

In [18]:
#This is INCREDIBLY inefficient code, but it did give us the data we need - I'd like to look through at a more efficient way to do it

#Create a set of empty lists to store the total number of each type of powerplant by county
total_solar = []
total_gas = []
total_oil = []
total_hydro = []
total_wind = []
total_coal = []
total_biomass = []
total_waste = []
total_storage = []
total_cogeneration = []
total_geothermal = []
total_petcoke = []
total_nuclear = []
total_other = []
total = 0

#Run through every county in the list
for county in counties:
    #Set the total number of each plant to zero at the start of each run
    num_solar = 0
    num_gas = 0
    num_oil = 0
    num_hydro = 0
    num_wind = 0
    num_coal = 0
    num_biomass = 0
    num_waste = 0
    num_storage = 0
    num_cogeneration = 0
    num_geothermal = 0
    num_petcoke = 0
    num_nuclear = 0
    num_other = 0
    
    #Run through the whole dataframe
    for index, row in powerplant_data_df.iterrows():
        
        #Look for counties that match the county name in each iteration of the loop
        if (powerplant_data_df.loc[index, "county, state"] == county):
        
            #Check the fuel type for each row and add 1 to the relevant number
            if (powerplant_data_df.loc[index, "primary_fuel"] == "Solar"):
                num_solar= num_solar + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Gas"):
                num_gas = num_gas + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Oil"):
                num_oil = num_oil + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Hydro"):
                num_hydro = num_hydro + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Wind"):
                num_wind = num_wind + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Coal"):
                num_coal = num_coal + 1   
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Biomass"):
                num_biomass = num_biomass + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Waste"):
                num_waste = num_waste + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Storage"):
                num_storage = num_storage + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Cogeneration"):
                num_cogeneration = num_cogeneration + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Geothermal"):
                num_geothermal = num_geothermal + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Petcoke"):
                num_petcoke = num_petcoke + 1 
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Nuclear"):
                num_nuclear = num_nuclear + 1
            elif (powerplant_data_df.loc[index, "primary_fuel"] == "Other"):
                num_other = num_other + 1

    #Store the number of each plant in the list. 
    # This will be in the same order as the counties in counties list
    total_solar.append(num_solar)
    total_gas.append(num_gas)
    total_oil.append(num_oil)
    total_hydro.append(num_hydro)
    total_wind.append(num_wind)
    total_coal.append(num_coal)
    total_biomass.append(num_biomass)
    total_waste.append(num_waste)
    total_storage.append(num_storage)
    total_cogeneration.append(num_cogeneration)
    total_geothermal.append(num_geothermal)
    total_petcoke.append(num_petcoke)
    total_nuclear.append(num_nuclear)
    total_other.append(num_other)
    
    #This code was very slow because I was running through the whole dataframe for each county in the list
    #So I included this to let me know the progress as it ran
    total = total + 1
    print(f"Total complete: {total} out of {len(counties)}")



Total complete: 1 out of 2025
Total complete: 2 out of 2025
Total complete: 3 out of 2025
Total complete: 4 out of 2025
Total complete: 5 out of 2025
Total complete: 6 out of 2025
Total complete: 7 out of 2025
Total complete: 8 out of 2025
Total complete: 9 out of 2025
Total complete: 10 out of 2025
Total complete: 11 out of 2025
Total complete: 12 out of 2025
Total complete: 13 out of 2025
Total complete: 14 out of 2025
Total complete: 15 out of 2025
Total complete: 16 out of 2025
Total complete: 17 out of 2025
Total complete: 18 out of 2025
Total complete: 19 out of 2025
Total complete: 20 out of 2025
Total complete: 21 out of 2025
Total complete: 22 out of 2025
Total complete: 23 out of 2025
Total complete: 24 out of 2025
Total complete: 25 out of 2025
Total complete: 26 out of 2025
Total complete: 27 out of 2025
Total complete: 28 out of 2025
Total complete: 29 out of 2025
Total complete: 30 out of 2025
Total complete: 31 out of 2025
Total complete: 32 out of 2025
Total complete: 3

Total complete: 261 out of 2025
Total complete: 262 out of 2025
Total complete: 263 out of 2025
Total complete: 264 out of 2025
Total complete: 265 out of 2025
Total complete: 266 out of 2025
Total complete: 267 out of 2025
Total complete: 268 out of 2025
Total complete: 269 out of 2025
Total complete: 270 out of 2025
Total complete: 271 out of 2025
Total complete: 272 out of 2025
Total complete: 273 out of 2025
Total complete: 274 out of 2025
Total complete: 275 out of 2025
Total complete: 276 out of 2025
Total complete: 277 out of 2025
Total complete: 278 out of 2025
Total complete: 279 out of 2025
Total complete: 280 out of 2025
Total complete: 281 out of 2025
Total complete: 282 out of 2025
Total complete: 283 out of 2025
Total complete: 284 out of 2025
Total complete: 285 out of 2025
Total complete: 286 out of 2025
Total complete: 287 out of 2025
Total complete: 288 out of 2025
Total complete: 289 out of 2025
Total complete: 290 out of 2025
Total complete: 291 out of 2025
Total co

Total complete: 518 out of 2025
Total complete: 519 out of 2025
Total complete: 520 out of 2025
Total complete: 521 out of 2025
Total complete: 522 out of 2025
Total complete: 523 out of 2025
Total complete: 524 out of 2025
Total complete: 525 out of 2025
Total complete: 526 out of 2025
Total complete: 527 out of 2025
Total complete: 528 out of 2025
Total complete: 529 out of 2025
Total complete: 530 out of 2025
Total complete: 531 out of 2025
Total complete: 532 out of 2025
Total complete: 533 out of 2025
Total complete: 534 out of 2025
Total complete: 535 out of 2025
Total complete: 536 out of 2025
Total complete: 537 out of 2025
Total complete: 538 out of 2025
Total complete: 539 out of 2025
Total complete: 540 out of 2025
Total complete: 541 out of 2025
Total complete: 542 out of 2025
Total complete: 543 out of 2025
Total complete: 544 out of 2025
Total complete: 545 out of 2025
Total complete: 546 out of 2025
Total complete: 547 out of 2025
Total complete: 548 out of 2025
Total co

Total complete: 775 out of 2025
Total complete: 776 out of 2025
Total complete: 777 out of 2025
Total complete: 778 out of 2025
Total complete: 779 out of 2025
Total complete: 780 out of 2025
Total complete: 781 out of 2025
Total complete: 782 out of 2025
Total complete: 783 out of 2025
Total complete: 784 out of 2025
Total complete: 785 out of 2025
Total complete: 786 out of 2025
Total complete: 787 out of 2025
Total complete: 788 out of 2025
Total complete: 789 out of 2025
Total complete: 790 out of 2025
Total complete: 791 out of 2025
Total complete: 792 out of 2025
Total complete: 793 out of 2025
Total complete: 794 out of 2025
Total complete: 795 out of 2025
Total complete: 796 out of 2025
Total complete: 797 out of 2025
Total complete: 798 out of 2025
Total complete: 799 out of 2025
Total complete: 800 out of 2025
Total complete: 801 out of 2025
Total complete: 802 out of 2025
Total complete: 803 out of 2025
Total complete: 804 out of 2025
Total complete: 805 out of 2025
Total co

Total complete: 1031 out of 2025
Total complete: 1032 out of 2025
Total complete: 1033 out of 2025
Total complete: 1034 out of 2025
Total complete: 1035 out of 2025
Total complete: 1036 out of 2025
Total complete: 1037 out of 2025
Total complete: 1038 out of 2025
Total complete: 1039 out of 2025
Total complete: 1040 out of 2025
Total complete: 1041 out of 2025
Total complete: 1042 out of 2025
Total complete: 1043 out of 2025
Total complete: 1044 out of 2025
Total complete: 1045 out of 2025
Total complete: 1046 out of 2025
Total complete: 1047 out of 2025
Total complete: 1048 out of 2025
Total complete: 1049 out of 2025
Total complete: 1050 out of 2025
Total complete: 1051 out of 2025
Total complete: 1052 out of 2025
Total complete: 1053 out of 2025
Total complete: 1054 out of 2025
Total complete: 1055 out of 2025
Total complete: 1056 out of 2025
Total complete: 1057 out of 2025
Total complete: 1058 out of 2025
Total complete: 1059 out of 2025
Total complete: 1060 out of 2025
Total comp

Total complete: 1280 out of 2025
Total complete: 1281 out of 2025
Total complete: 1282 out of 2025
Total complete: 1283 out of 2025
Total complete: 1284 out of 2025
Total complete: 1285 out of 2025
Total complete: 1286 out of 2025
Total complete: 1287 out of 2025
Total complete: 1288 out of 2025
Total complete: 1289 out of 2025
Total complete: 1290 out of 2025
Total complete: 1291 out of 2025
Total complete: 1292 out of 2025
Total complete: 1293 out of 2025
Total complete: 1294 out of 2025
Total complete: 1295 out of 2025
Total complete: 1296 out of 2025
Total complete: 1297 out of 2025
Total complete: 1298 out of 2025
Total complete: 1299 out of 2025
Total complete: 1300 out of 2025
Total complete: 1301 out of 2025
Total complete: 1302 out of 2025
Total complete: 1303 out of 2025
Total complete: 1304 out of 2025
Total complete: 1305 out of 2025
Total complete: 1306 out of 2025
Total complete: 1307 out of 2025
Total complete: 1308 out of 2025
Total complete: 1309 out of 2025
Total comp

Total complete: 1529 out of 2025
Total complete: 1530 out of 2025
Total complete: 1531 out of 2025
Total complete: 1532 out of 2025
Total complete: 1533 out of 2025
Total complete: 1534 out of 2025
Total complete: 1535 out of 2025
Total complete: 1536 out of 2025
Total complete: 1537 out of 2025
Total complete: 1538 out of 2025
Total complete: 1539 out of 2025
Total complete: 1540 out of 2025
Total complete: 1541 out of 2025
Total complete: 1542 out of 2025
Total complete: 1543 out of 2025
Total complete: 1544 out of 2025
Total complete: 1545 out of 2025
Total complete: 1546 out of 2025
Total complete: 1547 out of 2025
Total complete: 1548 out of 2025
Total complete: 1549 out of 2025
Total complete: 1550 out of 2025
Total complete: 1551 out of 2025
Total complete: 1552 out of 2025
Total complete: 1553 out of 2025
Total complete: 1554 out of 2025
Total complete: 1555 out of 2025
Total complete: 1556 out of 2025
Total complete: 1557 out of 2025
Total complete: 1558 out of 2025
Total comp

Total complete: 1778 out of 2025
Total complete: 1779 out of 2025
Total complete: 1780 out of 2025
Total complete: 1781 out of 2025
Total complete: 1782 out of 2025
Total complete: 1783 out of 2025
Total complete: 1784 out of 2025
Total complete: 1785 out of 2025
Total complete: 1786 out of 2025
Total complete: 1787 out of 2025
Total complete: 1788 out of 2025
Total complete: 1789 out of 2025
Total complete: 1790 out of 2025
Total complete: 1791 out of 2025
Total complete: 1792 out of 2025
Total complete: 1793 out of 2025
Total complete: 1794 out of 2025
Total complete: 1795 out of 2025
Total complete: 1796 out of 2025
Total complete: 1797 out of 2025
Total complete: 1798 out of 2025
Total complete: 1799 out of 2025
Total complete: 1800 out of 2025
Total complete: 1801 out of 2025
Total complete: 1802 out of 2025
Total complete: 1803 out of 2025
Total complete: 1804 out of 2025
Total complete: 1805 out of 2025
Total complete: 1806 out of 2025
Total complete: 1807 out of 2025
Total comp

Unnamed: 0,County,num_solar,num_gas,num_oil,num_hydro,num_wind,num_coal,num_biomass,num_storage,num_cogeneration,num_geothermal,num_petcoke,num_nuclear,num_other
0,"Rockland County, New York",2,4,2,0,0,0,0,0,0,0,0,0,0
1,"Lee County, North Carolina",8,0,0,0,0,0,0,0,0,0,0,0,0
2,"Yadkin County, North Carolina",4,0,0,0,0,0,0,0,0,0,0,0,0
3,"Mercer County, New Jersey",13,5,0,0,0,0,0,1,0,0,0,0,0
4,"Norfolk County, Massachusetts",14,7,1,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020,"Yankton County, South Dakota",0,1,0,0,0,0,0,0,0,0,0,0,0
2021,"Coweta County, Georgia",0,1,0,0,0,0,0,0,0,0,0,0,0
2022,"Stephens County, Georgia",0,0,0,1,0,0,0,0,0,0,0,0,0
2023,"York County, Virginia",0,0,1,0,0,0,0,0,0,0,0,0,0


In [19]:
#Turn all the lists into one dataframe
powerplants_by_county_df = pd.DataFrame({
    "County": counties,
    "num_solar": total_solar,
    "num_gas": total_gas,
    "num_oil": total_oil,
    "num_hydro": total_hydro,
    "num_wind": total_wind,
    "num_coal": total_coal,
    "num_biomass": total_biomass,
    "num_storage" : total_storage,
    "num_cogeneration" : total_cogeneration,
    "num_geothermal" : total_geothermal,
    "num_petcoke" : total_petcoke,
    "num_nuclear" : total_nuclear, 
    "num_other" : total_other
})
powerplants_by_county_df

Unnamed: 0,County,num_solar,num_gas,num_oil,num_hydro,num_wind,num_coal,num_biomass,num_storage,num_cogeneration,num_geothermal,num_petcoke,num_nuclear,num_other
0,"Rockland County, New York",2,4,2,0,0,0,0,0,0,0,0,0,0
1,"Lee County, North Carolina",8,0,0,0,0,0,0,0,0,0,0,0,0
2,"Yadkin County, North Carolina",4,0,0,0,0,0,0,0,0,0,0,0,0
3,"Mercer County, New Jersey",13,5,0,0,0,0,0,1,0,0,0,0,0
4,"Norfolk County, Massachusetts",14,7,1,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020,"Yankton County, South Dakota",0,1,0,0,0,0,0,0,0,0,0,0,0
2021,"Coweta County, Georgia",0,1,0,0,0,0,0,0,0,0,0,0,0
2022,"Stephens County, Georgia",0,0,0,1,0,0,0,0,0,0,0,0,0
2023,"York County, Virginia",0,0,1,0,0,0,0,0,0,0,0,0,0


In [47]:
#Then publish the  dataframe to a csv file to be analyzed later
powerplants_by_county_df.to_csv("../Output/powerplants_by_county.csv")

In [73]:
#Read the csv
income_data = "../Resources/2017_median_income_by_county.csv"
income_data_df = pd.read_csv(income_data, encoding="ISO-8859-1")
income_data_df

Unnamed: 0,Geography,County,State,Total Households,Median Income
0,0500000US01003,Baldwin County,Alabama,79120,55342
1,0500000US01015,Calhoun County,Alabama,44507,46763
2,0500000US01043,Cullman County,Alabama,30740,45044
3,0500000US01049,DeKalb County,Alabama,25848,39373
4,0500000US01051,Elmore County,Alabama,29176,60558
...,...,...,...,...,...
832,0500000US72113,Ponce Municipio,Puerto Rico,50225,16270
833,0500000US72127,San Juan Municipio,Puerto Rico,140153,21912
834,0500000US72135,Toa Alta Municipio,Puerto Rico,21640,24223
835,0500000US72137,Toa Baja Municipio,Puerto Rico,27841,25487


In [74]:
#Create a column for county and state together
income_data_df = income_data_df[["County", "State", "Total Households", "Median Income"]]
income_data_df["county, state"]= income_data_df["County"] + ',' + income_data_df["State"]
income_data_df

Unnamed: 0,County,State,Total Households,Median Income,"county, state"
0,Baldwin County,Alabama,79120,55342,"Baldwin County, Alabama"
1,Calhoun County,Alabama,44507,46763,"Calhoun County, Alabama"
2,Cullman County,Alabama,30740,45044,"Cullman County, Alabama"
3,DeKalb County,Alabama,25848,39373,"DeKalb County, Alabama"
4,Elmore County,Alabama,29176,60558,"Elmore County, Alabama"
...,...,...,...,...,...
832,Ponce Municipio,Puerto Rico,50225,16270,"Ponce Municipio, Puerto Rico"
833,San Juan Municipio,Puerto Rico,140153,21912,"San Juan Municipio, Puerto Rico"
834,Toa Alta Municipio,Puerto Rico,21640,24223,"Toa Alta Municipio, Puerto Rico"
835,Toa Baja Municipio,Puerto Rico,27841,25487,"Toa Baja Municipio, Puerto Rico"


In [75]:
#Rename the columns for the merge later on
income_data_df.rename(columns={"County": "County (No state)", "county, state": "County"}, inplace=True)
income_data_df

Unnamed: 0,County (No state),State,Total Households,Median Income,County
0,Baldwin County,Alabama,79120,55342,"Baldwin County, Alabama"
1,Calhoun County,Alabama,44507,46763,"Calhoun County, Alabama"
2,Cullman County,Alabama,30740,45044,"Cullman County, Alabama"
3,DeKalb County,Alabama,25848,39373,"DeKalb County, Alabama"
4,Elmore County,Alabama,29176,60558,"Elmore County, Alabama"
...,...,...,...,...,...
832,Ponce Municipio,Puerto Rico,50225,16270,"Ponce Municipio, Puerto Rico"
833,San Juan Municipio,Puerto Rico,140153,21912,"San Juan Municipio, Puerto Rico"
834,Toa Alta Municipio,Puerto Rico,21640,24223,"Toa Alta Municipio, Puerto Rico"
835,Toa Baja Municipio,Puerto Rico,27841,25487,"Toa Baja Municipio, Puerto Rico"


In [76]:
#Get only the important columns
income_data_df = income_data_df[["County", "Total Households", "Median Income"]]
income_data_df

Unnamed: 0,County,Total Households,Median Income
0,"Baldwin County, Alabama",79120,55342
1,"Calhoun County, Alabama",44507,46763
2,"Cullman County, Alabama",30740,45044
3,"DeKalb County, Alabama",25848,39373
4,"Elmore County, Alabama",29176,60558
...,...,...,...
832,"Ponce Municipio, Puerto Rico",50225,16270
833,"San Juan Municipio, Puerto Rico",140153,21912
834,"Toa Alta Municipio, Puerto Rico",21640,24223
835,"Toa Baja Municipio, Puerto Rico",27841,25487


In [77]:
#Merge both dataframes together
total_data_df = pd.merge(powerplants_by_county_df, income_data_df, on="County", how="outer")

total_data_df

Unnamed: 0,County,num_solar,num_gas,num_oil,num_hydro,num_wind,num_coal,num_biomass,num_storage,num_cogeneration,num_geothermal,num_petcoke,num_nuclear,num_other,Total Households,Median Income
0,"Rockland County, New York",2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100167.0,88726.0
1,"Lee County, North Carolina",8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,
2,"Yadkin County, North Carolina",4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,
3,"Mercer County, New Jersey",13.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,128757.0,79173.0
4,"Norfolk County, Massachusetts",14.0,7.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,263862.0,100829.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2171,"Ponce Municipio, Puerto Rico",,,,,,,,,,,,,,50225.0,16270.0
2172,"San Juan Municipio, Puerto Rico",,,,,,,,,,,,,,140153.0,21912.0
2173,"Toa Alta Municipio, Puerto Rico",,,,,,,,,,,,,,21640.0,24223.0
2174,"Toa Baja Municipio, Puerto Rico",,,,,,,,,,,,,,27841.0,25487.0


In [85]:
#Only keep rows where the median income is not "N/A"
total_data_clean_df = total_data_df[total_data_df['Median Income'].notna()].reset_index(drop=True)
total_data_clean_df

Unnamed: 0,County,num_solar,num_gas,num_oil,num_hydro,num_wind,num_coal,num_biomass,num_storage,num_cogeneration,num_geothermal,num_petcoke,num_nuclear,num_other,Total Households,Median Income
0,"Rockland County, New York",2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100167.0,88726.0
1,"Mercer County, New Jersey",13.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,128757.0,79173.0
2,"Norfolk County, Massachusetts",14.0,7.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,263862.0,100829.0
3,"Los Angeles County, California",113.0,49.0,2.0,19.0,1.0,0.0,3.0,5.0,4.0,0.0,0.0,0.0,0.0,3311231.0,65006.0
4,"Middlesex County, New Jersey",41.0,7.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,285446.0,85337.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,"Ponce Municipio, Puerto Rico",,,,,,,,,,,,,,50225.0,16270.0
833,"San Juan Municipio, Puerto Rico",,,,,,,,,,,,,,140153.0,21912.0
834,"Toa Alta Municipio, Puerto Rico",,,,,,,,,,,,,,21640.0,24223.0
835,"Toa Baja Municipio, Puerto Rico",,,,,,,,,,,,,,27841.0,25487.0


In [86]:
#Create a column to store the total number of powerplants
total_data_clean_df["Total Powerplants"] = (total_data_clean_df["num_solar"] +
                                            total_data_clean_df["num_gas"] +
                                            total_data_clean_df["num_oil"] +
                                            total_data_clean_df["num_hydro"] +
                                            total_data_clean_df["num_wind"] +
                                            total_data_clean_df["num_coal"] +
                                            total_data_clean_df["num_biomass"] +
                                            total_data_clean_df["num_storage"] +
                                            total_data_clean_df["num_cogeneration"] +
                                            total_data_clean_df["num_geothermal"] +
                                            total_data_clean_df["num_petcoke"] +
                                            total_data_clean_df["num_nuclear"] +
                                            total_data_clean_df["num_other"])
total_data_clean_df

Unnamed: 0,County,num_solar,num_gas,num_oil,num_hydro,num_wind,num_coal,num_biomass,num_storage,num_cogeneration,num_geothermal,num_petcoke,num_nuclear,num_other,Total Households,Median Income,Total Powerplants
0,"Rockland County, New York",2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100167.0,88726.0,8.0
1,"Mercer County, New Jersey",13.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,128757.0,79173.0,19.0
2,"Norfolk County, Massachusetts",14.0,7.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,263862.0,100829.0,23.0
3,"Los Angeles County, California",113.0,49.0,2.0,19.0,1.0,0.0,3.0,5.0,4.0,0.0,0.0,0.0,0.0,3311231.0,65006.0,196.0
4,"Middlesex County, New Jersey",41.0,7.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,285446.0,85337.0,49.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,"Ponce Municipio, Puerto Rico",,,,,,,,,,,,,,50225.0,16270.0,
833,"San Juan Municipio, Puerto Rico",,,,,,,,,,,,,,140153.0,21912.0,
834,"Toa Alta Municipio, Puerto Rico",,,,,,,,,,,,,,21640.0,24223.0,
835,"Toa Baja Municipio, Puerto Rico",,,,,,,,,,,,,,27841.0,25487.0,


In [87]:
#Drop any rows where the total number of powerplants is "N/A" (this doesn't remove any 0s)
powerplants_and_income_df = total_data_clean_df[total_data_clean_df['Total Powerplants'].notna()]
powerplants_and_income_df

Unnamed: 0,County,num_solar,num_gas,num_oil,num_hydro,num_wind,num_coal,num_biomass,num_storage,num_cogeneration,num_geothermal,num_petcoke,num_nuclear,num_other,Total Households,Median Income,Total Powerplants
0,"Rockland County, New York",2.0,4.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100167.0,88726.0,8.0
1,"Mercer County, New Jersey",13.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,128757.0,79173.0,19.0
2,"Norfolk County, Massachusetts",14.0,7.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,263862.0,100829.0,23.0
3,"Los Angeles County, California",113.0,49.0,2.0,19.0,1.0,0.0,3.0,5.0,4.0,0.0,0.0,0.0,0.0,3311231.0,65006.0,196.0
4,"Middlesex County, New Jersey",41.0,7.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,285446.0,85337.0,49.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
681,"Collier County, Florida",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,144354.0,66048.0,0.0
682,"Shawnee County, Kansas",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,73724.0,57095.0,0.0
683,"Putnam County, New York",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34549.0,99479.0,1.0
684,"Coweta County, Georgia",0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,52796.0,72787.0,1.0


In [88]:
#Save it to a csv file
powerplants_and_income_df.to_csv("../Output/powerplants_and_income_by_county.csv")