In [1]:
#!pip install reverse_geocoder

import pandas as pd
import reverse_geocoder as rg

In [2]:
# Output
#  df_ts_complete: df with values where time series is complete
#  df_estimate: df where all time series values are NaN but there is an estimate
#  df_ts_incomplete: df with values where at least one of time series values is NaN
# 
# Reads global powerplant data data from CSV, drops irrelevant columns, splits df (see output), 
# and re-indexes df with gppdf_idnr as index
def read_clean_and_split():  
    df = pd.read_csv("..\data\global_power_plant_database.csv")
    df_all = df.drop(['country_long', 'name', 'url', 'geolocation_source', 'wepp_id'], axis=1)
    
    df_ts = (df_all.dropna(how='all', subset=['generation_gwh_2013', 'generation_gwh_2014', 'generation_gwh_2015', 'generation_gwh_2016', 'generation_gwh_2017'])
                      .set_index('gppd_idnr'))
    df_ts_complete = df_ts.dropna(how='any', subset=['generation_gwh_2013', 'generation_gwh_2014', 'generation_gwh_2015', 'generation_gwh_2016', 'generation_gwh_2017'])
    df_estimate = (df_all.dropna(how='any', subset=['estimated_generation_gwh'])
                   .set_index('gppd_idnr'))
    df_ts_incomplete = df_ts[df_ts[['generation_gwh_2013', 'generation_gwh_2014', 'generation_gwh_2015', 'generation_gwh_2016', 'generation_gwh_2017']].isnull().any(axis=1)]
    return df_ts_complete, df_ts_incomplete, df_estimate    

In [3]:
df_ts_complete, *_ = read_clean_and_split()

In [4]:
df_US = df_ts_complete[df_ts_complete['country'] == 'USA']

In [5]:
# Adds a column with us_state to dataframe df
# The used library for reverse geocoding works with K-D trees.
# Might produce inaccurate results for border regions
def coordinate_to_state(df):
    
    coordinates = [*zip(df['latitude'], df['longitude'])]
    #Uncomment below to include (lat, long) tuple as new column in df
    #df.insert(4, 'coordinates', coordinates)

    geo_infos = rg.search(tuple(coordinates))

    locations = list()

    for item in geo_infos:
        locations.append(item['admin1'])

    df.insert(1, 'us_state', locations)   


In [6]:
# Fixes the obvious mistakes made by coordinate_to_state function (where us_state is not a US state)
# Based on looking up the locations manually
def fix_misclassification(df):
    #British Columbia
    df.at['USA0054249', 'us_state'] = 'Idaho'
    df.loc[df['us_state']=='British Columbia', 'us_state'] = 'Washington'
    
    #Quebec
    df.at['USA0056829', 'us_state'] = 'Maine'
    df.loc[df['us_state']=='Quebec', 'us_state'] = 'New York'
    
    #Baja California
    df.at['USA0000120', 'us_state'] = 'Arizona'
    df.loc[df['us_state']=='Baja California', 'us_state'] = 'California'
    
    #Ontario 
    df.at['USA0006369', 'us_state'] = 'Michigan'
    df.at['USA0010487', 'us_state'] = 'Minnesota'
    df.at['USA0002694', 'us_state'] = 'New York'
    
    #Chukotskiy Avtonomnyy Okrug
    df.loc[df['us_state']=='Chukotskiy Avtonomnyy Okrug', 'us_state'] = 'Alaska'
    
    #Tamaulipas
    df.loc[df['us_state']=='Tamaulipas', 'us_state'] = 'Texas'
    
    #Yukon
    df.loc[df['us_state']=='Yukon', 'us_state'] = 'Alaska'   

In [7]:
coordinate_to_state(df_US)
fix_misclassification(df_US)

Loading formatted geocoded file...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [8]:
df_US['us_state'].value_counts()

California          912
New York            327
Texas               320
Minnesota           255
Iowa                213
North Carolina      207
Michigan            199
Pennsylvania        185
Illinois            183
New Jersey          182
Wisconsin           169
Massachusetts       160
Washington          132
Idaho               127
Ohio                126
Florida             125
Alaska              125
Colorado            124
Oregon              124
Kansas              117
Virginia            116
Georgia             113
Indiana             100
Missouri             95
South Carolina       93
Arizona              93
Nebraska             90
Maine                89
Oklahoma             77
Connecticut          75
Louisiana            75
Alabama              72
Utah                 67
New Mexico           65
Maryland             63
Nevada               63
New Hampshire        60
Wyoming              58
Vermont              57
Tennessee            53
Arkansas             52
Montana         

In [9]:
df_US

Unnamed: 0_level_0,country,us_state,capacity_mw,latitude,longitude,primary_fuel,other_fuel1,other_fuel2,other_fuel3,commissioning_year,owner,source,year_of_capacity_data,generation_gwh_2013,generation_gwh_2014,generation_gwh_2015,generation_gwh_2016,generation_gwh_2017,estimated_generation_gwh
gppd_idnr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
USA0059371,USA,New Jersey,1.9,40.2003,-74.5761,Solar,,,,2012.000000,SunRay Power LLC,U.S. Energy Information Administration,2017.0,2.41461,2.35000,2.430000,2.492000,2.27600,
USA0057310,USA,California,1.3,33.7943,-118.2414,Solar,,,,2011.000000,Konoike Pacific,U.S. Energy Information Administration,2017.0,1.52800,2.14900,1.515000,1.592000,1.66000,
USA0057458,USA,New Jersey,3.8,40.5358,-74.3913,Solar,,,,2011.000000,Avidan Energy Solutions,U.S. Energy Information Administration,2017.0,5.03600,4.52400,4.802000,5.051000,4.81900,
USA0007770,USA,Illinois,4.2,41.9084,-89.0466,Gas,,,,2000.000000,Rochelle Municipal Utilities,U.S. Energy Information Administration,2017.0,0.21500,0.17800,0.271000,0.306000,0.26400,
USA0058187,USA,New Jersey,1.9,40.5161,-74.3400,Solar,,,,2012.000000,180 Raritan Energy Solutions LLC,U.S. Energy Information Administration,2017.0,2.74100,2.58400,2.526000,2.659000,2.54700,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
USA0055392,USA,Illinois,596.7,42.4776,-87.8950,Gas,Oil,,,2002.333333,Zion Energy LLC,U.S. Energy Information Administration,2017.0,0.00000,63.65600,132.434005,435.493999,462.06300,
USA0056871,USA,Illinois,7.0,42.4803,-87.8861,Waste,,,,2003.000000,Energy Developments Inc,U.S. Energy Information Administration,2017.0,43.59200,49.13400,46.699000,42.480000,38.66900,
USA0001368,USA,Indiana,18.0,38.2803,-85.7023,Gas,,,,1969.000000,Louisville Gas & Electric Co,U.S. Energy Information Administration,2017.0,0.20300,0.07800,1.058000,0.062000,0.01500,
USA0057648,USA,New York,3.4,42.8869,-76.9683,Wind,,,,2012.000000,Zotos International,U.S. Energy Information Administration,2017.0,3.67100,2.46015,2.489000,1.670000,1.23349,


In [13]:
df_US.primary_fuel.unique()

array(['Solar', 'Gas', 'Oil', 'Wind', 'Hydro', 'Coal', 'Biomass', 'Waste',
       'Cogeneration', 'Geothermal', 'Nuclear', 'Petcoke', 'Other',
       'Storage'], dtype=object)

In [21]:
colors = {'Solar':'yellow', 'Gas':'lightgreen', 'Oil':'olive', 'Wind':'cyan', 'Hydro':'blue', 'Coal':'black', 'Biomass':'green', 'Waste':'brown', 'Cogeneration':'bisque', 'Geothermal':'red', 'Nuclear':'orange', 'Petcoke':'teal', 'Other':'grey', 'Storage':'lime'}

In [22]:
import plotly.graph_objects as go

fig = go.Figure(data=go.Scattergeo(
        lon = df_US['longitude'],
        lat = df_US['latitude'],
        text = df_US['primary_fuel'],
        mode = 'markers',
        marker_color = df_US['primary_fuel'].apply(lambda x: colors[x]),
        ))

fig.update_layout(
        title = 'Powerplants US',
        geo_scope='usa',
    )
fig.show()

In [11]:
import plotly.express as px
df = px.data.gapminder().query("year == 2007")
fig = px.scatter_geo(df, locations="iso_alpha",
                     color="continent", # which column to use to set the color of markers
                     hover_name="country", # column added to hover information
                     size="pop", # size of markers
                     projection="natural earth")
fig.show()