In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
from shapely.geometry import Point
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster


In [2]:
# Load the USWTDB turbine GeoJSON (release/date as encoded in the filename)
# into a GeoDataFrame; the path is relative to the notebook's directory.
wind = gpd.read_file(
    filename='../data/USWTDB/uswtdb_v3_2_20201014.geojson',
)

In [3]:
# Turbine identifiers in ascending order, to eyeball the range of case_id values.
wind['case_id'].sort_values()

14452    3000001.0
50556    3000002.0
64371    3000003.0
50450    3000005.0
45687    3000008.0
           ...    
5908     3105263.0
5854     3105264.0
5829     3105265.0
12168    3105266.0
12167    3105267.0
Name: case_id, Length: 65548, dtype: float64

In [4]:
# Turbine count per manufacturer, most common first.
# The label and count columns are named explicitly ('index', 't_manu') rather
# than relying on what value_counts().to_frame().reset_index() happens to call
# them: pandas >= 2.0 names them after the source column and 'count', which
# would make any lookup of manufacturers['index'] raise a KeyError.
manufacturers = (
    wind['t_manu']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='t_manu')
)
# Only the last expression of a cell is rendered, so show the top of the table.
manufacturers.head()

Unnamed: 0,index,t_manu
0,GE Wind,25858
1,Vestas,13904
2,Siemens,5144
3,Gamesa,3076
4,Mitsubishi,2791


In [5]:
# (rows, columns) of the turbine table.
wind.shape

(65548, 18)

In [6]:
# Column dtypes and non-null counts, to see which attributes have missing values.
wind.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 65548 entries, 0 to 65547
Data columns (total 18 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   case_id     65548 non-null  float64 
 1   t_state     65548 non-null  object  
 2   p_name      65548 non-null  object  
 3   p_year      64055 non-null  float64 
 4   p_tnum      65548 non-null  float64 
 5   p_cap       60580 non-null  float64 
 6   t_manu      59569 non-null  object  
 7   t_model     59245 non-null  object  
 8   t_cap       59571 non-null  float64 
 9   t_hh        58629 non-null  float64 
 10  t_rd        58911 non-null  float64 
 11  t_rsa       58911 non-null  float64 
 12  t_ttlh      58629 non-null  float64 
 13  t_conf_atr  65548 non-null  float64 
 14  t_conf_loc  65548 non-null  float64 
 15  xlong       65548 non-null  float64 
 16  ylat        65548 non-null  float64 
 17  geometry    65548 non-null  geometry
dtypes: float64(13), geometry(1), object(4)

In [7]:
# Number of turbines per project year (p_year); rows with a missing p_year are
# dropped by value_counts().
# Columns are named explicitly ('index' = year, 'p_year' = count) so that later
# code sorting by 'index' keeps working on pandas >= 2.0, where
# value_counts().to_frame().reset_index() no longer yields an 'index' column.
year_install = (
    wind['p_year']
    .value_counts()
    .rename_axis('index')
    .reset_index(name='p_year')
)

In [8]:
# Put the per-year counts in chronological order; the 'index' column holds the year.
year_install = year_install.sort_values('index', ascending=True)

In [9]:
# Display the per-year turbine counts.
year_install

Unnamed: 0,index,p_year
36,1981.0,11
17,1982.0,937
22,1983.0,433
30,1984.0,155
19,1985.0,714
31,1986.0,151
28,1987.0,213
26,1988.0,277
25,1989.0,288
24,1990.0,327


In [10]:
# Older turbine-location table (through March 2014, per the filename), read as
# a plain pandas DataFrame; coordinates arrive as the lat_DD / long_DD columns.
wind_old = pd.read_csv(
    filepath_or_buffer='../data/Onshore_Industrial_Wind_Turbine_Locations_for_the_United_States_to_March_2014.csv',
)

In [11]:
# List the columns available in the 2014 table.
wind_old.columns

Index(['FID', 'unique_id', 'site_name', 'total_turb', 'on_year', 'year_range',
       'on_year_s', 'manufac', 'model', 'type_tower', 'decommiss',
       'MW_turbine', 'total_cpcy', 'total_ht', 'tower_h', 'blade_l',
       'rotor_dia', 'rotor_s_a', 'lat_DD', 'long_DD', 'state', 'county',
       'conf_attr', 'conf_loc', 'WENDI_name', 'EIA_name', 'FAA_jdate',
       'FAA_AGL', 'FAA_ORS', 'image_name', 'image_year', 'comments'],
      dtype='object')

In [12]:
# Build one shapely Point per row from the decimal-degree columns.
# Point takes (x, y), i.e. longitude first, then latitude.
# Note: this adds the column to wind_old in place.
wind_old['geometry'] = wind_old.apply(
    lambda row: Point((float(row['long_DD']), float(row['lat_DD']))),
    axis=1,
)

In [13]:
# Preview the first rows, including the newly added geometry column.
wind_old.head()

Unnamed: 0,FID,unique_id,site_name,total_turb,on_year,year_range,on_year_s,manufac,model,type_tower,...,conf_loc,WENDI_name,EIA_name,FAA_jdate,FAA_AGL,FAA_ORS,image_name,image_year,comments,geometry
0,0,982,unknown Gilliam County,2,unknown,unknown,-99999,unknown,unknown,unknown,...,0,unknown,unknown,2013192,124.09,41-020577,Bing Maps Aerial,unknown,"FAA lists as Dismantle, google temporal analys...",POINT (-120.298786 45.653994)
1,1,1065,unknown Gilliam County,2,unknown,unknown,-99999,unknown,unknown,unknown,...,0,unknown,unknown,2013192,124.09,41-020578,Bing Maps Aerial,unknown,"FAA lists as Dismantle, google temporal analys...",POINT (-120.305986 45.663494)
2,2,1895,Banner Wind Project,2,2008,no,2008,Entegrity,EW50,trestle,...,2,Banner Wind Project,unknown,2009065,38.72,02-020064,Bing Maps Aerial,unknown,"trestle turbines removed, new monopoles instal...",POINT (-165.427476 64.569901)
3,3,1897,Banner Wind Project,2,2008,no,2008,Entegrity,EW50,trestle,...,2,Banner Wind Project,unknown,2009065,38.72,02-020070,Bing Maps Aerial,unknown,"trestle turbines removed, new monopoles instal...",POINT (-165.426376 64.57090100000001)
4,4,2608,unknown Tehachapi Wind Resource Area 1,549,1982_1990,yes,1982,unknown,unknown,monopole,...,2,unknown,unknown,-99999,-99999.0,unknown,NAIP,2012,,POINT (-118.321488 35.059895)


In [14]:
# Promote the 2014 table to a GeoDataFrame, tagging it with the CRS of `wind`.
# assumes lat_DD / long_DD are expressed in that same CRS — TODO confirm
wind_old_geo = gpd.GeoDataFrame(
    wind_old,
    geometry=wind_old['geometry'],
    crs=wind.crs,
)

In [16]:
# Inner join of the current turbine table with the 2014 table on the geometry
# column, keeping only rows whose Point values are equal in both.
# NOTE(review): this is an exact-equality match on coordinates, so turbines whose
# positions differ even slightly between the datasets are not paired; a
# tolerance-based or spatial join may be what is intended — confirm.
# NOTE(review): the right-hand side is the plain DataFrame `wind_old`, not
# `wind_old_geo` — confirm this is deliberate.
wind_de = pd.merge(
    left=wind,
    right=wind_old,
    how='inner',
    on='geometry',
)

In [17]:
# Distribution of the 2014 table's 'decommiss' flag among the matched turbines.
wind_de['decommiss'].value_counts()

no    611
Name: decommiss, dtype: int64

In [18]:
# Columns of the merged table (columns from both inputs, joined on 'geometry').
wind_de.columns

Index(['case_id', 't_state', 'p_name', 'p_year', 'p_tnum', 'p_cap', 't_manu',
       't_model', 't_cap', 't_hh', 't_rd', 't_rsa', 't_ttlh', 't_conf_atr',
       't_conf_loc', 'xlong', 'ylat', 'geometry', 'FID', 'unique_id',
       'site_name', 'total_turb', 'on_year', 'year_range', 'on_year_s',
       'manufac', 'model', 'type_tower', 'decommiss', 'MW_turbine',
       'total_cpcy', 'total_ht', 'tower_h', 'blade_l', 'rotor_dia',
       'rotor_s_a', 'lat_DD', 'long_DD', 'state', 'county', 'conf_attr',
       'conf_loc', 'WENDI_name', 'EIA_name', 'FAA_jdate', 'FAA_AGL', 'FAA_ORS',
       'image_name', 'image_year', 'comments'],
      dtype='object')

In [40]:
# Columns of the 2014 GeoDataFrame.
wind_old_geo.columns

Index(['FID', 'unique_id', 'site_name', 'total_turb', 'on_year', 'year_range',
       'on_year_s', 'manufac', 'model', 'type_tower', 'decommiss',
       'MW_turbine', 'total_cpcy', 'total_ht', 'tower_h', 'blade_l',
       'rotor_dia', 'rotor_s_a', 'lat_DD', 'long_DD', 'state', 'county',
       'conf_attr', 'conf_loc', 'WENDI_name', 'EIA_name', 'FAA_jdate',
       'FAA_AGL', 'FAA_ORS', 'image_name', 'image_year', 'comments',
       'geometry'],
      dtype='object')

In [21]:
# Mean hub height (t_hh) per project year; years with no recorded t_hh come out as NaN.
wind.groupby('p_year')['t_hh'].mean()

p_year
1981.0          NaN
1982.0          NaN
1983.0    22.800000
1984.0    24.000000
1985.0    24.386341
1986.0    25.285714
1987.0          NaN
1988.0    23.000000
1989.0          NaN
1990.0    30.000000
1991.0          NaN
1992.0    41.500000
1994.0    40.000000
1995.0    39.651163
1996.0    50.000000
1997.0    41.647059
1998.0    52.080645
1999.0    57.633149
2000.0    57.180556
2001.0    58.414576
2002.0    62.699782
2003.0    69.234783
2004.0    65.421779
2005.0    75.148028
2006.0    75.845436
2007.0    78.132560
2008.0    78.195780
2009.0    78.621082
2010.0    79.325188
2011.0    80.877153
2012.0    83.711371
2013.0    80.418803
2014.0    82.666133
2015.0    82.456762
2016.0    82.940776
2017.0    86.010600
2018.0    88.251646
2019.0    90.078615
2020.0    90.112127
Name: t_hh, dtype: float64

In [22]:
# Tallest recorded hub height (t_hh) per project year.
wind.groupby('p_year')['t_hh'].max()

p_year
1981.0      NaN
1982.0      NaN
1983.0     22.8
1984.0     24.0
1985.0     24.6
1986.0     60.0
1987.0      NaN
1988.0     23.0
1989.0      NaN
1990.0     30.0
1991.0      NaN
1992.0     43.0
1994.0     40.0
1995.0     40.0
1996.0     50.0
1997.0     50.0
1998.0     65.0
1999.0     80.0
2000.0     67.0
2001.0     80.0
2002.0     80.0
2003.0     80.0
2004.0     80.0
2005.0     85.0
2006.0     80.0
2007.0    105.0
2008.0    100.0
2009.0     80.0
2010.0    100.0
2011.0    100.0
2012.0    100.0
2013.0    103.0
2014.0    100.0
2015.0    100.0
2016.0    116.5
2017.0     95.0
2018.0    130.0
2019.0    131.0
2020.0    112.0
Name: t_hh, dtype: float64

In [23]:
# Mean hub height for every (project year, manufacturer) pair, flattened into
# a table with columns p_year, t_manu, t_hh.
hub_height_year = (
    wind
    .groupby(['p_year', 't_manu'])['t_hh']
    .mean()
    .reset_index()
)

In [24]:
# Show the first 50 (year, manufacturer) rows of mean hub height.
hub_height_year.head(50)

Unnamed: 0,p_year,t_manu,t_hh
0,1982.0,Lolland,
1,1982.0,Micon,
2,1982.0,Vestas,
3,1983.0,Vestas,22.8
4,1984.0,Bonus,24.0
5,1984.0,Micon,
6,1984.0,Nordtank,
7,1985.0,Bonus,24.6
8,1985.0,Micon,
9,1985.0,Nordtank,24.0


In [25]:
# Number of turbines (non-null case_id values) for every (project year,
# manufacturer) pair, flattened into a table; the count lands in 'case_id'.
turbine_count_year = (
    wind
    .groupby(['p_year', 't_manu'])['case_id']
    .count()
    .reset_index()
)

In [26]:
# Show the first 50 (year, manufacturer) rows of turbine counts.
turbine_count_year.head(50)

Unnamed: 0,p_year,t_manu,case_id
0,1982.0,Lolland,2
1,1982.0,Micon,1
2,1982.0,Vestas,1
3,1983.0,Vestas,432
4,1984.0,Bonus,20
5,1984.0,Micon,1
6,1984.0,Nordtank,134
7,1985.0,Bonus,132
8,1985.0,Micon,108
9,1985.0,Nordtank,73


In [27]:
# Turbine count per (project year, state), returned as a MultiIndex Series.
wind.groupby(['p_year', 't_state'])['case_id'].count()

p_year  t_state
1981.0  CA          11
1982.0  CA         937
1983.0  CA         432
        OK           1
1984.0  CA         155
                  ... 
2020.0  TX         882
        VA           2
        WA          38
        WV          20
        WY         153
Name: case_id, Length: 502, dtype: int64

In [28]:
# Size of the subset of `wind` with project year 1981.
wind.loc[wind['p_year'].eq(1981.0)].shape

(11, 18)

In [29]:
# Load the earlier USWTDB release (v1.0, dated 2018-04-19 per the filename)
# for comparison against the current table.
wind_v1 = pd.read_csv(
    filepath_or_buffer='../data/uswtdb_v1_0_20180419.csv',
)

In [30]:
# Size of the 1981 subset in the v1 release, for comparison with the same
# filter applied to `wind`.
wind_v1.loc[wind_v1['p_year'].eq(1981)].shape

(537, 24)

In [31]:
# Per-year turbine counts for the v1 release, in chronological order.
# The year and count columns are named directly instead of sorting on the
# implicit 'index' column of value_counts().to_frame().reset_index(): pandas
# >= 2.0 no longer produces that column, so sort_values('index') would raise a
# KeyError there. On older pandas the resulting table is unchanged.
wind_v1_year = (
    wind_v1['p_year']
    .value_counts()
    .rename_axis('year')
    .reset_index(name='count_v1')
    .sort_values('year')
)
# Give the current-release counts matching column names so both tables can be
# joined on 'year'. This renames positionally (first column = year, second =
# count) and mutates year_install in place, so earlier cells that refer to its
# previous column names cannot be re-run after this one.
year_install.columns = ['year', 'count_v3']

In [32]:
# Side-by-side yearly counts: every year present in the current release, with
# the v1 count attached where that year exists in v1 (NaN otherwise).
old_new = year_install.merge(
    wind_v1_year,
    how='left',
    on='year',
)

In [33]:
# Display the per-year comparison of turbine counts (count_v3 vs. count_v1).
old_new

Unnamed: 0,year,count_v3,count_v1
0,1981.0,11,537.0
1,1982.0,937,1910.0
2,1983.0,433,979.0
3,1984.0,155,216.0
4,1985.0,714,1467.0
5,1986.0,151,311.0
6,1987.0,213,406.0
7,1988.0,277,282.0
8,1989.0,288,
9,1990.0,327,432.0


In [66]:
# Turbine count per 'on_year_s' value in the 2014 table, as a flat table; the
# count lands in the 'unique_id' column.
# NOTE(review): -99999 looks like a placeholder for an unknown year and is
# counted as its own group here — confirm before comparing with other tables.
wind_2014_by_year = (
    wind_old_geo
    .groupby(['on_year_s'])['unique_id']
    .count()
    .reset_index()
)

In [67]:
# Show the first 50 rows of the per-year counts from the 2014 table.
wind_2014_by_year.head(50)

Unnamed: 0,on_year_s,unique_id
0,-99999,485
1,1981,3679
2,1982,3635
3,1983,1343
4,1984,668
5,1985,602
6,1986,452
7,1987,264
8,1988,302
9,1989,440


In [57]:
# Frequency of each tower height value in the 2014 table.
# NOTE(review): -99999.0 presumably marks a missing height — confirm.
wind_old_geo['tower_h'].value_counts()

 80.0       23925
-99999.0    10324
 100.0       1744
 68.0        1500
 50.0        1092
            ...  
 34.0           1
 58.0           1
 120.0          1
 36.7           1
 66.0           1
Name: tower_h, Length: 63, dtype: int64

In [63]:
# Preview the first 50 rows of the current turbine table.
wind.head(50)

Unnamed: 0,case_id,t_state,p_name,p_year,p_tnum,p_cap,t_manu,t_model,t_cap,t_hh,t_rd,t_rsa,t_ttlh,t_conf_atr,t_conf_loc,xlong,ylat,geometry
0,3005443.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.351089,35.091896,POINT (-118.35109 35.09190)
1,3072704.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.364197,35.077644,POINT (-118.36420 35.07764)
2,3072695.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.36441,35.077435,POINT (-118.36441 35.07744)
3,3072661.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.363762,35.077908,POINT (-118.36376 35.07791)
4,3005333.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.36869,35.075294,POINT (-118.36869 35.07529)
5,3001247.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.368187,35.075794,POINT (-118.36819 35.07579)
6,3009410.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.368088,35.075893,POINT (-118.36809 35.07589)
7,3073377.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.352165,35.088856,POINT (-118.35216 35.08886)
8,3072663.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.368202,35.075695,POINT (-118.36820 35.07570)
9,3072670.0,CA,251 Wind,1987.0,194.0,18.43,Vestas,,95.0,,,,,2.0,3.0,-118.368408,35.075542,POINT (-118.36841 35.07554)
