In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os

from shapely.geometry import Polygon
from shapely.geometry import Point
import geopy.distance

In [None]:
# Yearly station-observation tables, one CSV per year.
# The first frame used to be bound to `gsod_2017` although it reads the 2016
# file and the aggregation cell below refers to `gsod_2016`; the name now
# matches the file so the notebook runs on a fresh kernel.
GSOD_DIR='/home/jnueno/climate_data/'
gsod_2016=pd.read_csv(GSOD_DIR+'gsod2016')
gsod_2015=pd.read_csv(GSOD_DIR+'gsod2015')
gsod_2014=pd.read_csv(GSOD_DIR+'gsod2014')
gsod_2013=pd.read_csv(GSOD_DIR+'gsod2013')
gsod_2012=pd.read_csv(GSOD_DIR+'gsod2012')
gsod_2011=pd.read_csv(GSOD_DIR+'gsod2011')
gsod_2010=pd.read_csv(GSOD_DIR+'gsod2010')
gsod_2009=pd.read_csv(GSOD_DIR+'gsod2009')
gsod_2008=pd.read_csv(GSOD_DIR+'gsod2008')
gsod_2007=pd.read_csv(GSOD_DIR+'gsod2007')

In [86]:
def aggregate_dataset(gsod):
    """Average ``temp`` and ``prcp`` per weather station.

    Rows whose ``prcp`` equals 99.99 are discarded first (presumably the
    dataset's "missing precipitation" sentinel -- TODO confirm against the
    data dictionary).  The remaining rows are grouped by the station key
    (``stn``, ``wban``) and both measurements are averaged over every row of
    the frame, i.e. over whatever period ``gsod`` covers.

    Parameters
    ----------
    gsod : pandas.DataFrame
        Must contain the columns ``stn``, ``wban``, ``temp`` and ``prcp``.

    Returns
    -------
    pandas.DataFrame
        One row per (``stn``, ``wban``) with columns
        ``stn``, ``wban``, ``temp``, ``prcp``.
    """
    clean_dataset = gsod[gsod['prcp'] != 99.99]
    # Select the value columns with a *list*: tuple-style selection
    # (``['temp','prcp']``) is rejected by current pandas releases.
    month_averages = (
        clean_dataset
        .groupby(['stn', 'wban'])[['temp', 'prcp']]
        .mean()
        .reset_index()
    )
    return month_averages

# Per-station averages for each year, keyed by the year as a string.
weather_dict = {}
for year_label, yearly_frame in (
    ('2016', gsod_2016),
    ('2015', gsod_2015),
    ('2014', gsod_2014),
    ('2013', gsod_2013),
    ('2012', gsod_2012),
    ('2011', gsod_2011),
    ('2010', gsod_2010),
    ('2009', gsod_2009),
    ('2008', gsod_2008),
    ('2007', gsod_2007),
):
    weather_dict[year_label] = aggregate_dataset(yearly_frame)


In [134]:
stationpath = '/home/jnueno/Final_Project/Weather data/Station location/'
# Weather-station layer used by the station-selection helpers below.
station_pts = gpd.read_file(
    os.path.join(stationpath, 'station_location.shp'),
    crs={'init': 'epsg:4326'},
)
   
def select_stations_in_range(neigh_shp,station_map,radius=1):
    """Return the stations lying within ``radius`` of any centroid of ``neigh_shp``.

    Every geometry of ``neigh_shp`` is reduced to its centroid, each centroid
    is buffered by ``radius`` and the buffers are dissolved into one shape;
    the stations of ``station_map`` that fall inside that shape are returned.

    Parameters
    ----------
    neigh_shp : geopandas.GeoDataFrame or GeoSeries
        Geometries whose centroids define the search area.
    station_map : geopandas.GeoDataFrame
        Candidate station points.
    radius : float, default 1
        Buffer distance in coordinate units.  The layers are labelled
        EPSG:4326 here, so this is presumably degrees -- TODO confirm.

    Returns
    -------
    geopandas.GeoDataFrame
        Independent copy of the matching rows of ``station_map``.
    """
    centroides=neigh_shp.centroid
    centroides.crs = {'init' :'epsg:4326'}
    buffered_centroids= gpd.GeoDataFrame(centroides.buffer(radius)).rename(columns={0:'geometry'}).set_geometry('geometry')

    # A constant key makes dissolve() merge all buffers into a single geometry.
    buffered_centroids['idx']=1
    buffer_master = buffered_centroids.dissolve(by='idx')
    buffer_master.crs = {'init' :'epsg:4326'}

    matches_index=list(station_map.within(buffer_master.geometry.iloc[0]))

    # Index the frame that was actually tested (the argument), not the global
    # `station_pts`; copy so callers can add columns without touching the
    # source layer or triggering SettingWithCopyWarning.
    within_range_stations=station_map.iloc[matches_index].copy()
    return within_range_stations

def merge_stations(in_range_stations,month_averages):
    """Join station locations to their weather averages on a shared station id.

    The id is ``"<USAF zero-padded to 6>-<WBAN zero-padded to 5>"`` and is
    built the same way from ``stn``/``wban`` on the weather side.  The id
    column is added to copies, so neither input frame is modified.

    Parameters
    ----------
    in_range_stations : DataFrame / GeoDataFrame
        Needs ``USAF`` and ``WBAN`` columns.
    month_averages : pandas.DataFrame
        Needs ``stn`` and ``wban`` columns (plus the measurements to carry over).

    Returns
    -------
    Inner join of the two frames on ``station_id``.
    """
    # All four id parts are cast to str before padding so numeric and text
    # encodings of the station codes are handled alike.
    stations_with_id = in_range_stations.assign(
        station_id=in_range_stations['USAF'].astype(str).str.rjust(6,'0')
        +'-'+in_range_stations['WBAN'].astype(str).str.rjust(5,'0')
    )
    averages_with_id = month_averages.assign(
        station_id=month_averages['stn'].astype(str).str.rjust(6,'0')
        +'-'+month_averages['wban'].astype(str).str.rjust(5,'0')
    )
    new_df = stations_with_id.merge(averages_with_id, how='inner', on='station_id')
    return new_df


def generate_grid_weather_averages_latlong(weather_neigh,gsod,yr):
    """Add distance-weighted ``temp``/``prcp`` columns for one year.

    Stations are chosen with ``select_stations_in_range(weather_neigh,
    station_pts)`` and joined to ``gsod`` with ``merge_stations``.  For every
    geometry of ``weather_neigh`` the distance (km) from its centroid to each
    of those stations is computed, every station receives the weight
    ``2 - distance / max_distance`` (so weights run from 2 for a station at
    the centroid down to 1 for the farthest one) and the weighted means are
    written to the columns ``"temp" + str(yr)`` and ``"prcp" + str(yr)``.

    Parameters
    ----------
    weather_neigh : geopandas.GeoDataFrame
        Frame with a ``geometry`` column; modified in place.
    gsod : pandas.DataFrame
        Per-station averages with ``stn``, ``wban``, ``temp``, ``prcp``.
    yr : str or int
        Suffix appended to the two new column names.

    Returns
    -------
    The same ``weather_neigh`` object, with the two columns set.

    Raises
    ------
    ValueError
        If no station with weather data is in range.
    """
    # geopy 2.0 removed `vincenty`; use `geodesic` when the installed version
    # has it and fall back to `vincenty` on older installs.
    distance_between = getattr(geopy.distance, 'geodesic', None) or geopy.distance.vincenty

    stations_range = select_stations_in_range(weather_neigh, station_pts)
    weather_db = merge_stations(stations_range, gsod)
    if len(weather_db) == 0:
        # Previously surfaced as an opaque "No objects to concatenate" error.
        raise ValueError("no weather station with data in range for year %s" % yr)

    # geopy expects (latitude, longitude), i.e. (y, x).
    station_geom = weather_db["geometry"]
    station_coords = list(zip(station_geom.y, station_geom.x))
    station_temp = np.asarray(weather_db["temp"], dtype=float)
    station_prcp = np.asarray(weather_db["prcp"], dtype=float)

    centroids = weather_neigh["geometry"].centroid

    tmp_list = []
    prc_list = []
    for coords_n in zip(centroids.y, centroids.x):
        dists = np.array([distance_between(coords_n, coords_s).km
                          for coords_s in station_coords])
        max_dist = dists.max()
        if max_dist > 0:
            weights = 2.0 - dists / max_dist
        else:
            # Every station sits exactly on the centroid: weight them equally
            # instead of dividing 0 by 0.
            weights = np.full(len(dists), 2.0)

        weight_total = weights.sum()
        tmp_list.append(float(np.dot(station_temp, weights) / weight_total))
        prc_list.append(float(np.dot(station_prcp, weights) / weight_total))

    # Positional assignment, one value per row of weather_neigh.
    weather_neigh["temp"+str(yr)] = tmp_list
    weather_neigh["prcp"+str(yr)] = prc_list

    return weather_neigh

def update_and_save(shp):
    """Add the yearly weather columns to one fire grid and write it back out.

    Reads ``grid_fire_dir + shp + '.shp'``, runs
    ``generate_grid_weather_averages_latlong`` once per entry of
    ``weather_dict``, overwrites that same shapefile with the enriched frame
    and also writes a flat copy to ``flat_data_dir + shp + '.csv'``.
    """
    shapefile_path = grid_fire_dir + shp + '.shp'
    geodf = gpd.read_file(shapefile_path)

    # The helper writes its columns into `geodf` in place.
    for year_label, yearly_averages in weather_dict.items():
        generate_grid_weather_averages_latlong(geodf, yearly_averages, year_label)

    geodf.to_file(driver='ESRI Shapefile', filename=shapefile_path)

    # Plain DataFrame built from the raw values, keeping the column labels.
    flatdf = pd.DataFrame(geodf.values, columns=geodf.columns)
    flatdf.to_csv(flat_data_dir + shp + '.csv')

        
        


In [167]:
lkup_dir='/home/jnueno/Final_Project/Model/Lookups/'
masterpath='/home/jnueno/Final_Project/HexGrid US/'
grid_fire_dir='/home/jupyter/data/perimeters/'
flat_data_dir='/home/jupyter/data/csv/'
shp_path='/home/jnueno/Final_Project/HexGrid US/Fires/Shp/'

fire_lkup = pd.read_excel(lkup_dir+"fire_file_lkup.xlsx")

# Keep, per fire, the rows of its latest observation date, then of those the
# rows with the latest observation time.
# The string alias 'max' replaces the builtin `max`: passing the builtin
# callable to transform() is deprecated in current pandas.
# NOTE(review): if 'observation date' is stored as MM-DD-YYYY text, "latest"
# is a lexicographic comparison and is only correct within a single year --
# confirm the column dtype.
idx = fire_lkup.groupby(['fire_id'])['observation date'].transform('max') == fire_lkup['observation date']
last_day=fire_lkup[idx]
idx2 = last_day.groupby(['fire_id'])['observation time'].transform('max') == last_day['observation time']
final_perimeter=last_day[idx2]

#import glob

#final_perimeter=final_perimeter.head(760)
#allFiles = glob.glob(flat_data_dir + "*.csv")

#all_files_clean = {x.replace('/home/jupyter/data/csv/','').replace('.csv', '') for x in allFiles}
#final_perimeter=final_perimeter[~final_perimeter['fire_id'].isin(all_files_clean)]
#print(len(allFiles))
#print(final_perimeter.shape)
#print(np.array_split(final_perimeter.sample(frac=1).reset_index(drop=True), 15))

703
(54, 12)


In [168]:
import warnings
# Suppresses every warning category for the rest of the session, including
# deprecation and SettingWithCopy warnings raised by the helpers above.
warnings.filterwarnings('ignore')

import multiprocessing

def func(d):
    """Run ``update_and_save`` for the ``fire_id`` of every row in ``d``.

    Processing is best effort: a failure on one fire is reported on stdout
    and the loop moves on to the next row.  Only ``Exception`` subclasses are
    caught, so KeyboardInterrupt / SystemExit still stop the worker.

    Parameters
    ----------
    d : pandas.DataFrame
        Frame with a ``fire_id`` column.
    """
    for i,row in d.iterrows():
        fire_id = None
        try:
            fire_id = row['fire_id']
            update_and_save(fire_id)
        except Exception as exc:
            # Printed rather than warned: this notebook filters all warnings.
            print('update_and_save failed for %r: %r' % (fire_id, exc), flush=True)

def parallelize_dataframe(df, func):
    """Apply ``func`` to shuffled chunks of ``df`` in a process pool.

    The rows are shuffled, split into one chunk per worker and each chunk is
    passed to ``func`` in a worker process.  Whatever ``func`` returns is
    discarded; only its side effects matter.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to distribute; it is not modified.
    func : callable
        Picklable callable taking one DataFrame chunk.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, unchanged.
    """
    # Leave one core free, but never drop to zero workers on a 1-core host.
    num_cores = max(1, multiprocessing.cpu_count()-1)
    print(num_cores)
    num_partitions = num_cores
    shuffled = df.sample(frac=1).reset_index(drop=True)
    # Split row positions rather than the frame itself: same chunks, without
    # routing a DataFrame through numpy's array_split.
    df_split = [shuffled.iloc[positions]
                for positions in np.array_split(np.arange(len(shuffled)), num_partitions)]
    pool = multiprocessing.Pool(num_cores)
    try:
        pool.map(func, df_split)
        pool.close()
    except BaseException:
        # Do not leave workers running if mapping failed or was interrupted.
        pool.terminate()
        raise
    finally:
        pool.join()

    return df

# Process every final perimeter in worker processes. parallelize_dataframe
# returns its `df` argument, so as the cell's last expression the frame is displayed.
parallelize_dataframe(final_perimeter,func)

15


Unnamed: 0,fire_num_id,file name,fire_id,observations,perimeter timestamp,observation date,observation time,raw t,t,final_id,state,state.1
5,6,AROUF-000119 Railroad Ridge 11-09-2017 0000.gpx,AROUF-000119,1,11-09-2017 0000,11-09-2017,0,1.0,0.0,AROUF_000119_t0,AR,Arkansas
8,9,AROUF-000458 Glory Mountain 11-09-2017 0000.gpx,AROUF-000458,1,11-09-2017 0000,11-09-2017,0,1.0,0.0,AROUF_000458_t0,AR,Arkansas
10,11,AROUF-000619 Lenox 11-09-2017 0000.gpx,AROUF-000619,1,11-09-2017 0000,11-09-2017,0,1.0,0.0,AROUF_000619_t0,AR,Arkansas
11,12,AROUF-000630 Glover 06-19-2017 0000.gpx,AROUF-000630,1,06-19-2017 0000,06-19-2017,0,1.0,0.0,AROUF_000630_t0,AR,Arkansas
14,15,AROUF-000809 Holly 11-09-2017 0000.gpx,AROUF-000809,1,11-09-2017 0000,11-09-2017,0,1.0,0.0,AROUF_000809_t0,AR,Arkansas
19,20,AROUF-000994 Mountain Fork 11-09-2017 0000.gpx,AROUF-000994,1,11-09-2017 0000,11-09-2017,0,1.0,0.0,AROUF_000994_t0,AR,Arkansas
28,29,AROUF-001071 Choctaw Road 11-30-2017 0800.gpx,AROUF-001071,1,11-30-2017 0800,11-30-2017,8,1.0,0.0,AROUF_001071_t0,AR,Arkansas
29,30,AROUF-001115 Dry Camp 11-30-2017 0730.gpx,AROUF-001115,1,11-30-2017 0730,11-30-2017,7,1.0,0.0,AROUF_001115_t0,AR,Arkansas
35,36,AROUF-001291 Mudline Road 12-21-2017 0000.gpx,AROUF-001291,1,12-21-2017 0000,12-21-2017,0,1.0,0.0,AROUF_001291_t0,AR,Arkansas
36,37,AROUF-001297 Sandy 12-21-2017 0000.gpx,AROUF-001297,1,12-21-2017 0000,12-21-2017,0,1.0,0.0,AROUF_001297_t0,AR,Arkansas


In [149]:
#CAINF-001264???
# Leftover cleanup cell: in the code above `pool` only exists as a local
# inside parallelize_dataframe, so an unguarded pool.close() raises NameError
# on a fresh kernel.  Close a module-level pool only if one actually exists.
leftover_pool = globals().get('pool')
if leftover_pool is not None:
    leftover_pool.close()