In [53]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os

from shapely.geometry import Polygon
from shapely.geometry import Point
import geopy.distance
import glob

In [58]:
# --- Filesystem locations used throughout the notebook ---------------------
# Directory holding the lookup workbooks.
lkup_dir = '/home/jnueno/Final_Project/Model/Lookups/'
# Fire shapefile directory (not referenced by the cells shown below).
shp_path = '/home/jnueno/Final_Project/HexGrid US/Fires/Shp/'
# Output directory: update_and_save() writes one weather CSV per fire here.
clim_data_dir = '/home/jupyter/NeuralNetwork/Spot_climate/'
# Input directory: update_and_save() reads the per-fire grid shapefiles here.
grid_fire_dir = '/home/jupyter/data/perimeters/'

# Lookup table with one row per perimeter observation file.
fire_lkup = pd.read_excel(lkup_dir + "fire_file_lkup.xlsx")

In [59]:

# Daily weather-station summaries for 2017 (columns used later: stn, wban,
# mo, da, temp, prcp, wdsp).
gsod_2017 = pd.read_csv('/home/jnueno/climate_data/gsod2017')

# Ids of the perimeters that have a CSV in csv_nn: the bare file stem.
# os.path is used instead of str.replace so that only the directory prefix and
# the trailing extension are removed, never a matching substring mid-name.
allFiles = np.array(
    [os.path.splitext(os.path.basename(csv_path))[0]
     for csv_path in glob.glob('/home/jupyter/data/csv_nn/' + "*.csv")],
    dtype=object,
)

# Keep only the lookup rows whose perimeter id has such a CSV.
california_fires = fire_lkup[fire_lkup['final_id'].isin(allFiles)]

# Last observation of each fire: latest date, then latest hour on that date.
# NOTE(review): if 'observation date' is stored as 'MM-DD-YYYY' text, max/min
# compare lexicographically, which is only chronological within one year --
# confirm the column dtype.
idx = california_fires.groupby(['fire_id'])['observation date'].transform('max') == california_fires['observation date']
last_day = california_fires[idx]
idx2 = last_day.groupby(['fire_id'])['observation time'].transform('max') == last_day['observation time']
final_perimeter = last_day[idx2]

# drop_duplicates: several perimeters can share the same date and hour, and
# duplicated keys on both sides would multiply rows in the merge below.
final_perimeter_merge = (
    final_perimeter[['fire_id', 'observation date']]
    .rename(columns={'observation date': 'final date'})
    .drop_duplicates()
)

# First observation of each fire: earliest date, then earliest hour.
idx = california_fires.groupby(['fire_id'])['observation date'].transform('min') == california_fires['observation date']
first_day = california_fires[idx]
idx2 = first_day.groupby(['fire_id'])['observation time'].transform('min') == first_day['observation time']
initial_perimeter = first_day[idx2]

initial_perimeter_merge = (
    initial_perimeter[['fire_id', 'observation date']]
    .rename(columns={'observation date': 'initial date'})
    .drop_duplicates()
)

# One row per fire: first and last observation date.
fire_interval = initial_perimeter_merge.merge(final_perimeter_merge, on='fire_id')

# Encode month/day as the integer month*100+day (e.g. 731 for 31 July) so it
# can be compared with the same encoding built from the weather table.
initial_dates = pd.DatetimeIndex(fire_interval['initial date'])
final_dates = pd.DatetimeIndex(fire_interval['final date'])
fire_interval['initial_md'] = initial_dates.month * 100 + initial_dates.day
fire_interval['final_md'] = final_dates.month * 100 + final_dates.day

#fire_interval.head()



In [56]:
# Weather-station point layer; select_stations_in_range() filters it by
# distance to each fire and merge_stations() reads its USAF / WBAN columns.
stationpath = '/home/jnueno/Final_Project/Weather data/Station location/'
station_pts = gpd.read_file(
    stationpath + 'station_location.shp',
    crs={'init': 'epsg:4326'},
)

def aggregate_dataset(fire_id):
    """Average the 2017 weather records per station over one fire's interval.

    Reads the module-level ``fire_interval`` (for the fire's ``initial_md`` /
    ``final_md`` bounds) and ``gsod_2017`` (daily station records). Neither
    table is modified.

    Parameters
    ----------
    fire_id : value matched against ``fire_interval['fire_id']``.

    Returns
    -------
    pandas.DataFrame
        One row per (``stn``, ``wban``) pair with the mean ``temp``, ``prcp``
        and ``wdsp`` of the records whose month*100+day falls inside the
        fire's interval (bounds inclusive).

    Raises
    ------
    IndexError
        If ``fire_id`` has no row in ``fire_interval``.
    """
    interval = fire_interval[fire_interval['fire_id'] == fire_id]
    if len(interval) == 0:
        raise IndexError('no burn interval found for fire_id %r' % (fire_id,))
    first_md = interval['initial_md'].iloc[0]
    last_md = interval['final_md'].iloc[0]

    # Same month*100+day encoding as fire_interval, built as a standalone
    # Series so no column is written onto a filtered slice of gsod_2017.
    moda = gsod_2017['mo'] * 100 + gsod_2017['da']

    # Drop records carrying a "missing" sentinel. Per the GSOD format notes
    # the codes are 99.99 for prcp, 999.9 for wdsp and 9999.9 for temp; the
    # previously used 999.99 is kept as well so nothing that used to be
    # excluded is let back in.
    has_values = (
        (gsod_2017['prcp'] != 99.99)
        & ~gsod_2017['wdsp'].isin([999.9, 999.99])
        & (gsod_2017['temp'] != 9999.9)
    )
    in_interval = (moda >= first_md) & (moda <= last_md)
    interval_dataset = gsod_2017[has_values & in_interval]

    # A list (not a tuple) of columns: tuple selection on a GroupBy was
    # deprecated in pandas 1.0 and raises in pandas 2.0.
    month_averages = (
        interval_dataset
        .groupby(['stn', 'wban'])[['temp', 'prcp', 'wdsp']]
        .mean()
        .reset_index()
    )
    return month_averages

#aaa=aggregate_dataset('CABDF-009443')

def select_stations_in_range(neigh_shp,station_map,radius=1):
    """Return the stations lying within ``radius`` of any centroid of ``neigh_shp``.

    Parameters
    ----------
    neigh_shp : geopandas object exposing ``.centroid`` (the grid cells).
    station_map : geopandas object of station geometries to filter.
    radius : buffer distance passed to ``buffer``; it is expressed in the
        units of the geometries' coordinates (presumably degrees, given the
        EPSG:4326 tag applied below -- confirm).

    Returns
    -------
    A copy of the rows of ``station_map`` that fall inside the merged buffer.
    """
    centroides=neigh_shp.centroid
    centroides.crs = {'init' :'epsg:4326'}

    # Buffer every centroid, then dissolve all buffers into one geometry so a
    # single `within` test per station is enough.
    buffered_centroids= gpd.GeoDataFrame(centroides.buffer(radius)).rename(columns={0:'geometry'}).set_geometry('geometry')
    buffered_centroids['idx']=1
    buffer_master = buffered_centroids.dissolve(by='idx')
    buffer_master.crs = {'init' :'epsg:4326'}

    in_range = station_map.within(buffer_master.geometry.iloc[0])

    # Select from the `station_map` argument (previously the global
    # `station_pts` was indexed, silently ignoring the parameter). The copy
    # lets callers add columns without writing into a view of the input.
    within_range_stations = station_map.loc[in_range].copy()
    return within_range_stations

def _station_key(usaf, wban):
    """Build the 'UUUUUU-WWWWW' key: both ids as text, zero-padded on the left."""
    return usaf.astype(str).str.rjust(6, '0') + '-' + wban.astype(str).str.rjust(5, '0')


def merge_stations(in_range_stations,month_averages):
    """Attach the averaged weather values to the in-range stations.

    Both tables get a ``station_id`` key built from their station and WBAN
    identifiers (``USAF``/``WBAN`` on the station side, ``stn``/``wban`` on
    the weather side) and are inner-joined on it, so stations without weather
    records -- and weather records for other stations -- are dropped.

    The inputs are not modified; the key is added to copies.

    Parameters
    ----------
    in_range_stations : table with ``USAF`` and ``WBAN`` columns.
    month_averages : table with ``stn`` and ``wban`` columns.

    Returns
    -------
    The inner join of the two tables on ``station_id``.
    """
    # USAF is cast to str like the other three id columns so the key can be
    # built whether the column was read as text or as a number.
    stations = in_range_stations.assign(
        station_id=_station_key(in_range_stations['USAF'], in_range_stations['WBAN'])
    )
    averages = month_averages.assign(
        station_id=_station_key(month_averages['stn'], month_averages['wban'])
    )
    new_df = stations.merge(averages, how='inner', on='station_id')
    return new_df


def generate_grid_weather_averages_latlong(weather_neigh,fire_id):
    """Fill ``temp`` / ``prcp`` / ``wdsp`` on every grid cell of one fire.

    For each cell centroid the value is a weighted mean over the stations
    selected by ``select_stations_in_range`` that also have weather records
    for the fire's interval (``aggregate_dataset`` + ``merge_stations``).
    A station at distance ``d`` gets weight ``2 - d / max_d``, where ``max_d``
    is the largest station distance for that cell; weights therefore run from
    2 (at the centroid) down to 1 (farthest station).

    Parameters
    ----------
    weather_neigh : GeoDataFrame of grid cells; modified in place.
    fire_id : identifier forwarded to ``aggregate_dataset``.

    Returns
    -------
    ``weather_neigh`` itself, with the three columns set.

    Raises
    ------
    ValueError
        If no in-range station has weather data for the fire.
    """
    stations_range=select_stations_in_range(weather_neigh,station_pts)
    spot_weather=aggregate_dataset(fire_id)
    weather_db=merge_stations(stations_range,spot_weather)

    if len(weather_db) == 0:
        # Previously surfaced as an opaque "No objects to concatenate".
        raise ValueError('no weather stations with data in range of fire %r' % (fire_id,))

    measures = ['temp', 'prcp', 'wdsp']
    station_values = weather_db[measures].values.astype(float)
    # geopy expects (latitude, longitude), i.e. (y, x).
    station_latlon = list(zip(weather_db['geometry'].y, weather_db['geometry'].x))

    centroids = weather_neigh['geometry'].centroid

    # `vincenty` was removed in geopy 2.0; `geodesic` is its documented
    # replacement. Fall back to `vincenty` on releases that predate `geodesic`.
    distance_between = getattr(geopy.distance, 'geodesic', None) or geopy.distance.vincenty

    interpolated = np.empty((len(centroids), len(measures)))
    for cell_pos, cell_latlon in enumerate(zip(centroids.y, centroids.x)):
        dist_km = np.array([distance_between(cell_latlon, station).km
                            for station in station_latlon])
        max_dist = dist_km.max()
        if max_dist > 0:
            weights = 2.0 - dist_km / max_dist
        else:
            # Every station sits exactly on the centroid: equal weights
            # instead of a 0/0 division.
            weights = np.ones_like(dist_km)
        interpolated[cell_pos] = weights.dot(station_values) / weights.sum()

    # Positional assignment, one column per measure.
    for col_pos, measure in enumerate(measures):
        weather_neigh[measure] = interpolated[:, col_pos]

    return weather_neigh

def update_and_save(shp):
    """Compute the weather columns for one fire grid and write them to CSV.

    ``shp`` is used three ways: as the shapefile stem read from
    ``grid_fire_dir``, as the fire id handed to
    ``generate_grid_weather_averages_latlong``, and as the stem of the CSV
    written to ``clim_data_dir``.
    """
    grid = gpd.read_file(grid_fire_dir + shp + '.shp')

    # Called for its side effect: it sets temp/prcp/wdsp on `grid` in place.
    generate_grid_weather_averages_latlong(grid, shp)

    # Rebuild as a plain DataFrame from the raw values, keeping column names.
    plain = pd.DataFrame(grid.values, columns=grid.columns)

    exported = ['us_gid_to_', 'temp', 'prcp', 'wdsp']
    plain[exported].to_csv(clim_data_dir + shp + '.csv')

        
        


In [57]:
# NOTE(review): this silences every warning category for the remainder of the
# session, including pandas/geopandas deprecation notices.
import warnings
warnings.filterwarnings('ignore')

# Used by parallelize_dataframe() to fan the per-fire work out over processes.
import multiprocessing

def func(d):
    """Run ``update_and_save`` for every row of ``d`` (best effort).

    A failure on one fire does not stop the others, but it is no longer
    silent: it is printed and recorded.

    Parameters
    ----------
    d : DataFrame with a ``fire_id`` column.

    Returns
    -------
    list of (fire_id, repr(exception)) tuples, one per fire that failed;
    empty when everything succeeded.
    """
    failures = []
    for _, row in d.iterrows():
        fire_id = row['fire_id']
        try:
            update_and_save(fire_id)
        except Exception as exc:
            # `Exception`, not a bare except, so KeyboardInterrupt/SystemExit
            # still stop the worker.
            failures.append((fire_id, repr(exc)))
            print('update_and_save failed for %s: %r' % (fire_id, exc))
    return failures



def parallelize_dataframe(df, func):
    """Apply ``func`` to shuffled chunks of ``df`` in a process pool.

    The rows are shuffled, split into roughly equal chunks, and each chunk is
    handed to ``func`` in a worker process. All cores but one are used, with
    at least one worker and never more workers than rows.

    Parameters
    ----------
    df : DataFrame to process.
    func : picklable callable taking one DataFrame chunk.

    Returns
    -------
    ``df`` itself, unchanged; the values returned by ``func`` are discarded.
    """
    # max(1, ...): on a single-core host cpu_count() - 1 is 0, which is not a
    # valid pool size or partition count.
    num_cores = max(1, multiprocessing.cpu_count() - 1)
    print(num_cores)

    if len(df) == 0:
        # Nothing to do; avoid starting worker processes.
        return df

    num_partitions = min(num_cores, len(df))
    shuffled = df.sample(frac=1).reset_index(drop=True)
    # Split row positions rather than the frame itself: np.array_split on a
    # DataFrame is deprecated in recent pandas.
    df_split = [shuffled.iloc[positions]
                for positions in np.array_split(np.arange(len(shuffled)), num_partitions)]

    pool = multiprocessing.Pool(num_partitions)
    try:
        pool.map(func, df_split)
    finally:
        # Always release the workers, even if a chunk raised.
        pool.close()
        pool.join()

    return df

# Process every fire's final-perimeter row across worker processes; `func`
# calls update_and_save() on each row's fire_id.
parallelize_dataframe(final_perimeter,func)

15


Unnamed: 0,fire_num_id,file name,fire_id,observations,perimeter timestamp,observation date,observation time,raw t,t,final_id,state,state.1
442,443,CAAEU-021638 RANCH 07-31-2017 1159.gpx,CAAEU-021638,1,07-31-2017 1159,07-31-2017,11,1.0,0.0,CAAEU_021638_t0,CA,California
443,444,CAAEU-021652 Latrobe Incident 07-31-2017 1158.gpx,CAAEU-021652,1,07-31-2017 1158,07-31-2017,11,1.0,0.0,CAAEU_021652_t0,CA,California
447,448,CAANF-002273 LAKE 06-20-2017 0000.gpx,CAANF-002273,4,06-20-2017 0000,06-20-2017,00,23.0,42.0,CAANF_002273_t42,CA,California
453,454,CAANF-004552 WILSON 10-22-2017 1953.gpx,CAANF-004552,6,10-22-2017 1953,10-22-2017,19,43.0,139.0,CAANF_004552_t139,CA,California
463,464,CABDF-009443 HOLCOMB 06-25-2017 0251.gpx,CABDF-009443,10,06-25-2017 0251,06-25-2017,02,6.0,126.0,CABDF_009443_t126,CA,California
465,466,CABDF-009802 MART 06-28-2017 0054.gpx,CABDF-009802,2,06-28-2017 0054,06-28-2017,00,1.0,1.0,CABDF_009802_t1,CA,California
467,468,CABDU-010918 BRYANT 2 08-04-2017 1400.gpx,CABDU-010918,1,08-04-2017 1400,08-04-2017,14,1.0,0.0,CABDU_010918_t0,CA,California
476,477,CABTU-010975 WALL 07-14-2017 0052.gpx,CABTU-010975,9,07-14-2017 0052,07-14-2017,00,25.0,143.0,CABTU_010975_t143,CA,California
487,488,CABTU-013737 PONDEROSA 09-06-2017 0130.gpx,CABTU-013737,11,09-06-2017 0130,09-06-2017,01,24.0,148.0,CABTU_013737_t148,CA,California
488,489,CABTU-014487 DIXIE 09-14-2017 1600.gpx,CABTU-014487,1,09-14-2017 1600,09-14-2017,16,1.0,0.0,CABTU_014487_t0,CA,California
