# Prepare the EDA Environment

In [1]:
import sys
import os
import types
import warnings
import glob
import regex as re
import netCDF4 as nc
import rasterio
import numpy as np
import xarray as xr
import hvplot.xarray
import holoviews as hv
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
import cartopy.crs as ccrs
import cartopy.feature as cf
import ipywidgets as widgets
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
import chardet
import pyperclip
from geopandas import GeoDataFrame
from shapely.geometry import Point
from bokeh.io import output_notebook, show
from bokeh.resources import INLINE
from rasterio.transform import from_origin
from rasterstats import zonal_stats
from shapely.geometry import LineString
from matplotlib.path import Path
from matplotlib.colors import Normalize
from netCDF4 import Dataset
from pyproj import CRS
from IPython.display import display
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from _windbreaks_helpers import *

In [2]:
# Suppress every Python warning for the rest of the kernel session.
warnings.filterwarnings('ignore')
# Reload variables that were previously persisted with IPython's `%store` magic.
# NOTE: src_dir and extent_coords are reassigned by the cells that follow, so the
# restored values of those two names are overwritten later in this notebook.
%store -r src_dir
%store -r extent_coords
%store -r ksne_data
# %store -r stormbuf_gdf
%store -r ksne_gdf
# %store -r crop_cvr_gdf
%store -r ksne_se_count_gdf

## Set data source variables

In [3]:
# Path to the directory
directory = 'Data'

# Check if the directory exists
if os.path.isdir(directory):
    src_dir = directory
else:
    src_dir = None

print(src_dir)
%store src_dir

Data
Stored 'src_dir' (str)


## Set the extents of the Area of Interest (AOI)

In [4]:
# Project extents
extent_coords = {'min_lat': 36.9314320, 'max_lat': 43.1345169,
                 'min_lon': -104.2232889, 'max_lon': -94.5426297}
%store extent_coords

Stored 'extent_coords' (dict)


In [5]:
# Use the Jupyter `%whos` magic to list all variables and functions loaded in the interactive workspace
%whos

Variable                     Type             Data/Info
-------------------------------------------------------
CRS                          type             <class 'pyproj.crs.crs.CRS'>
Dataset                      type             <class 'netCDF4._netCDF4.Dataset'>
GeoDataFrame                 type             <class 'geopandas.geodataframe.GeoDataFrame'>
INLINE                       Resources        Resources(mode='inline')
LATITUDE_FORMATTER           FuncFormatter    <matplotlib.ticker.FuncFo<...>ct at 0x000001B418313D10>
LONGITUDE_FORMATTER          FuncFormatter    <matplotlib.ticker.FuncFo<...>ct at 0x000001B418CAB7D0>
LineString                   type             <class 'shapely.geometry.linestring.LineString'>
Normalize                    type             <class 'matplotlib.colors.Normalize'>
Path                         type             <class 'matplotlib.path.Path'>
Point                        type             <class 'shapely.geometry.point.Point'>
calendar                

## Load basemaps and boundary files for AOI

# Examine Storm Event Data

## - Examine Storm Event CSVs

In [6]:
# prefix = 'StormEvents_KSNE'
# combined_gdf = combine_csv_files(src_dir+'/Storm_event/', prefix)

In [7]:
# combined_gdf.head()

In [8]:
# print_cols(combined_gdf)

In [9]:
# combined_gdf.to_csv(os.path.join(src_dir, 'Storm_event/StormEvents_all.csv'))

In [16]:
# Load the storm-event summary CSV from the Storm_event folder under src_dir.
# (The file name suggests counts/magnitude/indemnity by FIPS and year — confirm against the data dictionary.)
ksnesdnd_se_data = pd.read_csv(os.path.join(src_dir, 'Storm_event/ksnesdnd_se_count_mag_ind_by_fips_year.csv'))
# Persist the freshly loaded DataFrame with IPython's %store magic.
%store ksnesdnd_se_data

Stored 'ksnesdnd_se_data' (DataFrame)


In [17]:
# Preview the first rows of the table as loaded (Month_Name is still empty at this point).
ksnesdnd_se_data.head()

Unnamed: 0,Yr_Num,Mo_Num,Month_Name,FIPS,Count_StormEvents,Sum_Magnitude,Avg_Magnitude,Max_Magnitude,Magnitude_Rel_Se,Sum_Indemnity
0,2019,1,,20005,1.0,0.0,0.0,0.0,,1803.0
1,2019,1,,20007,2.0,0.0,0.0,0.0,,4846.5
2,2019,1,,20011,1.0,52.0,52.0,52.0,52.0,39534.0
3,2019,1,,20013,1.0,0.0,0.0,0.0,,2622.0
4,2019,1,,20015,1.0,0.0,0.0,0.0,,605.0


In [18]:
# Populate Month_Name from the numeric month column, one value at a time, using the
# month_number_to_name helper (presumably star-imported from _windbreaks_helpers — verify).
ksnesdnd_se_data['Month_Name'] = ksnesdnd_se_data['Mo_Num'].map(month_number_to_name)
# Show the first rows to confirm the month names were filled in.
ksnesdnd_se_data.head()

Unnamed: 0,Yr_Num,Mo_Num,Month_Name,FIPS,Count_StormEvents,Sum_Magnitude,Avg_Magnitude,Max_Magnitude,Magnitude_Rel_Se,Sum_Indemnity
0,2019,1,January,20005,1.0,0.0,0.0,0.0,,1803.0
1,2019,1,January,20007,2.0,0.0,0.0,0.0,,4846.5
2,2019,1,January,20011,1.0,52.0,52.0,52.0,52.0,39534.0
3,2019,1,January,20013,1.0,0.0,0.0,0.0,,2622.0
4,2019,1,January,20015,1.0,0.0,0.0,0.0,,605.0


In [19]:
# List each column's positional index, name and dtype (print_cols is a project helper;
# description based on its printed output).
print_cols(ksnesdnd_se_data)

Index: 0, Column Name: Yr_Num, Data Type: int64
Index: 1, Column Name: Mo_Num, Data Type: int64
Index: 2, Column Name: Month_Name, Data Type: object
Index: 3, Column Name: FIPS, Data Type: int64
Index: 4, Column Name: Count_StormEvents, Data Type: float64
Index: 5, Column Name: Sum_Magnitude, Data Type: float64
Index: 6, Column Name: Avg_Magnitude, Data Type: float64
Index: 7, Column Name: Max_Magnitude, Data Type: float64
Index: 8, Column Name: Magnitude_Rel_Se, Data Type: float64
Index: 9, Column Name: Sum_Indemnity, Data Type: float64



In [20]:
# ksnesdnd_se_data.drop(columns=['geometry', 'STATE_FIPS'])
# # Rename long-name fields to <= 10 character for shp file constraints
# ksnesdnd_se_data = ksnesdnd_se_data.rename(columns={'BEGIN_YEARMONTH': 'YRMO', 'MONTH_NAME': 'MO_NAME', 'COUNTY_NAME': 'CO_NAME', 'MAGNITUDE': 'MAG', 'MAGNITUDE_TYPE':'MAG_TYPE', 'EPISODE_NARRATIVE':'EP_NARR', 'EVENT_NARRATIVE':'EVENT_NARR'})
# print_cols(ksnesdnd_se_data)

In [21]:
# cp1_ksnesdnd_se_data = ksnesdnd_se_data
# # st_abb dictionary
# state_abb = {'KANSAS': 'KS', 'NEBRASKA': 'NE'}
# 
# ksnesdnd_se_data['ST_ABB'] = ksnesdnd_se_data['STATE'].map(state_abb).astype('object')
# ksnesdnd_se_data.head()

In [22]:
# Re-list the columns (index, name, dtype). The intervening rename/mapping cells are
# commented out, so the schema is unchanged from the previous listing.
print_cols(ksnesdnd_se_data)

Index: 0, Column Name: Yr_Num, Data Type: int64
Index: 1, Column Name: Mo_Num, Data Type: int64
Index: 2, Column Name: Month_Name, Data Type: object
Index: 3, Column Name: FIPS, Data Type: int64
Index: 4, Column Name: Count_StormEvents, Data Type: float64
Index: 5, Column Name: Sum_Magnitude, Data Type: float64
Index: 6, Column Name: Avg_Magnitude, Data Type: float64
Index: 7, Column Name: Max_Magnitude, Data Type: float64
Index: 8, Column Name: Magnitude_Rel_Se, Data Type: float64
Index: 9, Column Name: Sum_Indemnity, Data Type: float64



In [23]:
# # List of columns in the desired order
# column_order = (
#         ['YRMO', 'FIPS', 'YEAR', 'MONTH', 'MO_NAME', 'ST_ABB', 'CO_NAME', 'EVENT_ID', 'EVENT_TYPE'] +
#         [
#             c for c in ksnesdnd_se_data.columns
#             if c not in {
#             'YRMO',
#             'FIPS',
#             'YEAR',
#             'MONTH',
#             'MO_NAME',
#             'ST_ABB',
#             'CO_NAME',
#             'EVENT_ID',
#             'EVENT_TYPE'
#         }
#         ]
# )
# # Create a new DataFrame with columns in the desired order
# ksnesdnd_se_data = ksnesdnd_se_data[column_order]
# 
# # Print the columns of the new DataFrame
# print_cols(ksnesdnd_se_data)

In [24]:
%store ksnesdnd_se_data
ksnesdnd_se_data.to_csv(os.path.join(src_dir, 'Storm_event/StormEvents_ksnesdnd_pdmod.csv'))

Stored 'ksnesdnd_se_data' (DataFrame)


In [25]:
# Inspect the first 1000 rows; the notebook display truncates the middle of the table,
# so only the leading and trailing rows of that slice are rendered.
ksnesdnd_se_data.head(1000)

Unnamed: 0,Yr_Num,Mo_Num,Month_Name,FIPS,Count_StormEvents,Sum_Magnitude,Avg_Magnitude,Max_Magnitude,Magnitude_Rel_Se,Sum_Indemnity
0,2019,1,January,20005,1.0,0.00,0.000000,0.0,,1803.00
1,2019,1,January,20007,2.0,0.00,0.000000,0.0,,4846.50
2,2019,1,January,20011,1.0,52.00,52.000000,52.0,52.000000,39534.00
3,2019,1,January,20013,1.0,0.00,0.000000,0.0,,2622.00
4,2019,1,January,20015,1.0,0.00,0.000000,0.0,,605.00
...,...,...,...,...,...,...,...,...,...,...
995,2019,5,May,31149,,,,,,938514.00
996,2019,5,May,31151,,,,,,107599.25
997,2019,5,May,31153,6.0,171.25,34.250000,65.0,34.250000,710210.29
998,2019,5,May,31155,9.0,131.00,18.714286,73.0,18.714286,671666.27


In [None]:
# source = ksnesdnd_se_data
# output = 'Output/stormevents_ksne.shp'

# ksne_gdf = df_gdf_or_csv_to_shp(source, output, extent_coords)

In [None]:
# # Group by 'YRMO' and FIPS and aggregate
# # ksne_gdf = ksne_gdf.groupby(['YRMO', 'FIPS']).sum().reset_index()
# 
# min_value = ksne_gdf['YRMO'].min()
# max_value = ksne_gdf['YRMO'].max()
# 
# print('Minimum value in YRMO column:', min_value)
# print('Maximum value in YRMO column:', max_value)


In [None]:

# # Specify the path to the input geodataframe
# input_gdf = ksne_gdf
# # Specify the buffer distance as a float or int
# buf_dist = 3
# # or you can specify a str if you want to buffer by meters or feet instead of kilometers and miles, respectively
# # buf_dist = '3000ft'
# buf_dist_str = str(buf_dist).replace('.', '_')
# # Specify the path to the output shapefile
# output_path = f'Output/storm_line_{buf_dist_str}_buf.shp'
# 
# # Create a buffered GeoDataFrame
# # Note: For Geographic Coordinate Systems, provide buffer distance in kilometers.
# # For State Plane Coordinate Systems, provide buffer distance in miles.
# gdf_buf(input_gdf, buf_dist, output_path)


In [None]:
# # I need crop cover
# # Load the storm line data from a shapefile
# buf_dist_str = '3'
# stormbuf_gdf = gpd.read_file(f'Output/storm_line_{buf_dist_str}_buf.shp')
# # Perform the spatial join (intersection)
# intersect_gdf = gpd.sjoin(crop_cvr_gdf, stormbuf_gdf, how='inner', op='intersects')
# intersect_gdf.head()

In [None]:
# # get column names with their index
# print('\n', 'stormbuf_gdf columns:', '\n')
# print_cols(stormbuf_gdf)

In [None]:
# print('crop_cvr_gdf', crop_cvr_gdf.crs)
# print('ksne_gdf', ksne_gdf.crs)
# print('stormbuf_gdf', stormbuf_gdf.crs)

In [None]:
# List the variables currently defined in the interactive namespace.
%whos

In [None]:
# ksne_gdf.head()

In [None]:
# print("Unique Years/Months: ", ksne_gdf['YRMO'].unique())
# print("Unique Counties by FIPS: ", ksne_gdf['FIPS'].unique())


In [None]:
# ksne_se_count = ksne_gdf.groupby(['YRMO', 'FIPS']).size().reset_index(name='EVENT_COUNT')
# # Save the DataFrame to a CSV
# ksne_se_count.to_csv('Output/ksne_se_count.csv', index=False)
# print(ksne_se_count)

In [None]:
# Load the storm-event counts from a CSV in the Output folder (path is relative to the
# current working directory). The cell that would write this file is commented out in
# this notebook, so the file must already exist on disk.
ksne_se_count = pd.read_csv('Output/ksne_se_count.csv')

# print_cols(ksne_se_count)

# Store notebook variables for use in other notebooks in the project.

In [None]:
# Persist the notebook's key variables with IPython's %store magic so they can be
# reloaded elsewhere with `%store -r <name>`.
%store src_dir
%store extent_coords
%store ksnesdnd_se_data
# %store stormbuf_gdf
# ksne_gdf is not created in this notebook's live cells; it is only available if the
# `%store -r ksne_gdf` in the setup cell succeeded.
%store ksne_gdf
# %store crop_cvr_gdf
# NOTE(review): the setup cell restores `ksne_se_count_gdf`, while this cell stores
# `ksne_se_count` — confirm the two names are intentionally different.
%store ksne_se_count