## Utility Functions

In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.metrics import precision_recall_fscore_support
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
# from geopy.geocoders import Nominatim
import geopandas as gpd
import fiona
# from shapely.geometry import Point, Polygon
# import plotly.express as px
# import datetime
# import folium
# from geopy.distance import geodesic
import pathlib
import urllib
import seaborn as sns
import statsmodels.api as sm
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression



In [3]:
def get_quarter_from_month(month, season):
    """Build a quarter label such as ``'2Q21'`` from a month and a season.

    Args:
        month: Month number. 1-3, 4-6 and 7-9 map to quarters 1, 2 and 3;
            every other value (including out-of-range ones) maps to 4.
        season: Year-like value; the last two characters of ``str(season)``
            become the suffix of the label.

    Returns:
        A string of the form ``'<quarter>Q<suffix>'``.
    """
    suffix = str(season)[-2:]
    # Quarter 4 is the fallback, so only the first three quarters are probed.
    quarter = 4
    for number, first_month in ((1, 1), (2, 4), (3, 7)):
        if month in range(first_month, first_month + 3):
            quarter = number
            break
    return f'{quarter}Q{suffix}'

def get_quarter_from_date(date):
    """Build a quarter label such as ``'3Q19'`` from a date-like object.

    Args:
        date: Any object exposing ``year`` and ``month`` attributes.

    Returns:
        A string ``'<quarter>Q<suffix>'`` where the suffix is the last two
        characters of ``str(date.year)``. Months 1-3, 4-6 and 7-9 give
        quarters 1, 2 and 3; any other month value gives quarter 4.
    """
    suffix = str(date.year)[-2:]
    month = date.month
    months_by_quarter = {1: range(1, 4), 2: range(4, 7), 3: range(7, 10)}
    # First matching quarter wins; quarter 4 is the catch-all default.
    quarter = next(
        (number for number, months in months_by_quarter.items() if month in months),
        4,
    )
    return f'{quarter}Q{suffix}'

def is_exposed(distance, radius_km):
    """Classify exposure from a distance and a radius.

    Both arguments are converted with ``float()``, so numeric strings are
    accepted. The thresholds are expressed in the same unit as the inputs
    (kilometres, going by the ``radius_km`` parameter name).

    Args:
        distance: Distance to compare against the radius.
        radius_km: Radius of the affected area.

    Returns:
        ``"High Exposure"`` when the distance is within the radius,
        ``"Risk"`` when it exceeds the radius by less than 32,
        ``"Low Exposure"`` when it exceeds the radius by less than 80,
        otherwise ``"No Exposure"``.
    """
    distance, radius_km = float(distance), float(radius_km)
    if distance <= radius_km:
        return "High Exposure"

    # How far beyond the radius the point lies; bands are checked nearest first.
    overshoot = distance - radius_km
    for limit, label in ((32, "Risk"), (80, "Low Exposure")):
        if overshoot < limit:
            return label
    return "No Exposure"

def build_coastline_map(resolution='l'):
    """Return the longitudes and latitudes of Basemap's coastline vertices.

    Interactive plotting is switched off via ``plt.ioff()`` before a
    Robinson-projection ``Basemap`` centred on longitude 0 is created and its
    coastlines are drawn.

    Args:
        resolution: Passed straight through as Basemap's ``resolution``
            argument (default ``'l'``).

    Returns:
        A ``(lons, lats)`` pair obtained by stacking every coastline segment
        and calling the map with ``inverse=True`` on the stacked columns.
    """
    plt.ioff()
    world = Basemap(projection='robin', lon_0=0, resolution=resolution)
    segments = world.drawcoastlines().get_segments()
    # One (n_points, 2) array holding every vertex of every coastline segment.
    projected = np.vstack(segments)
    xs, ys = projected[:, 0], projected[:, 1]
    # inverse=True maps projection coordinates back to longitude/latitude.
    coast_lons, coast_lats = world(xs, ys, inverse=True)
    return coast_lons, coast_lats

def distance_from_coast(lon, lat, degree_in_km=111.12, coast_lons=None, coast_lats=None):
    """Approximate the distance from a point to the nearest coastline vertex.

    The distance is the Euclidean distance in degree space between
    ``(lon, lat)`` and every coastline vertex, scaled by ``degree_in_km``.
    NOTE: this is a planar approximation; it applies the same scale factor
    to longitude and latitude differences.

    Args:
        lon: Longitude of the query point, in degrees.
        lat: Latitude of the query point, in degrees.
        degree_in_km: Factor used to convert the degree distance to the
            returned unit.
        coast_lons: Optional array-like of coastline longitudes. When
            omitted, the module-level ``lons`` array is used.
        coast_lats: Optional array-like of coastline latitudes. When
            omitted, the module-level ``lats`` array is used.

    Returns:
        The smallest scaled distance between the point and any coastline
        vertex.
    """
    # Fall back to the module-level coastline only when none is supplied, so
    # existing callers keep working while new callers can pass their own data.
    if coast_lons is None:
        coast_lons = lons
    if coast_lats is None:
        coast_lats = lats
    coast_lons = np.asarray(coast_lons, dtype=float)
    coast_lats = np.asarray(coast_lats, dtype=float)

    dists = np.sqrt((coast_lons - lon) ** 2 + (coast_lats - lat) ** 2)
    return np.min(dists) * degree_in_km

def build_address(street, city, state, zip_code):
    """Join address parts into ``'<street>, <city>, <state> <zip_code>'``."""
    template = '{}, {}, {} {}'
    return template.format(street, city, state, zip_code)

def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """Compute great-circle distances with the haversine formula.

    Each coordinate may be a scalar or an array-like; the four arguments are
    broadcast against each other, so a column of points can be compared with
    a single reference point.

    Args:
        lon1: Longitude(s) of the first point(s), in degrees.
        lat1: Latitude(s) of the first point(s), in degrees.
        lon2: Longitude(s) of the second point(s), in degrees.
        lat2: Latitude(s) of the second point(s), in degrees.
        radius: Sphere radius; the result is in the same unit. The default
            6371 gives kilometres, 3958.756 gives miles.

    Returns:
        A ``pandas.Series`` of distances with a default integer index
        (length 1 when every input is a scalar).
    """
    # Convert each argument on its own: packing scalars and arrays into one
    # list would produce a ragged sequence that NumPy cannot convert.
    lon1, lat1, lon2, lat2 = (
        np.radians(np.asarray(value, dtype=float))
        for value in (lon1, lat1, lon2, lat2)
    )
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    haver_formula = (
        np.sin(dlat / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    )
    # Rounding can push the term marginally outside [0, 1], which would make
    # sqrt/arcsin return NaN for (near-)antipodal or identical points.
    haver_formula = np.clip(haver_formula, 0.0, 1.0)

    dist = 2 * radius * np.arcsin(np.sqrt(haver_formula))
    return pd.Series(np.atleast_1d(dist))

def process_address_and_time(df, century_pivot=23):
    """Split addresses and derive quarter-end dates, adding columns in place.

    Reads the ``'Address'`` and ``'Quarters'`` columns of ``df``. ``'Address'``
    is split on ``', '`` into four parts; ``'Quarters'`` values are expected
    to look like ``'1Q23'`` (quarter digit, ``'Q'``, two-digit year).

    Columns written: ``'Street Address'``, ``'City'``, ``'State'``, ``'Zip'``,
    ``'Year'``, ``'Quarter'``, ``'Quarter String'``,
    ``'quarter date penultimate'`` and ``'Quarter Date Final'``.

    Args:
        df: DataFrame to process. It is modified in place.
        century_pivot: Two-digit years greater than this value are placed in
            the 1900s, all others in the 2000s. The default of 23 preserves
            the historical behaviour; raise it when the data contains
            quarters after 2023.

    Returns:
        The same ``df`` object, with the new columns added.

    Raises:
        KeyError: If a quarter digit is not one of ``'1'``-``'4'``.
    """
    quarter_end = {'1': '03-31-', '2': '06-30-', '3': '09-30-', '4': '12-31-'}

    df[['Street Address', 'City', 'State', 'Zip']] = df['Address'].str.split(', ', expand=True)

    # Two digits after the 'Q', expanded to a four-digit year around the pivot.
    two_digit_year = df['Quarters'].str[2:]
    df['Year'] = np.where(
        two_digit_year.astype(int) > century_pivot,
        '19' + two_digit_year,
        '20' + two_digit_year,
    )

    df['Quarter'] = df['Quarters'].str[:1]
    # Plain dict indexing (not Series.map) so an unknown quarter digit raises
    # KeyError instead of silently becoming a missing date.
    df['Quarter String'] = [quarter_end[digit] for digit in df['Quarter']]
    df['quarter date penultimate'] = df['Quarter String'] + df['Year']
    # Explicit format: the strings are always built as MM-DD-YYYY above.
    df['Quarter Date Final'] = pd.to_datetime(
        df['quarter date penultimate'], format='%m-%d-%Y'
    )
    return df

In [4]:
# Coastline longitudes/latitudes, computed once at module level with the
# default resolution; distance_from_coast reads these two globals.
lons, lats = build_coastline_map()