In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from numpy import nan

In [2]:
# Display settings: show every column and row, and never truncate cell text
# or long sequences, so wide tables print in full.
# Each option is addressed by its full name. pandas also accepts unambiguous
# substrings (the original used 'max_seq_item'), but that shorthand can stop
# working if a later release adds another option containing the same text.
for option_name in (
    'display.max_columns',
    'display.max_rows',
    'display.width',
    'display.max_colwidth',
    'display.max_seq_items',
):
    pd.set_option(option_name, None)

In [3]:
# DATA SOURCE 1:
# Information and Condition of Schools (ICOS) database, Pre-Disaster Mitigation module.
# Received from the Office of Superintendent of Public Instruction on Aug. 14, 2024,
# via public records request.
# https://www.k12.wa.us/policy-funding/school-buildings-facilities/information-and-condition-schools-icos
icos_csv_path = "data/df_icos_clean_8.14.24.csv"
df_icos_original = pd.read_csv(icos_csv_path)

In [4]:
# Copy the data and work with the copy.
# .copy() creates an independent DataFrame; a plain assignment would only bind a
# second name to the same object, so edits to df_icos would also change
# df_icos_original.
df_icos = df_icos_original.copy()

In [5]:
# Inspect the column names, non-null counts and dtypes of the ICOS data.
df_icos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6678 entries, 0 to 6677
Data columns (total 32 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Unique_Identifier                    6678 non-null   object 
 1   District                             6678 non-null   object 
 2   Site                                 6678 non-null   object 
 3   Site_Latitude                        6678 non-null   float64
 4   Site_Longitude                       6678 non-null   float64
 5   Site_Type                            6678 non-null   object 
 6   Site_EQHazardLevel                   6678 non-null   object 
 7   Site_EQRiskLevel                     2838 non-null   object 
 8   Site_Soil                            6638 non-null   object 
 9   Site_Liquefaction                    6638 non-null   object 
 10  Site_GroundMotionPercentile          6638 non-null   float64
 11  Site_HasGeotechnicalStudy     

In [7]:
# How many schools are located in "high" or "very high" seismic hazard areas?
# Keep one row per distinct (school, hazard level) pair, then express each hazard
# level's share of those rows as a percentage string.
# According to ICOS, 28.3% of schools have "high" or "very high" earthquake hazard levels.
df_hazard = df_icos.loc[:, ['Unique_Identifier', 'Site_EQHazardLevel']].drop_duplicates()
hazard_share_pct = df_hazard['Site_EQHazardLevel'].value_counts(normalize=True) * 100
hazard_share_pct.round(1).astype(str) + '%'

Site_EQHazardLevel
Moderate     46.2%
High         27.0%
Low          25.1%
Very High     1.3%
no value      0.4%
Name: proportion, dtype: object

In [9]:
# Compare hazard data from the ICOS dataset to hazard data found in district-level reports also derived from ICOS.

In [10]:
# The ICOS dataset contains one hazard level field called "Site_EQHazardLevel".
# This field is defined in the ICOS glossary as:
# "The highest calculated building area earthquake hazard at the site. A qualitative measure based on the 
# earthquake ground motion percentile. Earthquake hazard is a measure of the frequency and severity of 
# earthquake expected for this site. This is not a measure of earthquake risk. A building built to recent or 
# current building codes in a high earthquake hazard location may have low risk because the building is well 
# designed to resist earthquake forces. Conversely, a building built before earthquake codes were adopted or 
# decades ago when the earthquake provisions were lower than now in a low earthquake hazard location may have 
# relatively high risk because the building may suffer high levels of damage at relatively low levels of 
# earthquake ground shaking." 

# But a "hazards" tab in the dataset documentation shows calculations in which hazard levels are lowered when a 
# school campus has low estimated levels of liquefaction, a soil condition that exacerbates earthquake shaking.

# See the ICOS dataset documentation here:
# https://github.com/efkodon/seismic_school_evaluations/blob/main/icos_documentation.ipynb

In [12]:
# District-level reports generated from ICOS were obtained for Seattle Public Schools and the Shoreline School 
# District. These reports are called "Building Earthquake EPAT Summary." The reports show two hazard levels: 
# "Earthquake Ground Shaking Hazard Level"; and "Combined Earthquake Hazard Level," which is defined as 
# "Earthquake ground shaking and liquefaction potential."

# The "Site_EQHazardLevel" in the statewide dataset corresponds to the combined hazard level in the EPAT reports, 
# which is lower than the earthquake ground shaking hazard level in most cases within this sample. 

# EPAT is the state's Earthquake Performance Assessment Tool.
# https://www.eeri.org/images/sesi/EPATGuidance-8-10-2017.pdf

In [14]:
# Keep only the ICOS rows for the two districts being compared.
mask = df_icos['District'].isin(['Seattle', 'Shoreline'])
df_icos_seattle_shoreline = df_icos.loc[mask]

In [15]:
# Reduce to the fields needed for the comparison and drop repeated rows.
comparison_columns = ['Unique_Identifier', 'Site_EQHazardLevel', 'Site_Liquefaction']
df_icos_seattle_shoreline = df_icos_seattle_shoreline.loc[:, comparison_columns].drop_duplicates()

In [16]:
# One school appears in ICOS but not in the Seattle Public Schools data. Drop it from the
# Seattle ICOS list so that only schools present in both datasets are merged for comparison.
mask = df_icos_seattle_shoreline['Unique_Identifier'].ne('Seattle_Kimball Elementary School (New)')
df_icos_seattle_shoreline = df_icos_seattle_shoreline.loc[mask]

In [17]:
# Number of rows and columns left in the Seattle/Shoreline ICOS subset.
df_icos_seattle_shoreline.shape

(119, 3)

In [20]:
# Hazard levels compiled from EPAT reports provided by Seattle Public Schools.
seattle_csv_path = "data/hazard_levels_seattle_7.15.24.csv"
df_seattle = pd.read_csv(seattle_csv_path)

In [21]:
# Number of rows and columns in the Seattle EPAT data.
df_seattle.shape

(103, 3)

In [22]:
# For Seattle, 100 out of 103 schools had a combined hazard level that was lower than the 
# earthquake hazard level. The combined level corresponds to the "Site_EQHazardLevel" in the statewide data.
# (These are hazard levels, not risk levels; the ICOS glossary treats hazard and risk as separate measures.)
df_seattle[['EPAT_earthquake_hazard_level', 'EPAT_combined_hazard_level']].value_counts()

EPAT_earthquake_hazard_level  EPAT_combined_hazard_level
High                          Moderate                      57
Very High                     High                          43
                              Very High                      2
High                          High                           1
Name: count, dtype: int64

In [23]:
# Hazard levels compiled from EPAT reports received from OSPI for the Shoreline School District.
shoreline_csv_path = "data/hazard_levels_shoreline_5.8.24.csv"
df_shoreline = pd.read_csv(shoreline_csv_path)

In [24]:
# For Shoreline, 15 out of 16 schools had a combined hazard level that was lower than the 
# earthquake hazard level. The combined level corresponds to the "Site_EQHazardLevel" in the statewide data.
# (These are hazard levels, not risk levels; the ICOS glossary treats hazard and risk as separate measures.)
df_shoreline[['EPAT_earthquake_hazard_level', 'EPAT_combined_hazard_level']].value_counts()

EPAT_earthquake_hazard_level  EPAT_combined_hazard_level
High                          Moderate                      15
                              High                           1
Name: count, dtype: int64

In [25]:
# Stack the Shoreline and Seattle EPAT tables into one frame (rows appended).
epat_frames = [df_shoreline, df_seattle]
df_seattle_shoreline = pd.concat(epat_frames, axis=0)

In [26]:
# Attach the EPAT hazard levels to the ICOS rows by school ID.
# A left join keeps every ICOS row; a school with no EPAT match gets missing values
# in the EPAT columns.
# NOTE(review): the paired value counts shown further down total 117 of the 119 rows,
# which suggests two ICOS schools found no EPAT match — confirm the IDs line up.
df_merged_seattle_shoreline = pd.merge(
    df_icos_seattle_shoreline,
    df_seattle_shoreline,
    how='left',
    on='Unique_Identifier',
)

In [27]:
# Give the ICOS column a source-specific name so the comparison reads clearly.
icos_column_names = {'Site_EQHazardLevel': 'ICOS_hazard_level'}
df_merged_seattle_shoreline = df_merged_seattle_shoreline.rename(columns=icos_column_names)

In [28]:
# Put the hazard-level columns side by side for easier comparison.
comparison_order = [
    'Unique_Identifier',
    'ICOS_hazard_level',
    'EPAT_combined_hazard_level',
    'EPAT_earthquake_hazard_level',
    'Site_Liquefaction',
]
df_merged_seattle_shoreline = df_merged_seattle_shoreline.loc[:, comparison_order]

In [29]:
# Count each pairing of the ICOS hazard level with the EPAT earthquake hazard level.
# NOTE(review): DataFrame.value_counts() leaves out rows containing missing values by default,
# so any ICOS school without an EPAT match will not appear in this table — confirm the totals.
df_merged_seattle_shoreline[['ICOS_hazard_level', 'EPAT_earthquake_hazard_level']].value_counts()

ICOS_hazard_level  EPAT_earthquake_hazard_level
Moderate           High                            70
High               Very High                       43
                   High                             2
Very High          Very High                        2
Name: count, dtype: int64

In [30]:
# Flag whether the earthquake hazard level in the data received via public records request
# from OSPI (ICOS) equals the earthquake hazard level in the Seattle and Shoreline EPAT data.
# A column-wise comparison replaces the row-by-row apply and gives the same labels: rows
# where either value is missing compare as unequal and are labelled 'No'.
df_merged_seattle_shoreline['hazard_level_match'] = (
    df_merged_seattle_shoreline['ICOS_hazard_level']
    .eq(df_merged_seattle_shoreline['EPAT_earthquake_hazard_level'])
    .map({True: 'Yes', False: 'No'})
)

In [31]:
# Tally how many rows are flagged 'Yes' / 'No' for the earthquake hazard level match.
df_merged_seattle_shoreline['hazard_level_match'].value_counts()

hazard_level_match
No     115
Yes      4
Name: count, dtype: int64

In [32]:
# Rows whose ICOS hazard level differs from the EPAT earthquake hazard level.
# NOTE(review): rows with a missing EPAT value are also flagged 'No', so they land here too.
mask = df_merged_seattle_shoreline['hazard_level_match'].eq('No')
df_merged_seattle_shoreline_unmatched_hazard = df_merged_seattle_shoreline.loc[mask]

In [33]:
# Flag whether the earthquake hazard level in the OSPI public records data (ICOS) equals the
# EPAT_combined_hazard_level.
# A column-wise comparison replaces the row-by-row apply and gives the same labels: rows
# where either value is missing compare as unequal and are labelled 'No'.
df_merged_seattle_shoreline['combined_hazard_level_match'] = (
    df_merged_seattle_shoreline['ICOS_hazard_level']
    .eq(df_merged_seattle_shoreline['EPAT_combined_hazard_level'])
    .map({True: 'Yes', False: 'No'})
)

In [34]:
# Yes, the hazard level given in the public records data from ICOS matches the EPAT_combined_hazard_level
# in the SPS data, which matches the earthquake hazard level in the SPS data only some of the time.
df_merged_seattle_shoreline['combined_hazard_level_match'].value_counts()

combined_hazard_level_match
Yes    117
No       2
Name: count, dtype: int64

In [35]:
# Is there a pattern in the liquefaction level where the hazard levels do match?
# Start by isolating the rows flagged 'Yes'.
mask = df_merged_seattle_shoreline['hazard_level_match'].eq('Yes')
df_merged_seattle_shoreline_matched_hazard = df_merged_seattle_shoreline.loc[mask]

In [36]:
# Show liquefaction levels for the rows where the ICOS and EPAT earthquake hazard levels matched
df_merged_seattle_shoreline_matched_hazard['Site_Liquefaction'].value_counts()

Site_Liquefaction
Moderate to High    4
Name: count, dtype: int64

In [37]:
# Show liquefaction levels for the rows where the ICOS and EPAT earthquake hazard levels did not match
# NOTE(review): rows with no EPAT value are flagged 'No' as well, so they are counted in this tally.
df_merged_seattle_shoreline_unmatched_hazard['Site_Liquefaction'].value_counts()

Site_Liquefaction
Very Low           107
Low to Moderate      4
Very Low to Low      3
Low                  1
Name: count, dtype: int64

In [38]:
# The above confirms that when liquefaction is 'Moderate to High', the EPAT earthquake hazard level equals the EPAT
# combined hazard level, in this sample. When liquefaction is lower, the hazard level is dropped one level.

In [39]:
# Comparing hazard levels in the School Seismic Safety Project phase 1 to ICOS also shows the differences.

In [40]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides any pandas deprecation or chained-assignment
# warnings raised by later cells — consider narrowing it to the specific warning being
# suppressed, and moving the import up to the first cell with the other imports.
warnings.filterwarnings("ignore")

In [41]:
# DATA SOURCE 2: 
# School Seismic Safety Project phase 1 engineering reports
# https://fortress.wa.gov/dnr/geologydata/school_seismic_safety/SSSP_2019_Engineering_Vol1_Engineering_Report.pdf

In [None]:
# The SSSP published hazard levels in the phase 1 engineering reports but not in the corresponding tables
# in the phase 2 engineering reports, so only phase 1 is included here.

In [42]:
# Load the School Seismic Safety Project (SSSP) phase 1 engineering report table.
sssp_csv_path = "data/sssp_2019_engineering_reports.csv"
df_sssp_phase1 = pd.read_csv(sssp_csv_path)

In [43]:
# School names repeat across districts, so build a unique ID of the form
# "<District Name>_<Facility Name>".
district_names = df_sssp_phase1['District Name'].astype(str)
facility_names = df_sssp_phase1['Facility Name'].astype(str)
df_sssp_phase1['Unique_Identifier'] = district_names + "_" + facility_names

In [44]:
# Put the unique ID first and leave the other columns in their existing order.
id_column = 'Unique_Identifier'
remaining_columns = [name for name in df_sssp_phase1.columns if name != id_column]
df_sssp_phase1 = df_sssp_phase1[[id_column, *remaining_columns]]

In [45]:
# Inspect the column names, non-null counts and dtypes of the SSSP phase 1 data.
df_sssp_phase1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222 entries, 0 to 221
Data columns (total 28 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   Unique_Identifier                               222 non-null    object 
 1   District Name                                   222 non-null    object 
 2   ICOS Facility ID                                222 non-null    int64  
 3   Facility Name                                   222 non-null    object 
 4   ICOS Building ID                                218 non-null    float64
 5   Building Name                                   222 non-null    object 
 6   Enrollment                                      222 non-null    int64  
 7   Latitude                                        222 non-null    float64
 8   Longitude                                       222 non-null    float64
 9   FEMA Construction Type                     

In [46]:
# Store construction years as pandas' nullable integer type.
df_sssp_phase1 = df_sssp_phase1.astype({'Year Built': 'Int64'})

In [47]:
# The following compares hazard levels from the SSSP phase 1 document to the ICOS dataset.

In [48]:
# Replace missing hazard levels with the placeholder string 'none'.
df_sssp_phase1 = df_sssp_phase1.fillna({'Earthquake Hazard Level': 'none'})

In [49]:
# Build a two-column frame of school-wide hazard levels, with repeated rows removed,
# to compare against the hazard levels in ICOS.
sssp_hazard_columns = ['Unique_Identifier', 'Earthquake Hazard Level']
df_sssp_engineering_small = df_sssp_phase1.loc[:, sssp_hazard_columns].drop_duplicates()

In [50]:
# Drop rows carrying the 'none' placeholder: they have no hazard level to compare with ICOS.
mask = df_sssp_engineering_small['Earthquake Hazard Level'].ne('none')
df_sssp_engineering_small = df_sssp_engineering_small.loc[mask]

In [51]:
# Confirm the row count and that both columns are fully populated after filtering.
df_sssp_engineering_small.info()

<class 'pandas.core.frame.DataFrame'>
Index: 102 entries, 0 to 217
Data columns (total 2 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Unique_Identifier        102 non-null    object
 1   Earthquake Hazard Level  102 non-null    object
dtypes: object(2)
memory usage: 2.4+ KB


In [52]:
# Pull out just the unique IDs (as a pandas Series) to look them up in ICOS.
df_sssp_engineering_ids = df_sssp_engineering_small.loc[:, 'Unique_Identifier']
df_sssp_engineering_ids.shape

(102,)

In [53]:
# Identify any schools with newer buildings (built after 2017) so they can be excluded from the comparison.

In [54]:
# Building-level construction years from ICOS, one row per distinct combination.
age_columns = ['Unique_Identifier', 'Building', 'Building_YearBuilt', 'BuildingArea_YearBuilt']
df_icos_age = df_icos.loc[:, age_columns].drop_duplicates()

In [55]:
# Keep only the buildings that belong to schools with an SSSP hazard level.
in_sssp = df_icos_age['Unique_Identifier'].isin(df_sssp_engineering_ids)
df_hazards_age = df_icos_age.loc[in_sssp]
df_hazards_age.shape

(346, 4)

In [56]:
# List buildings where either the building or one of its areas was built after 2017.
# Exclude these buildings in the comparison in case the new construction was in a different
# location than the older buildings that were assessed in the SSSP, which could change their
# ground hazard level. Some of these schools have other older buildings still in the data.
# NOTE(review): this cell only displays the rows; no cell visible in this section removes them
# (or their schools) before the hazard-level merge — confirm the exclusion is applied.
building_is_new = df_hazards_age['Building_YearBuilt'].gt(2017)
building_area_is_new = df_hazards_age['BuildingArea_YearBuilt'].gt(2017)
mask = building_is_new | building_area_is_new
df_hazards_age_new = df_hazards_age.loc[mask]
df_hazards_age_new

Unnamed: 0,Unique_Identifier,Building,Building_YearBuilt,BuildingArea_YearBuilt
4186,Pateros_Pateros K-12 School,Covered Playshed,2021.0,2021.0
4190,Pateros_Pateros K-12 School,Main Building,1948.0,2022.0
4300,Port Townsend_Port Townsend High School,Main Building,1934.0,2019.0
4303,Port Townsend_Port Townsend High School,Math Science Annex,1928.0,2019.0
5992,Tonasket_Tonasket Elementary School,Tonasket Elementary,1995.0,2021.0
6065,Tumwater_Black Lake Elementary School,Building A,1982.0,2019.0
6518,White Salmon Valley_Columbia High School,Multi-purpose,2020.0,2020.0
6519,White Salmon Valley_Hulan L. Whitson Elementary School,Covered Play,2020.0,2020.0


In [57]:
# Pull the hazard level recorded for each school in ICOS, for comparison with the SSSP levels.
icos_hazard_columns = ['Unique_Identifier', 'Site_EQHazardLevel']
df_icos_hazard = df_icos.loc[:, icos_hazard_columns].drop_duplicates()

In [58]:
# Keep only the ICOS schools whose ID also appears in the SSSP hazard list.
in_sssp = df_icos_hazard['Unique_Identifier'].isin(df_sssp_engineering_ids)
df_hazards = df_icos_hazard.loc[in_sssp]

In [59]:
# Attach the SSSP hazard level to each ICOS school by unique ID (left join on the ICOS rows).
df_merged_hazards = pd.merge(
    df_hazards,
    df_sssp_engineering_small,
    how='left',
    on='Unique_Identifier',
)

In [60]:
# Check the row count of the merged frame and that both hazard columns are populated.
df_merged_hazards.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 98 entries, 0 to 97
Data columns (total 3 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Unique_Identifier        98 non-null     object
 1   Site_EQHazardLevel       98 non-null     object
 2   Earthquake Hazard Level  98 non-null     object
dtypes: object(3)
memory usage: 2.4+ KB


In [61]:
# Label each hazard column with its source so the comparison table is self-explanatory.
# The renamed frame is reassigned instead of using inplace=True, following the
# reassignment style of the other rename in this notebook.
hazard_column_names = {
    'Site_EQHazardLevel': 'ICOS hazard level',
    'Earthquake Hazard Level': 'SSSP hazard level',
}
df_merged_hazards = df_merged_hazards.rename(columns=hazard_column_names)

In [62]:
# Count each pairing of SSSP hazard level with ICOS hazard level.
df_merged_hazards[['SSSP hazard level', 'ICOS hazard level']].value_counts()
# Out of 98 schools with hazard levels listed in both the ICOS dataset and the School Seismic Safety Project,
# ICOS has lower hazard levels than the SSSP for 69 schools. 
# NOTE(review): 69 equals the sum of the pairings where SSSP gives a single level above the ICOS level;
# the "Moderate to High" range rows appear not to be counted as lower — confirm that is intended.

SSSP hazard level  ICOS hazard level
Moderate           Low                  22
Very High          High                 20
High               Moderate             16
                   High                 14
Extremely High     Very High             8
Moderate to High   Moderate              8
                   High                  6
Very High          Moderate              2
Extremely High     High                  1
Very High          Very High             1
Name: count, dtype: int64