In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from numpy import nan

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('max_seq_item', None)

In [3]:
# Data received from Office of Superintendent of Public Instruction, Aug. 14, 2024, via public records request
# from the Information and Condition of Schools (ICOS) database, Pre-Disaster Mitigation module.
# https://www.k12.wa.us/policy-funding/school-buildings-facilities/information-and-condition-schools-icos
df_original=pd.read_csv("data/df_icos_clean_8.14.24.csv")

In [4]:
df=df_original

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6678 entries, 0 to 6677
Data columns (total 32 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Unique_Identifier                    6678 non-null   object 
 1   District                             6678 non-null   object 
 2   Site                                 6678 non-null   object 
 3   Site_Latitude                        6678 non-null   float64
 4   Site_Longitude                       6678 non-null   float64
 5   Site_Type                            6678 non-null   object 
 6   Site_EQHazardLevel                   6678 non-null   object 
 7   Site_EQRiskLevel                     2838 non-null   object 
 8   Site_Soil                            6638 non-null   object 
 9   Site_Liquefaction                    6638 non-null   object 
 10  Site_GroundMotionPercentile          6638 non-null   float64
 11  Site_HasGeotechnicalStudy     

In [6]:
df['BuildingArea_YearBuilt'] = df['BuildingArea_YearBuilt'].astype('Int64')
df['Building_YearBuilt'] = df['Building_YearBuilt'].astype('Int64')

In [7]:
# How many high or very high risk building areas would be deemed low risk with a retrofit?
# Compare the building area risk value to the retrofit risk value for rows that have these values.

In [8]:
# Retrofit risk is defined in the database glossary as: 
# "The calculated building area risk taking into consideration the buildings retrofit"

In [9]:
df['BuildingArea_RetrofitRisk'].value_counts()

BuildingArea_RetrofitRisk
Low          2352
Moderate       85
Very High       2
High            2
Name: count, dtype: int64

In [10]:
df_small = df[['Unique_Identifier', 'Building', 'BuildingArea_Name', 'BuildingArea_ExistingRisk', \
               'BuildingArea_HasRetrofit', 'BuildingArea_RetrofitRisk']]
df_small.shape

(6678, 6)

In [11]:
# Take only the building area rows that have a value of moderate, high or very high for retrofit risk
mask = (df_small['BuildingArea_ExistingRisk'] == 'Moderate') | \
(df_small['BuildingArea_ExistingRisk'] == 'High') | \
(df_small['BuildingArea_ExistingRisk'] == 'Very High')
df_small_2 = df_small[mask]
df_small_2.shape

(1878, 6)

In [12]:
# Take only the rows from those that also have a value for retrofit risk
mask = (df_small_2['BuildingArea_RetrofitRisk'] != 'no value')
df_small_3 = df_small_2[mask]
df_small_3.shape

(1878, 6)

In [13]:
df_small_3['BuildingArea_RetrofitRisk'].value_counts(normalize=True).mul(100).round(1).astype(str) + '%'

BuildingArea_RetrofitRisk
Low          95.1%
Moderate      4.7%
Very High     0.1%
High          0.1%
Name: proportion, dtype: object

In [14]:
df_small_3[['BuildingArea_ExistingRisk', 'BuildingArea_RetrofitRisk']].value_counts()

BuildingArea_ExistingRisk  BuildingArea_RetrofitRisk
Very High                  Low                          827
Moderate                   Low                          477
High                       Low                          411
Very High                  Moderate                      38
High                       Moderate                      32
Moderate                   Moderate                      14
High                       High                           2
                           Very High                      1
Very High                  Very High                      1
Name: count, dtype: int64

In [15]:
# The database calculations show that 95% of building areas with moderate, high or very high risks would be
# brought down to low risk with a retrofit.