In [38]:
import pandas as pd
import seaborn as sns
import numpy as np
from numpy import nan

In [39]:
# Lift pandas' display limits (each option is set to None) so wide and long
# frames are shown in full instead of being truncated.
for option_name in (
    'display.max_columns',
    'display.max_rows',
    'display.width',
    'display.max_colwidth',
    'max_seq_item',
):
    pd.set_option(option_name, None)

In [40]:
# Provenance: Information and Condition of Schools (ICOS) database, Pre-Disaster Mitigation module,
# received from the Office of Superintendent of Public Instruction on Aug. 14, 2024, via public records request.
# https://www.k12.wa.us/policy-funding/school-buildings-facilities/information-and-condition-schools-icos
ICOS_CSV_PATH = "data/df_icos_clean_8.14.24.csv"
df_original = pd.read_csv(ICOS_CSV_PATH)

In [41]:
# Work on an independent copy. A plain `df = df_original` only creates a second name for the
# same DataFrame, so column assignments made through `df` would also change `df_original`.
df = df_original.copy()

In [42]:
# Summarize the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6678 entries, 0 to 6677
Data columns (total 32 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Unique_Identifier                    6678 non-null   object 
 1   District                             6678 non-null   object 
 2   Site                                 6678 non-null   object 
 3   Site_Latitude                        6678 non-null   float64
 4   Site_Longitude                       6678 non-null   float64
 5   Site_Type                            6678 non-null   object 
 6   Site_EQHazardLevel                   6678 non-null   object 
 7   Site_EQRiskLevel                     6678 non-null   object 
 8   Site_Soil                            6638 non-null   object 
 9   Site_Liquefaction                    6638 non-null   object 
 10  Site_GroundMotionPercentile          6638 non-null   float64
 11  Site_HasGeotechnicalStudy     

In [32]:
# Cast the year-built columns to pandas' nullable integer dtype ('Int64') so they can be used
# in numeric comparisons and calculations.
for year_column in ('Building_YearBuilt', 'BuildingArea_YearBuilt'):
    df[year_column] = df[year_column].astype('Int64')

In [33]:
# How many buildings that were built prior to 1958, when unreinforced masonry was common, have no construction
# type identified? The year 1958 was chosen because the state Department of Commerce used this year as a cutoff 
# for including buildings in its 2018 URM dashboard.

In [34]:
# Keep only the columns needed for this question and drop rows that are exact duplicates
# across those four columns.
small_columns = [
    'Unique_Identifier',
    'Building',
    'Building_YearBuilt',
    'BuildingArea_ConstructionType',
]
df_small = df[small_columns].drop_duplicates()

In [35]:
# Cutoff year for this analysis; only rows with an earlier Building_YearBuilt are kept.
URM_CUTOFF_YEAR = 1958
mask = df_small['Building_YearBuilt'] < URM_CUTOFF_YEAR
df_pre_1958 = df_small[mask]

In [36]:
# Construction type is recorded per building area (a portion of a building), not per building.
# Some buildings have a construction type for some of their areas but not for all of them.
# To count the buildings with no construction type at all, find the total number of buildings in this group
# and subtract those that have a construction type listed on one or more building-area rows.

In [37]:
# Collapse to one row per (Unique_Identifier, Building) pair so each pre-1958 building is counted once.
# The recorded run of this cell returned 741 buildings built before 1958.
df_pre_1958_buildings = (
    df_pre_1958
    .drop_duplicates(subset=['Unique_Identifier', 'Building'])
    [['Unique_Identifier', 'Building']]
)
buildings_all = len(df_pre_1958_buildings)
buildings_all

741

In [20]:
# How many pre-1958 buildings have one or more building areas with a construction type listed?
# A row counts as "known" only when the field is populated and is not the 'no value' placeholder.
# Missing values (NaN) compare unequal to every string, so a bare `!= 'no value'` test would
# treat them as a known construction type and overcount; they are excluded explicitly here.
# NOTE(review): if this column never contains NaN, the result is unchanged -- confirm against the data.
mask = (
    df_pre_1958['BuildingArea_ConstructionType'].notna()
    & (df_pre_1958['BuildingArea_ConstructionType'] != 'no value')
)
df_pre_1958_known = df_pre_1958[mask]

In [21]:
# Collapse the rows with a listed construction type to one row per (Unique_Identifier, Building) pair.
# The recorded run of this cell returned 476 pre-1958 buildings with a construction type on one or more rows.
df_pre_1958_known_buildings = (
    df_pre_1958_known
    .drop_duplicates(subset=['Unique_Identifier', 'Building'])
    [['Unique_Identifier', 'Building']]
)
buildings_info = len(df_pre_1958_known_buildings)
buildings_info

476

In [25]:
# The remaining buildings have no construction type listed on any of their rows:
# all pre-1958 buildings minus those with a construction type on at least one row.
no_construction_type = buildings_all - buildings_info

In [26]:
# Report the final count as a sentence; print() joins the three arguments with single spaces.
print(
    "There are",
    no_construction_type,
    "buildings that were built before 1958 and have no construction type "
    "listed for any rows.",
)

There are 265 buildings that were built before 1958 and have no construction type listed for any rows.
