In [33]:
import pandas as pd
import seaborn as sns
import numpy as np
from numpy import nan

In [34]:
# Display configuration: lift pandas' truncation limits so frames, long cells and long
# sequences are rendered in full. Caution: with max_rows unset, displaying a whole
# DataFrame prints every row, so prefer .head()/.shape on large frames.
# Full option names are used throughout; abbreviated names are resolved by pattern
# matching and can become ambiguous if a later pandas release adds a similar option.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_seq_items', None)

In [39]:
# Source data: Information and Condition of Schools (ICOS), Pre-Disaster Mitigation module.
# Provided by the WA Office of Superintendent of Public Instruction (OSPI) on 8/14/24
# in response to a public records request.
# Program page:
# https://www.k12.wa.us/policy-funding/school-buildings-facilities/information-and-condition-schools-icos
icos_csv_path = "data/df_icos_clean_8.14.24.csv"
df_original = pd.read_csv(icos_csv_path)

In [40]:
# Work on an independent copy. A plain assignment would only alias the same object,
# so the in-place column conversions applied to `df` later in the notebook would also
# rewrite `df_original` and the untouched load would be lost.
df = df_original.copy()

In [41]:
# Print a structural summary of the frame: index range, column names,
# non-null counts per column and each column's dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6678 entries, 0 to 6677
Data columns (total 32 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Unique_Identifier                    6678 non-null   object 
 1   District                             6678 non-null   object 
 2   Site                                 6678 non-null   object 
 3   Site_Latitude                        6678 non-null   float64
 4   Site_Longitude                       6678 non-null   float64
 5   Site_Type                            6678 non-null   object 
 6   Site_EQHazardLevel                   6678 non-null   object 
 7   Site_EQRiskLevel                     6678 non-null   object 
 8   Site_Soil                            6638 non-null   object 
 9   Site_Liquefaction                    6638 non-null   object 
 10  Site_GroundMotionPercentile          6638 non-null   float64
 11  Site_HasGeotechnicalStudy     

In [42]:
# Convert selected columns to pandas' nullable integer dtype ('Int64') so they can be
# used in calculations while still allowing missing values.
df['Building_YearBuilt'] = df['Building_YearBuilt'].astype('Int64')
df['BuildingArea_YearBuilt'] = df['BuildingArea_YearBuilt'].astype('Int64')

# Site_Enrollment holds numbers written with comma separators, so the commas must be
# stripped before the integer cast. Casting to the 'string' dtype first keeps this cell
# safe to re-run: once the column is already Int64, calling `.str` on it directly
# would raise AttributeError.
df['Site_Enrollment'] = (
    df['Site_Enrollment']
    .astype('string')
    .str.replace(',', '', regex=False)  # treat the comma as a literal, not a pattern
    .astype('Int64')
)

In [43]:
# How many students attended K-12 public schools with no evaluations and no retrofits in the 2023-'24 school year?

In [44]:
# First step toward counting pre-1998 buildings that have no evaluations or retrofits:
# keep only the rows whose building was built before 1998.
mask = df['Building_YearBuilt'].lt(1998)
df_old = df[mask]

In [45]:
# Keep only the identifying and status columns, then collapse rows that repeat
# the same combination of values.
df_old = df_old[
    [
        'Unique_Identifier',
        'Site_Enrollment',
        'Building',
        'Building_HasEvaluation',
        'BuildingArea_HasRetrofit',
    ]
].drop_duplicates()

In [46]:
# Narrow to the rows whose building is recorded as having no evaluation.
mask = df_old['Building_HasEvaluation'].eq('No')
df_old_evals_no = df_old[mask]

In [47]:
# One row per school (identifier + enrollment) that has one or more buildings built
# before 1998 with no seismic evaluation. The recorded run found 805 such schools.
df_old_evals_no_2 = df_old_evals_no.loc[
    :, ['Unique_Identifier', 'Site_Enrollment']
].drop_duplicates()
df_old_evals_no_2.shape

(805, 2)

In [48]:
# Distinct school identifiers, kept as a one-column DataFrame.
df_old_evals_no_ids = df_old_evals_no.loc[:, ['Unique_Identifier']].drop_duplicates()
df_old_evals_no_ids.shape

(805, 1)

In [49]:
# Distinct school identifiers as a Series (the form used for the `isin` lookups).
# Built directly from `df_old_evals_no` rather than from `df_old_evals_no_ids` itself:
# the self-referencing version turned the name from a DataFrame into a Series, so
# running the cell a second time raised KeyError. The resulting values are the same.
df_old_evals_no_ids = df_old_evals_no['Unique_Identifier'].drop_duplicates()
df_old_evals_no_ids.shape

(805,)

In [50]:
# Pull every row of the full dataset that belongs to the selected schools.
# The recorded run returned 3,085 rows for these 805 schools.
in_selected_schools = df['Unique_Identifier'].isin(df_old_evals_no_ids)
df_test = df[in_selected_schools]
df_test.shape

(3085, 32)

In [51]:
# Keep the rows whose building area is recorded with a structural or
# non-structural retrofit.
mask = df_test['BuildingArea_HasRetrofit'].isin(['Structural', 'Non-Structural'])
df_test2 = df_test[mask]

In [52]:
# Distinct (school, building) pairs that have a retrofit.
# The recorded run found 35 such buildings in these schools.
df_test3 = df_test2.loc[:, ['Unique_Identifier', 'Building']].drop_duplicates()
df_test3.shape

(35, 2)

In [53]:
# Distinct schools (identifier + enrollment) that contain the retrofitted buildings.
# In the recorded run, the 35 buildings are at 25 schools.
enrollment_test = df_test2.loc[
    :, ['Unique_Identifier', 'Site_Enrollment']
].drop_duplicates()
enrollment_test.shape

(25, 2)

In [54]:
# Identifiers of the schools that have at least one retrofit row.
enrollment_test_ids = enrollment_test.loc[:, 'Unique_Identifier']
enrollment_test_ids.shape

(25,)

In [55]:
# Drop the schools that have any retrofit, leaving unevaluated pre-1998 building rows
# only for schools with no retrofit recorded.
school_has_retrofit = df_old_evals_no['Unique_Identifier'].isin(enrollment_test_ids)
enrollment_test_3 = df_old_evals_no[~school_has_retrofit]

In [56]:
# One row per remaining school (identifier + enrollment).
# The recorded run found 780 schools with pre-1998 buildings that have no evaluations
# or retrofits. Cross-check: 780 + 25 = 805.
enrollment_test_4 = enrollment_test_3.loc[
    :, ['Unique_Identifier', 'Site_Enrollment']
].drop_duplicates()
enrollment_test_4.shape

(780, 2)

In [57]:
# Total enrollment across the schools with one or more pre-1998 buildings that have
# no evaluations and no retrofits.
p = enrollment_test_4['Site_Enrollment'].sum()
unevaluated_summary = (
    "students who attended schools with one or more pre-1998 "
    "buildings that have no evaluations and no retrofits in the '23-'24 school year."
)
print("There are", "{:,}".format(p), unevaluated_summary)
# Recorded result: 378,590 students attended school last year at schools with one or
# more pre-1998 buildings that have no evaluations and no retrofits.

There are 378,590 students who attended schools with one or more pre-1998 buildings that have no evaluations and no retrofits in the '23-'24 school year.


In [58]:
# How many students attended schools with "high" or "very high" site risks in the '23-'24 school year?

In [59]:
# One row per distinct (identifier, site earthquake risk level, enrollment) combination.
df_risk = df.loc[
    :, ['Unique_Identifier', 'Site_EQRiskLevel', 'Site_Enrollment']
].drop_duplicates()

In [60]:
# Count the deduplicated rows in each site earthquake risk level. The recorded output
# shows "no value" is itself one of the levels present in the data.
df_risk['Site_EQRiskLevel'].value_counts()

Site_EQRiskLevel
no value     1201
Low           375
Very High     242
Moderate      121
High           84
Name: count, dtype: int64

In [61]:
# Select the schools whose site risk level is "High" or "Very High" and count them.
mask = df_risk['Site_EQRiskLevel'].isin(["High", "Very High"])
df_high_risk = df_risk[mask]
m = len(df_high_risk)

In [62]:
# Total enrollment across the schools with high or very high site risk.
x = df_high_risk['Site_Enrollment'].sum()
high_risk_summary = (
    "schools with high or very high site risks in "
    "the '23-'24 school year."
)
print("There are", "{:,}".format(x), "students who attended", m, high_risk_summary)

There are 167,114 students who attended 326 schools with high or very high site risks in the '23-'24 school year.


In [None]:
# Note from OSPI: "Enrollment figures are from the October 1st Report Card snapshot; enrollment figures may differ 
# from Report Card, since the latter is updated closer to the end of the year to account for students that move 
# between schools/grade levels. Enrollment is captured at the school-level that are tied to a site."