In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from numpy import nan

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('max_seq_item', None)

In [4]:
# DATA SOURCE: INFORMATION AND CONDITION OF SCHOOLS (ICOS), Pre-Disaster Mitigation module
# Data received from WA Office of Superintendent of Public Instruction (OSPI), 8/14/24, via public records request.
# https://www.k12.wa.us/policy-funding/school-buildings-facilities/information-and-condition-schools-icos
df_icos_original=pd.read_csv("data/df_icos_clean_8.14.24.csv")

In [5]:
# Copy the data and work with the copy.
df_icos = df_icos_original

In [6]:
df_icos['BuildingID'] = df_icos['BuildingID'].astype('Int64')
df_icos['Building_YearBuilt'] = df_icos['Building_YearBuilt'].astype('Int64')
df_icos['BuildingArea_YearBuilt'] = df_icos['BuildingArea_YearBuilt'].astype('Int64')

In [7]:
# Create a copy of the original school names before re-formatting, to show what changes were made.
df_icos['original_Site'] = df_icos['Site']

In [219]:
# DATA CLEANING AND MERGING

In [220]:
# ICOS: Seattle 

In [8]:
# Take only the Seattle rows from the statewide ICOS data
mask = (df_icos['District'] == 'Seattle')
df_icos_seattle = df_icos[mask]

In [9]:
df_icos_seattle.info() # There are no shear wave velocities added for any Seattle schools.

<class 'pandas.core.frame.DataFrame'>
Index: 332 entries, 4716 to 5047
Data columns (total 33 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Unique_Identifier                    332 non-null    object 
 1   District                             332 non-null    object 
 2   Site                                 332 non-null    object 
 3   Site_Latitude                        332 non-null    float64
 4   Site_Longitude                       332 non-null    float64
 5   Site_Type                            332 non-null    object 
 6   Site_EQHazardLevel                   332 non-null    object 
 7   Site_EQRiskLevel                     332 non-null    object 
 8   Site_Soil                            332 non-null    object 
 9   Site_Liquefaction                    332 non-null    object 
 10  Site_GroundMotionPercentile          332 non-null    float64
 11  Site_HasGeotechnicalStudy        

In [10]:
# Drop the district column because all of these schools are in the same district.
df_icos_seattle = df_icos_seattle.drop(columns=['District'])

In [11]:
# How many rows (Building Areas, which are a subset of Buildings) are in ICOS for Seattle district schools?
df_icos_seattle.shape

(332, 32)

In [12]:
# Adjust school names in ICOS to match styling used by the district, so the datasets can be merged by the names.
# Re-format school names to match styles found here: https://www.seattleschools.org/schools/

In [13]:
# ICOS lists many school names with the word "school" at the end of the name, while the district does not include 
# the word "school" in these names. This code erases the last word "school" for the schools where this is true. 
schools = ['Adams Elementary School', 'Arbor Heights Elementary School', \
           'Bailey Gatzert Elementary School', 'Broadview-Thomson K-8 School', 'Bryant Elementary School', \
           'Cascadia Elementary School', 'Catharine Blaine K-8 School', 'Cedar Park Elementary School', \
           'Daniel Bagley Elementary School', 'Decatur Elementary School', 'Dunlap Elementary School', \
           'Emerson Elementary School', 'Fairmount Park Elementary School', 'Gatewood Elementary School', \
           'Genesee Hill Elementary School', 'Graham Hill Elementary School', 'Green Lake Elementary School', \
           'Greenwood Elementary School', 'Hawthorne Elementary School', 'Hazel Wolf K-8 School', \
           'Highland Park Elementary School', 'John Hay Elementary School', 'John Muir Elementary School', \
           'John Rogers Elementary School', 'Lafayette Elementary School', 'Laurelhurst Elementary School', \
           'Lawton Elementary School', 'Leschi Elementary School', 'Lowell Elementary School', \
           'Loyal Heights Elementary School', 'Magnolia Elementary School', 'Maple Elementary School', \
           'McGilvra Elementary School', 'North Beach Elementary School', \
           'Olympic Hills Elementary School', 'Olympic View Elementary School', 'Queen Anne Elementary School', \
           'Rainier View Elementary School', 'Rising Star Elementary School', 'Sacajawea Elementary School', \
           'Sand Point Elementary School', 'Sanislo Elementary School', 'Stevens Elementary School', \
           'View Ridge Elementary School', 'Viewlands Elementary School', 'Wedgwood Elementary School', \
           'West Seattle Elementary School', 'West Woodland Elementary School', 'Whittier Elementary School', ]

for i in schools:
    my_list = df_icos_seattle[df_icos_seattle['Site'] == i]['Site'].str.split(" ")
    my_list = my_list.str[:-1]
    my_list = my_list.str.join(' ')
    df_icos_seattle['Site'] = df_icos_seattle['Site'].replace([i], [my_list])

In [14]:
#df_icos_seattle[df_icos_seattle['original_Site'] == i]['Site'] # check that the above renaming worked

In [15]:
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["Alki Elementary School"], \
                                                          "Alki Elementary")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["Beacon Hill Int'l Elementary School"], \
                                                          "Beacon Hill International Elementary")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['B. F. Day Elementary School'], \
                                                          'B.F. Day Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Frantz Coe Elementary School'], 'Coe Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["Concord Int'l Elementary School"], \
                                                          "Concord International Elementary")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Cooper (Pathfinder) Elementary School'], \
                                                          'Pathfinder K-8')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["Dearborn Park Intl'l Elementary School"], \
                                                          "Dearborn Park International Elementary")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["Ingraham Int'l High School"], "Ingraham High School")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['John Marshall'], \
                                                          'John Marshall (Interim Site)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["John Stanford Int'l Elementary School"], \
                                                          "John Stanford International Elementary")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Kimball Elementary School (New)'], \
                                                          'Kimball Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Louisa Boren (STEM)'], 'Louisa Boren STEM K-8')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Martin Luther King Elementary School'], \
                                                          'Martin Luther King, Jr. Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Madrona K-8 School'], 'Madrona Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Mann (Nova) High School'], 'Nova High School')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(["McDonald Int'l Elementary School"], \
                                                          "McDonald International Elementary")
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Monroe (Salmon Bay K-8)'], 'Salmon Bay K-8')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Montlake Elementary School'], \
                                                          'Montlake Elementary (old building)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['North Queen Anne Elementary School (CCCP)'], \
                                                          'Cascade Parent Partnership')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Orca K-8'], \
                                                          'Whitworth (Orca K-8)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Seward (TOPS) K-8 School'], 'TOPS K-8')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Schmitz Park (Temp old Genesee Hill ES)'], \
                                                          'Schmitz Park (Interim Site)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Seattle World School'], \
                                                          'Seattle World School (@T.T. Minor)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['South Shore K-8 School'], 'South Shore PK-8')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Thornton Creek School'], 'Thornton Creek Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Thurgood Marshall (Colman) Elementary School'], \
                                                          'Thurgood Marshall Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Orca K-8'], 'Whitworth (Orca K-8)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Wing Luke Elementary (New)'], 'Wing Luke Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Northgate Elementary School'], \
                                                          'James Baldwin Elementary')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Queen Anne Gym'], 'Queen Anne Gymnasium (Interagency)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Schmitz Park (Temp old Genesee Hill ES)'], \
                                                          'Schmitz Park (Interim Site)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Schmitz Park (Interim site)'], \
                                                          'Schmitz Park (Interim Site)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['TOPS K-8'], 'Seward (TOPS K-8)')
df_icos_seattle['Site'] = df_icos_seattle['Site'].replace(['Van Asselt (Old) Elementary School'], \
                                  'Van Asselt (Interim Site)')

In [16]:
# df_icos_seattle['Site'].drop_duplicates().sort_values() # check that all of the renaming worked

In [17]:
# How many Seattle schools are in ICOS?
df_icos_seattle['Site'].nunique()

104

In [18]:
# There are 104 schools (col 2), but combined with retrofit status (col 29) and de-duplicated, there are 140 rows. 
# This means some schools have more than one value for retrofit status within all rows for that school.
# This could indicate partial retrofits.
df_icos_seattle_retrofit = df_icos_seattle.iloc[:, [2, 29]]
df_icos_seattle_retrofit_2 = df_icos_seattle_retrofit.drop_duplicates()
df_icos_seattle_retrofit_2.shape 

(143, 2)

In [19]:
# DATA SOURCE: Seattle Public Schools. Information updated by the district in July 2024.
# https://www.seattleschools.org/departments/capital-projects-and-planning/facilities-master-plan/seismic-information/

In [22]:
df_seattle_original=pd.read_csv("data/seattle_seismic_upgrades_July_2024.csv")

In [23]:
# Copy the district data and work with the copy.
df_seattle = df_seattle_original

In [24]:
df_seattle.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108 entries, 0 to 107
Data columns (total 2 columns):
 #   Column                                Non-Null Count  Dtype 
---  ------                                --------------  ----- 
 0   Building/School                       108 non-null    object
 1   Year of Most Recent Seismic Retrofit  108 non-null    object
dtypes: object(2)
memory usage: 1.8+ KB


In [25]:
# rename column
df_seattle = df_seattle.rename(columns= {"Building/School": "school_building"})

In [26]:
df_seattle.loc[75] # this row needs to be deleted because it is just a placeholder for James Baldwin

school_building                         Northgate (Now James Baldwin)
Year of Most Recent Seismic Retrofit               See Baldwin, James
Name: 75, dtype: object

In [27]:
df_seattle.drop([75], axis=0, inplace=True)

In [28]:
df_seattle["Year of Most Recent Seismic Retrofit"]=df_seattle["Year of Most Recent Seismic Retrofit"].\
    str.replace(':',' ')

In [29]:
# Separate into two columns, splitting after the first listed year. 
df_seattle[['sps_upgrade_year', 'sps_upgrade_notes']] = df_seattle\
['Year of Most Recent Seismic Retrofit'].str.split(" ", n=1, expand=True)

In [30]:
df_seattle = df_seattle.drop(columns=\
                             ['Year of Most Recent Seismic Retrofit'])

In [31]:
# Merge with SPS unreinforced masonry list

In [32]:
# DATA SOURCE: Seattle Public Schools URMs list. Information updated by the district in July 2024.
# https://www.seattleschools.org/wp-content/uploads/2024/08/urm_listed_seismic_improvements_updated_July2024.pdf
df_seattle_urms=pd.read_csv("data/seattle_urm_seismic_improvements_July_2024.csv")

In [33]:
# rename columns
df_seattle_urms = df_seattle_urms.rename(columns=\
            {"Seattle Public School building shown on the City of Seattle's Unreinforced Masonry (URM) list": \
             "school_building", "Year of most recent seismic retrofits": "sps_urms_retrofit_yr_2024", \
             "Note": 'sps_urms_note_2024'})

In [34]:
# Add the older version of the district URM list for comparison too.
# DATA SOURCE: Seattle Public Schools URMs list. Information updated by the district in October 2019.
# https://www.seattleschools.org/wp-content/uploads/2021/07/urm_listed_seismic_improvements_updated_oct2019.pdf
df_seattle_urms_2019=pd.read_csv("data/seattle_urm_seismic_improvements_updated_Oct_2019.csv")

In [35]:
# rename columns
df_seattle_urms_2019 = df_seattle_urms_2019.rename(columns=\
            {"Seattle Public School building shown on the City of Seattle's Unreinforced Masonry (URM) List": \
             "school_building", "Year of most recent seismic retrofits": "sps_urms_retrofit_yr_2019", \
             "Note": 'sps_urms_note_2019'})

In [36]:
# Adjust the school names in the 2019 SPS list to merge with the 2024 SPS list
df_seattle_urms_2019['school_building'] = df_seattle_urms_2019['school_building'].replace\
                               (['Bagley (Daniel Bagley)'], 'Bagley, Daniel')
df_seattle_urms_2019['school_building'] = df_seattle_urms_2019['school_building'].replace\
                               (['B.F. Day'], 'Day, B.F.')
df_seattle_urms_2019['school_building'] = df_seattle_urms_2019['school_building'].replace\
                               (['Marshall (John Marshall)'], 'Marshall, John (interim site)')
df_seattle_urms_2019['school_building'] = df_seattle_urms_2019['school_building'].replace\
                               (['Queen Anne ES'], 'Queen Anne ES (aka Old Hay)')

In [37]:
# Merge SPS urms data from 2019 and 2024
df_seattle_urms_merged = df_seattle_urms.merge(df_seattle_urms_2019, how = 'left', on = ['school_building'])

In [38]:
df_seattle_urms_merged['sps_urms_retrofit_yr_2019'] = df_seattle_urms_merged['sps_urms_retrofit_yr_2019']\
    .astype('Int64')

In [39]:
df_seattle_urms_merged.shape

(24, 5)

In [40]:
df_seattle_urms = df_seattle_urms_merged

In [41]:
# add a column to mark these as urms for merging with the rest of the schools
df_seattle_urms['unreinforced_masonry_list'] = 'yes'

In [42]:
df_seattle_urms[['sps_urms_note_2019', 'sps_urms_note_2024']] = df_seattle_urms\
[['sps_urms_note_2019', 'sps_urms_note_2024']].fillna(value='n/a')

In [43]:
df_seattle_urms # Garfield and West Seattle Elementary were listed in 2019 but do not appar below because
# they were no longer listed in 2024

Unnamed: 0,school_building,sps_urms_retrofit_yr_2024,sps_urms_note_2024,sps_urms_retrofit_yr_2019,sps_urms_note_2019,unreinforced_masonry_list
0,"Bagley, Daniel",2020,Major renovation,2020.0,Undergoing modernization,yes
1,Concord,2000,,2000.0,Complete renovation,yes
2,"Day, B.F.",2017,,2017.0,,yes
3,Dunlap,2000,Major renovation,2000.0,Complete renovation,yes
4,"Hughes, E.C. (Roxhill ES)",2018,Major renovation,,,yes
5,Emerson,2001,Major renovation,2001.0,Complete renovation,yes
6,Franklin,2015,,2015.0,,yes
7,Gatewood,2017,,2017.0,,yes
8,Greenwood,2002,,,,yes
9,Laurelhurst,2019,,2019.0,,yes


In [44]:
# Edit the school names in the URM list to match the school names used by the district found earlier
df_seattle_urms['school_building'] = df_seattle_urms['school_building'].replace(['Hughes, E.C. (Roxhill ES)'], \
                                                                      'Hughes, E.C. (Roxhill)')
df_seattle_urms['school_building'] = df_seattle_urms['school_building'].replace(['Marshall, John (interim site)'], \
                                                                      'Marshall, John (Interim site)')
df_seattle_urms['school_building'] = df_seattle_urms['school_building'].replace(['North Queen Anne'], \
                                          'North Queen Anne School (Cascade Parent Partnership)')
df_seattle_urms['school_building'] = df_seattle_urms['school_building'].replace(['Queen Anne ES (aka Old Hay)'], \
                                                                      'Hay (Queen Anne)')
df_seattle_urms['school_building'] = df_seattle_urms['school_building'].replace(['Webster (Licton Springs K-8)'], \
                                                                      'Webster')
df_seattle_urms['school_building'] = df_seattle_urms['school_building'].replace(['West Seattle HS'], \
                                                                      'West Seattle High School')

In [45]:
# Merge Seattle ICOS and district data
df_seattle_merged = df_seattle.merge(df_seattle_urms, how = 'left', on = ['school_building'])

In [46]:
df_seattle_merged['unreinforced_masonry_list'] = df_seattle_merged['unreinforced_masonry_list'].fillna('no')
df_seattle_merged['sps_upgrade_notes'] = df_seattle_merged['sps_upgrade_notes'].fillna('none')

In [47]:
df_seattle_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107 entries, 0 to 106
Data columns (total 8 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   school_building            107 non-null    object 
 1   sps_upgrade_year           107 non-null    object 
 2   sps_upgrade_notes          107 non-null    object 
 3   sps_urms_retrofit_yr_2024  24 non-null     float64
 4   sps_urms_note_2024         24 non-null     object 
 5   sps_urms_retrofit_yr_2019  19 non-null     Int64  
 6   sps_urms_note_2019         24 non-null     object 
 7   unreinforced_masonry_list  107 non-null    object 
dtypes: Int64(1), float64(1), object(6)
memory usage: 6.9+ KB


In [48]:
df_seattle = df_seattle_merged

In [49]:
# School names are formatted differently within the district website. Re-format school names to match styles
# found here: https://www.seattleschools.org/schools/. Some are adjusted to reflect temporary relocations.

In [50]:
# Create a copy of the original school names before re-formatting, to show what changes were made.
df_seattle['original_school_building'] = df_seattle['school_building']

In [51]:
# This data from the district has some school names with unusual styling as proper names 
# (last name, first name), which is inconsistent with their  usual styling by the district. 
# Split the names at the comma, get rid of the comma, and reorder the words.
schools = [0, 1, 5, 6, 9, 17, 21, 23, 33, 38, 41, 48, 57, 62, 63, 68, 72, 82]

for i in schools:
    df_seattle['school_building'].iloc[i] = df_seattle.school_building.str.split(",").str[::-1]\
         .str.join(" ").apply(lambda x: x.strip() if isinstance(x, str) else x).iloc[i]
    df_seattle['school_building'].iloc[i] = df_seattle.school_building.str.split(" ").iloc[i]
    my_list = (df_seattle['school_building'].iloc[i][:])
    my_list.insert(1, my_list.pop(-1))
    df_seattle['school_building'].iloc[i] = ' '.join(my_list)
    
# 'strip' gets rid of a space that was added before each school name during the above process

In [52]:
# Seattle Public Schools omits the word 'Elementary' at the end of school names in this list. Add this word.
schools = ['Alki', 'Arbor Heights', 'Bailey Gatzert', 'Bryant', 'Cascadia', 'Cedar Park', 'Daniel Bagley', \
           'Dunlap', 'Emerson', 'Fairmount Park', 'Gatewood', 'Genesee Hill', 'Graham Hill', 'Green Lake', \
           'Greenwood', 'Hawthorne', 'Highland Park', 'James Baldwin', 'John Hay', 'John Muir', 'Lafayette', \
           'Laurelhurst', 'Lawton', 'Leschi', 'Lowell', 'Loyal Heights', 'Madrona', 'Magnolia', 'Maple', \
           'McGilvra', 'North Beach', 'Olympic Hills', 'Olympic View', 'Rainier View', 'Sacajawea', 'Sand Point', \
            'Sanislo', 'Stevens', 'Thornton Creek', 'Thurgood Marshall', 'View Ridge', 'Viewlands', \
            'Wedgwood', 'West Woodland', 'Whittier', 'Wing Luke']

for i in schools:
    my_list = df_seattle[df_seattle['school_building'] == i]['school_building']
    my_list = my_list+ " Elementary"
    df_seattle['school_building'] = df_seattle['school_building'].replace([i], [my_list])

In [53]:
# Seattle Public Schools omits the words 'Middle School' at the end of school names in this list. Add this.
schools = ['Aki Kurose', 'Jane Addams', 'Denny International', 'Eckstein', 'McClure', 'Meany', 'Washington']

for i in schools:
    my_list = df_seattle[df_seattle['school_building'] == i]['school_building']
    my_list = my_list+ " Middle School"
    df_seattle['school_building'] = df_seattle['school_building'].replace([i], [my_list])

In [54]:
# Seattle Public Schools omits the words 'High School' at the end of school names in this list. Add this.
schools = ['Ballard', 'Franklin', 'Garfield', 'Nathan Hale', 'Ingraham', 'Lincoln', 'Rainier Beach', 'Roosevelt']

for i in schools:
    my_list = df_seattle[df_seattle['school_building'] == i]['school_building']
    my_list = my_list+ " High School"
    df_seattle['school_building'] = df_seattle['school_building'].replace([i], [my_list])

In [55]:
# Adjusting school names for matching with ICOS, based on school names listed by Seattle Public Schools online.
df_seattle['school_building'] = df_seattle['school_building'].replace(['John Adams'], 'Adams Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['African American Academy (Rising Star)'], \
                                                                      'Rising Star Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Beacon Hill Int’l'], \
                                                                      'Beacon Hill International Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Catharine Blaine'], 'Catharine Blaine K-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Boren, Louisa STEM K-8'], \
                                                                      'Louisa Boren STEM K-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Broadview-Thomson'], \
                                                                      'Broadview-Thomson K-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Chief Sealth'], \
                                                                      'Chief Sealth International High School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Cleveland STEM'], 'Cleveland High School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Frantz Coe'], 'Coe Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Columbia'], 'Columbia School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Concord'], \
                                                                      'Concord International Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Cooper, Frank B. (Pathfinder)'], 
                                                                      'Pathfinder K-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['B.F. Day'], \
                                                                      'B.F. Day Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Dearborn Park'], \
                                                                      'Dearborn Park International Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Stephen Decatur'], 'Decatur Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Eagle Staff, Robert'], \
                                                                      'Robert Eagle Staff Middle School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Hamilton'], \
                                                                      'Hamilton International Middle School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Hay (Queen Anne)'], 'Queen Anne Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Hughes, E.C. (Roxhill)'], \
                                                                      'Roxhill Elementary (at former Hughes ES)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['John Marshall (Interim site)'], \
                                                                      'John Marshall (Interim Site)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Kimball, Captain George W.'], \
                                                                      'Kimball Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['King, Jr., Martin Luther'], \
                                                                      'Martin Luther King, Jr. Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Latona (John Stanford)'], \
                                                                      'John Stanford International Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['McDonald'], \
                                                                      'McDonald International Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Sugiyama, Alan T.'], \
                                                                      'Alan T. Sugiyama High School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Mann, Horace (Nova)'], 'Nova High School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['James Madison'], 'Madison Middle School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Whitman, Marcus'], 'Whitman Middle School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Pinehurst (Hazel Wolf K-8)'], \
                                                                      'Hazel Wolf K-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Whitman, Marcus'], 'Whitman Middle School')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Rogers, John'], \
                                                                      'John Rogers Elementary')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Monroe (Salmon Bay K-8)'], \
                                                                      'Salmon Bay K-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Montlake'], \
                                                                      'Montlake Elementary (old building)')
df_seattle['school_building'] = df_seattle['school_building'].replace\
                         (['North Queen Anne School (Cascade Parent Partnership)'], 'Cascade Parent Partnership')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Queen Anne Gymnasium'], \
                                                                      'Queen Anne Gymnasium (Interagency)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Roxhill (building)'], \
                                                                      'SW Interagency Academy (at former Roxhill ES)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Minor, T.T.'], \
                                                                      'Seattle World School (@T.T. Minor)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Schmitz Park (Interim site)'], \
                                                                      'Schmitz Park (Interim Site)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Seward (TOPS)'], 'Seward (TOPS K-8)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['South Shore'], 'South Shore PK-8')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Van Asselt (Interim site)'], \
                                                                      'Van Asselt (Interim Site)')
df_seattle['school_building'] = df_seattle['school_building'].replace(['Whitman, Marcus'], 'Whitman Middle School')

In [56]:
df_seattle = df_seattle.rename(columns={'school_building': 'Site'})

In [57]:
#df_seattle['Site'] # check that the renaming worked

In [58]:
df_seattle.shape

(107, 9)

In [59]:
df_icos_seattle.shape

(332, 32)

In [60]:
df_icos_seattle['Site'].nunique()

104

In [61]:
# Merge Seattle ICOS and district data
df_merged_seattle = df_seattle.merge(df_icos_seattle, how = 'left', on = ['Site'])

In [62]:
df_merged_seattle[['sps_urms_note_2024', 'sps_urms_note_2019']] = \
     df_merged_seattle[['sps_urms_note_2024', 'sps_urms_note_2019']].fillna(value='n/a')

In [63]:
df_merged_seattle.shape

(335, 40)

In [64]:
out = df_merged_seattle[~df_merged_seattle['Site'].isin(df_icos_seattle['Site'])]
out[['Site', 'sps_upgrade_year', 'sps_upgrade_notes']] 
# there rows did not merge because the sites are not in ICOS

Unnamed: 0,Site,sps_upgrade_year,sps_upgrade_notes
218,Asa Mercer,1989,(replacement under construction)
279,Stanford Center (JSCEE),2001,none
306,Webster,2020,major renovation


In [65]:
df_merged_seattle['Site'].nunique()

107

In [66]:
df_merged_seattle.shape

(335, 40)

In [67]:
# ANALYSIS: CONSTRUCTION TYPE (UNREINFORCED MASONRY)

In [68]:
df_merged_seattle_small = df_merged_seattle[['Site', 'sps_upgrade_year', 'sps_upgrade_notes', \
                  'Site_EQRiskLevel', 'Site_Enrollment', 'Building', 'Building_YearBuilt', \
                  'Building_HasEvaluation', 'BuildingArea_Name', 'BuildingArea_YearBuilt', \
                  'BuildingArea_CodeYear', 'unreinforced_masonry_list', 'BuildingArea_ConstructionType', \
                  'BuildingArea_ExistingRisk', 'BuildingArea_HasRetrofit', \
                  'BuildingArea_RetrofitRisk']].drop_duplicates()

In [69]:
df_merged_seattle_urms = df_merged_seattle[['Site', 'unreinforced_masonry_list', 'BuildingArea_ConstructionType']]\
                       .drop_duplicates()

In [70]:
# Find any schools on the URM list that do not have a URM construction type for any rows in ICOS
mask = (df_merged_seattle_urms['unreinforced_masonry_list'] == 'yes')
df_merged_seattle_urms_2 = df_merged_seattle_urms[mask]

In [71]:
# These are all the schools on the district URM list
df_merged_seattle_urms_2['Site'].unique()

array(['Daniel Bagley Elementary', 'Concord International Elementary',
       'B.F. Day Elementary', 'Dunlap Elementary', 'Emerson Elementary',
       'Franklin High School', 'Gatewood Elementary',
       'Greenwood Elementary', 'Queen Anne Elementary',
       'Roxhill Elementary (at former Hughes ES)',
       'Laurelhurst Elementary', 'Lincoln High School',
       'Lowell Elementary', 'Magnolia Elementary', 'Nova High School',
       'John Marshall (Interim Site)',
       'McDonald International Elementary', 'McGilvra Elementary',
       'Salmon Bay K-8', 'Cascade Parent Partnership',
       'Seward (TOPS K-8)', 'View Ridge Elementary', 'Webster',
       'West Seattle High School'], dtype=object)

In [72]:
df_merged_seattle_urms_2['Site'].nunique()

24

In [73]:
# Which schools from the list above of those on the district URM list also have a URM in ICOS?
mask = (df_merged_seattle_urms_2['BuildingArea_ConstructionType'] == 'Unreinforced Masonry Bearing Walls') | \
(df_merged_seattle_urms_2['BuildingArea_ConstructionType'] == 'Concrete Frame Buildings with URM Infill Walls') | \
(df_merged_seattle_urms_2['BuildingArea_ConstructionType'] == 'Unreinforced Masonry Bearing Walls')
df_merged_seattle_urms_3 = df_merged_seattle_urms_2[mask]

In [74]:
# These are all the schools on the district URM list that also have URM construction in ICOS
df_merged_seattle_urms_3['Site'].unique()

array(['B.F. Day Elementary', 'Dunlap Elementary', 'Emerson Elementary',
       'Franklin High School', 'Gatewood Elementary',
       'Roxhill Elementary (at former Hughes ES)', 'Lowell Elementary',
       'Nova High School', 'John Marshall (Interim Site)',
       'McDonald International Elementary', 'McGilvra Elementary',
       'Salmon Bay K-8', 'Seward (TOPS K-8)', 'View Ridge Elementary'],
      dtype=object)

In [75]:
# 14 out of 24 schools on the district URM list have a URM construction type in the ICOS data.
# Make a list of the opposite from above, the schools from the district list do not have URM construction in ICOS
df_merged_seattle_urms_3_schools = df_merged_seattle_urms_3['Site']
df_merged_seattle_urms_3_schools.shape

(14,)

In [76]:
df_merged_seattle_urms_missing = df_merged_seattle_urms_2[~df_merged_seattle_urms_2['Site'].isin\
                                                                 (df_merged_seattle_urms_3_schools)]

In [77]:
df_merged_seattle_urms_missing['Site'].nunique()

10

In [78]:
# These 10 schools are on the district URM list but do not have URM construction in ICOS
df_merged_seattle_urms_missing['Site'].unique()

array(['Daniel Bagley Elementary', 'Concord International Elementary',
       'Greenwood Elementary', 'Queen Anne Elementary',
       'Laurelhurst Elementary', 'Lincoln High School',
       'Magnolia Elementary', 'Cascade Parent Partnership', 'Webster',
       'West Seattle High School'], dtype=object)

In [79]:
# These are the construction types listed for the above 10 schools
df_merged_seattle_urms_missing[['Site', 'unreinforced_masonry_list', 'BuildingArea_ConstructionType']]

Unnamed: 0,Site,unreinforced_masonry_list,BuildingArea_ConstructionType
15,Daniel Bagley Elementary,yes,Concrete Shear Walls
17,Daniel Bagley Elementary,yes,Steel Braced Frame
18,Daniel Bagley Elementary,yes,Precast Concrete Tilt-Up Walls
54,Concord International Elementary,yes,Concrete Shear Walls
55,Concord International Elementary,yes,Reinforced Masonry Bearing Walls with Wood or Metal Diaphragm Decks
106,Greenwood Elementary,yes,Concrete Shear Walls
107,Greenwood Elementary,yes,Reinforced Masonry Bearing Walls with Wood or Metal Diaphragm Decks
119,Queen Anne Elementary,yes,"Wood, Commercial and Industrial (>5,000 square feet)"
120,Queen Anne Elementary,yes,Reinforced Masonry Bearing Walls with Wood or Metal Diaphragm Decks
151,Laurelhurst Elementary,yes,Steel Braced Frame


In [80]:
# Now find the group of schools that have the opposite of above, that are URMs in ICOS but not in the district list

In [81]:
# Which schools have URM construction types in ICOS?
mask = (df_merged_seattle_urms['BuildingArea_ConstructionType'] == 'Unreinforced Masonry Bearing Walls') | \
(df_merged_seattle_urms['BuildingArea_ConstructionType'] == 'Concrete Frame Buildings with URM Infill Walls') | \
(df_merged_seattle_urms['BuildingArea_ConstructionType'] == 'Unreinforced Masonry Bearing Walls')
df_merged_seattle_urms_4 = df_merged_seattle_urms[mask]

In [82]:
df_merged_seattle_urms_4.shape

(20, 3)

In [83]:
df_merged_seattle_urms_4['unreinforced_masonry_list'].value_counts()

unreinforced_masonry_list
yes    14
no      6
Name: count, dtype: int64

In [84]:
# Which of those 14 schools are no on the district URM list?
mask = (df_merged_seattle_urms_4['unreinforced_masonry_list'] == 'no')
df_merged_seattle_urms_missing_2 = df_merged_seattle_urms_4[mask]

In [85]:
df_merged_seattle_urms_missing_2.shape

(6, 3)

In [88]:
df_merged_seattle_urms_2['Site'].unique()

array(['Daniel Bagley Elementary', 'Concord International Elementary',
       'B.F. Day Elementary', 'Dunlap Elementary', 'Emerson Elementary',
       'Franklin High School', 'Gatewood Elementary',
       'Greenwood Elementary', 'Queen Anne Elementary',
       'Roxhill Elementary (at former Hughes ES)',
       'Laurelhurst Elementary', 'Lincoln High School',
       'Lowell Elementary', 'Magnolia Elementary', 'Nova High School',
       'John Marshall (Interim Site)',
       'McDonald International Elementary', 'McGilvra Elementary',
       'Salmon Bay K-8', 'Cascade Parent Partnership',
       'Seward (TOPS K-8)', 'View Ridge Elementary', 'Webster',
       'West Seattle High School'], dtype=object)

In [89]:
# These are the schools with URM construction in ICOS that are not on the district URM list
df_merged_seattle_urms_missing_2[['Site', 'unreinforced_masonry_list', 'BuildingArea_ConstructionType']]

Unnamed: 0,Site,unreinforced_masonry_list,BuildingArea_ConstructionType
11,Alki Elementary,no,Concrete Frame Buildings with URM Infill Walls
12,Alki Elementary,no,Unreinforced Masonry Bearing Walls
39,Bryant Elementary,no,Unreinforced Masonry Bearing Walls
102,Graham Hill Elementary,no,Unreinforced Masonry Bearing Walls
161,Leschi Elementary,no,Unreinforced Masonry Bearing Walls
219,Seattle World School (@T.T. Minor),no,Concrete Frame Buildings with URM Infill Walls


In [90]:
# ANALYSIS: RETROFITS
# Which schools are listed as retrofitted on the district list versus in ICOS?

In [91]:
mask = (df_icos_seattle['BuildingArea_HasRetrofit'] == 'Structural')
df_icos_seattle_structural = df_icos_seattle[mask]

In [92]:
df_merged_seattle['BuildingArea_HasRetrofit'].value_counts()

BuildingArea_HasRetrofit
Structural    171
No            160
Name: count, dtype: int64

In [93]:
# Make a list of all Sites with one or more structural retrofits. Then get the list of the opposite, 
# the remaining Sites, those with no retrofits for the entire schools.

In [94]:
retrofit_value = ['Structural']

mask = df_merged_seattle['BuildingArea_HasRetrofit'].isin(retrofit_value)
df_merged_seattle_retrofit = df_merged_seattle[mask] # ~mask for the opposite

In [95]:
df_merged_seattle_retrofit.shape

(171, 40)

In [96]:
df_merged_seattle_retrofit['BuildingArea_HasRetrofit'].value_counts()

BuildingArea_HasRetrofit
Structural    171
Name: count, dtype: int64

In [97]:
df_merged_seattle_retrofit['Site'].nunique() # there are 59 schools with one or more retrofit

59

In [98]:
# Make a one-column list of the schools with one or more Structural retrofits. Then make an inverse list of the
# remaining schools, which have no retrofits for any roww.

# Create a one-column list of the schools with one or more Structural retrofit values.
df_merged_seattle_retrofit2 = df_merged_seattle_retrofit['Site']
df_merged_seattle_retrofit2 = df_merged_seattle_retrofit2.drop_duplicates()

# Columns became a list of schools; reset the index to restore the dataframe to two columns.
df_merged_seattle_retrofit2 = df_merged_seattle_retrofit2.reset_index() 

# Capture only the Site column. This variable can be used to complete the next step.
df_merged_seattle_retrofit2 = df_merged_seattle_retrofit2['Site']

In [99]:
df_merged_seattle_retrofit2.shape 

(59,)

In [100]:
# Create a df of all rows for the 59 schools identified above.
df_merged_seattle_retrofit3 = df_merged_seattle[df_merged_seattle['Site'].isin\
                                                                 (df_merged_seattle_retrofit2)]

In [101]:
df_merged_seattle_retrofit3.shape

(217, 40)

In [102]:
df_merged_seattle_retrofit3['BuildingArea_HasRetrofit'].value_counts() # this pulls all rows for schools
# with one or more structural retrofits, including the 'no' retrofit rows for those schools

BuildingArea_HasRetrofit
Structural    171
No             46
Name: count, dtype: int64

In [103]:
# Find the inverse list - schools with all No retrofit values (opposite of those with any Structural retrofits)
df_merged_seattle_no_retrofit = df_merged_seattle[~df_merged_seattle['Site'].isin\
                                                                 (df_merged_seattle_retrofit2)]

In [104]:
df_merged_seattle_no_retrofit.shape # 117+217=334, so yes all rows are accounted for.

(118, 40)

In [105]:
df_merged_seattle_no_retrofit['Site'].nunique() # 47+59=106 so there are 47 schools with no retrofits in ICOS

48

In [106]:
df_merged_seattle_no_retrofit['BuildingArea_HasRetrofit'].value_counts()
# there is a difference of 3 because of listings in the district data that are not in ICOS

BuildingArea_HasRetrofit
No    114
Name: count, dtype: int64

In [107]:
df_merged_seattle_no_retrofit2 = df_merged_seattle_no_retrofit[['Site', 'BuildingArea_HasRetrofit', \
                   'sps_upgrade_year', 'sps_upgrade_notes']].drop_duplicates().sort_values('sps_upgrade_year')

In [108]:
df_merged_seattle_no_retrofit2.shape

(48, 4)

In [109]:
# df_merged_seattle_no_retrofit2.to_csv('df_merged_seattle_no_retrofit2.csv', sep=',', index=False, encoding='utf-8')

In [110]:
df_merged_seattle_no_retrofit2['sps_upgrade_notes'].value_counts()

sps_upgrade_notes
 new construction                        18
none                                     17
 major renovation                         9
(replacement under construction)          1
 new construction 2021  new addition      1
(major renovation under construction)     1
new construction                          1
Name: count, dtype: int64

In [111]:
df_merged_seattle_no_retrofit2.sort_values('sps_upgrade_notes')
# Three facilities from the district list are not in ICOS, those show NaN for BuildingArea_HasRetrofit.
# SPS says seismic upgrades that are part of major renovations may not be shown as retrofits. New buildings
# are also not considered retrofitted. Excluding those, there are 14 schools with upgrades after 1998,
# when seismic building codes were updated, that show seismic upgrades in the SPS list and no retrofits in ICOS.

Unnamed: 0,Site,BuildingArea_HasRetrofit,sps_upgrade_year,sps_upgrade_notes
37,Bryant Elementary,No,2001,major renovation
306,Webster,,2020,major renovation
82,Emerson Elementary,No,2001,major renovation
15,Daniel Bagley Elementary,No,2020,major renovation
47,Cleveland High School,No,2007,major renovation
148,John Stanford International Elementary,No,2000,major renovation
93,Garfield High School,No,2006,major renovation
44,Cedar Park Elementary,No,2015,major renovation
45,Chief Sealth International High School,No,2010,major renovation
275,South Shore PK-8,No,2009,new construction


In [112]:
# Create a list of just the school names from the merged set, to pull up all rows and columns for those schools
df_merged_seattle_no_retrofit3 = df_merged_seattle_no_retrofit2['Site']
df_merged_seattle_no_retrofit3 = df_merged_seattle_no_retrofit3.drop_duplicates()
df_merged_seattle_no_retrofit3.shape

(48,)

In [113]:
df_merged_seattle_no_retrofit4 = df_icos_seattle[df_icos_seattle['Site'].isin(df_merged_seattle_no_retrofit3)]
df_merged_seattle_no_retrofit4.shape

(115, 32)

In [114]:
df_merged_seattle_no_retrofit5 = df_merged_seattle_no_retrofit4[['Site', 'Site_EQRiskLevel', 'Building', \
              'BuildingArea_Name', 'BuildingArea_ExistingRisk', 'BuildingArea_HasRetrofit', \
              'BuildingArea_RetrofitRisk']]

In [115]:
# Confirms that the list of schools in ICOS that say no retrofit applies to the entire school.
df_merged_seattle_no_retrofit5['BuildingArea_HasRetrofit'].value_counts()

BuildingArea_HasRetrofit
No    114
Name: count, dtype: int64