In [5]:
import pandas as pd
import json
import numpy as np

In [2]:
# Load the People of Color dataset. The encoding is pinned to UTF-8 so the
# read does not depend on the platform's default locale encoding.
with open('./assets/People_of_Color_Washington.json', encoding='utf-8') as file:
    data = json.load(file)

# Extract the list of features
features = data['features']

# Flatten one level of nesting: each feature's 'geometry' and 'properties'
# dicts become 'geometry_*' / 'properties_*' columns.
df = pd.json_normalize(features, sep='_', max_level=1)

# Print the columns of the DataFrame to see the structure
print(df.columns)

Index(['type', 'id', 'geometry_type', 'geometry_coordinates',
       'properties_OBJECTID', 'properties_Census_Tract', 'properties_IBL_Rank',
       'properties_EHD_Rank', 'properties_Env_SEF_Rank',
       'properties_Total_Population', 'properties_People_of_Color',
       'properties_Percent_People_of_Color'],
      dtype='object')


In [3]:
# Coerce the percentage column to numeric (the original note says it is stored
# as a string); entries that cannot be parsed become NaN.
poc_col = 'properties_Percent_People_of_Color'
df[poc_col] = pd.to_numeric(df[poc_col], errors='coerce')

# Min-max rescale to a 0-100 range: the smallest observed value maps to 0 and
# the largest to 100.
poc_min = df[poc_col].min()
poc_max = df[poc_col].max()
df['Normalized_Percent_People_of_Color'] = (
    (df[poc_col] - poc_min) / (poc_max - poc_min) * 100
)

# Show the first rows as a quick sanity check
print(df.head())

      type  id geometry_type  \
0  Feature   1       Polygon   
1  Feature   2       Polygon   
2  Feature   3       Polygon   
3  Feature   4       Polygon   
4  Feature   5       Polygon   

                                geometry_coordinates  properties_OBJECTID  \
0  [[[-122.407697325044, 47.2258904770488], [-122...                    1   
1  [[[-122.420943214232, 47.1786642375196], [-122...                    2   
2  [[[-122.282534781502, 47.1178639615889], [-122...                    3   
3  [[[-122.407697325044, 47.2258904770488], [-122...                    4   
4  [[[-122.407676307464, 47.2191904455033], [-122...                    5   

  properties_Census_Tract  properties_IBL_Rank  properties_EHD_Rank  \
0             53053062000                  9.0                 10.0   
1             53053071601                 10.0                 10.0   
2             53053073111                  5.0                  7.0   
3             53053940007                 10.0              

In [4]:
# Write every row (original fields plus the normalized column) as a list of
# JSON records, pretty-printed with a 4-space indent.
poc_output_path = './assets/Normalized_People_of_Color_Washington.json'
df.to_json(poc_output_path, orient='records', indent=4)

# Only reached when the write above did not raise
print("JSON file has been saved as 'Normalized_People_of_Color_Washington.json'")

JSON file has been saved as 'Normalized_People_of_Color_Washington.json'


In [2]:
# Load the Unaffordable Housing dataset. The encoding is pinned to UTF-8 so the
# read does not depend on the platform's default locale encoding.
with open('./assets/Unaffordable_Housing_v2.json', encoding='utf-8') as file:
    data = json.load(file)

# Extract the list of features
features = data['features']

# Flatten one level of nesting: each feature's 'geometry' and 'properties'
# dicts become 'geometry_*' / 'properties_*' columns.
df = pd.json_normalize(features, sep='_', max_level=1)

# Print the columns of the DataFrame to see the structure
print(df.columns)

Index(['type', 'id', 'geometry_type', 'geometry_coordinates',
       'properties_OBJECTID', 'properties_Census_Tract', 'properties_IBL_Rank',
       'properties_EHD_Rank', 'properties_Env_SEF_Rank', 'properties_Count_',
       'properties_Population', 'properties_Percentage', 'properties_Lower_ME',
       'properties_Upper_ME', 'properties_Per'],
      dtype='object')


In [3]:
# Coerce the 'properties_Per' column to numeric; entries that cannot be parsed
# become NaN.
housing_col = 'properties_Per'
df[housing_col] = pd.to_numeric(df[housing_col], errors='coerce')

# Min-max rescale 'properties_Per' to a 0-100 range: the smallest observed
# value maps to 0 and the largest to 100.
housing_min = df[housing_col].min()
housing_max = df[housing_col].max()
df['Normalized_Percent'] = (
    (df[housing_col] - housing_min) / (housing_max - housing_min) * 100
)

# Show the first rows as a quick sanity check
print(df.head())

      type  id geometry_type  \
0  Feature   1       Polygon   
1  Feature   2       Polygon   
2  Feature   3       Polygon   
3  Feature   4       Polygon   
4  Feature   5       Polygon   

                                geometry_coordinates  properties_OBJECTID  \
0  [[[-122.407711857337, 47.2258962633226], [-122...                    1   
1  [[[-122.420957738327, 47.1786700198163], [-122...                    2   
2  [[[-122.282549269399, 47.1178697545886], [-122...                    3   
3  [[[-122.407711857337, 47.2258962633226], [-122...                    4   
4  [[[-122.407690838279, 47.2191962314073], [-122...                    5   

  properties_Census_Tract  properties_IBL_Rank  properties_EHD_Rank  \
0             53053062000                 10.0                 10.0   
1             53053071601                  8.0                 10.0   
2             53053073111                  3.0                  7.0   
3             53053940007                  8.0              

In [4]:
# Write every row (original fields plus the normalized column) as a list of
# JSON records, pretty-printed with a 4-space indent.
housing_output_path = './assets/Normalized_Unaffordable_Housing.json'
df.to_json(housing_output_path, orient='records', indent=4)

# Only reached when the write above did not raise
print("JSON file has been saved as 'Normalized_Unaffordable_Housing.json'")

JSON file has been saved as 'Normalized_Unaffordable_Housing.json'


In [6]:
# Load the Unemployed Population dataset. The encoding is pinned to UTF-8 so
# the read does not depend on the platform's default locale encoding.
with open('./assets/Unemployed_Population_Washington.json', encoding='utf-8') as file:
    data = json.load(file)

# Extract the list of features
features = data['features']

# Flatten one level of nesting: each feature's 'geometry' and 'properties'
# dicts become 'geometry_*' / 'properties_*' columns.
df = pd.json_normalize(features, sep='_', max_level=1)

# Print the columns of the DataFrame to see the structure
print(df.columns)

Index(['type', 'id', 'geometry_type', 'geometry_coordinates',
       'properties_OBJECTID', 'properties_Census_Tract', 'properties_IBL_Rank',
       'properties_EHD_Rank', 'properties_Env_SEF_Rank',
       'properties_Num_Unemployed', 'properties_Employable_Population_Over15',
       'properties_Percent_Unemployed', 'properties_Lower_ME',
       'properties_Upper_ME'],
      dtype='object')


In [7]:
# Coerce the unemployment percentage column to numeric; entries that cannot be
# parsed become NaN.
unemployed_col = 'properties_Percent_Unemployed'
df[unemployed_col] = pd.to_numeric(df[unemployed_col], errors='coerce')

# Min-max rescale the unemployment percentage to a 0-100 range: the smallest
# observed value maps to 0 and the largest to 100.
unemployed_min = df[unemployed_col].min()
unemployed_max = df[unemployed_col].max()
df['Normalized_Unemployed_Percent'] = (
    (df[unemployed_col] - unemployed_min) / (unemployed_max - unemployed_min) * 100
)

# Show the first rows as a quick sanity check
print(df.head())

      type  id geometry_type  \
0  Feature   1       Polygon   
1  Feature   2       Polygon   
2  Feature   3       Polygon   
3  Feature   4       Polygon   
4  Feature   5       Polygon   

                                geometry_coordinates  properties_OBJECTID  \
0  [[[-122.407697325044, 47.2258904770488], [-122...                    1   
1  [[[-122.420943214232, 47.1786642375196], [-122...                    2   
2  [[[-122.282534781502, 47.1178639615889], [-122...                    3   
3  [[[-122.407697325044, 47.2258904770488], [-122...                    4   
4  [[[-122.407676307464, 47.2191904455033], [-122...                    5   

  properties_Census_Tract  properties_IBL_Rank  properties_EHD_Rank  \
0             53053062000                  7.0                 10.0   
1             53053071601                  8.0                 10.0   
2             53053073111                  6.0                  7.0   
3             53053940007                  9.0              

In [8]:
# Write every row (original fields plus the normalized column) as a list of
# JSON records, pretty-printed with a 4-space indent.
unemployed_output_path = './assets/Normalized_Unemployed_Population.json'
df.to_json(unemployed_output_path, orient='records', indent=4)

# Only reached when the write above did not raise
print("JSON file has been saved as 'Normalized_Unemployed_Population.json'")

JSON file has been saved as 'Normalized_Unemployed_Population.json'


In [5]:
# Load the Poverty Population dataset. The encoding is pinned to UTF-8 so the
# read does not depend on the platform's default locale encoding.
with open('./assets/Poverty_Population_Washington.json', encoding='utf-8') as file:
    data = json.load(file)

# Extract the list of features
features = data['features']

# Flatten one level of nesting: each feature's 'geometry' and 'properties'
# dicts become 'geometry_*' / 'properties_*' columns.
df = pd.json_normalize(features, sep='_', max_level=1)

# Print the columns of the DataFrame to see the structure
print(df.columns)

Index(['type', 'id', 'geometry_type', 'geometry_coordinates',
       'properties_OBJECTID', 'properties_Census_Tract', 'properties_IBL_Rank',
       'properties_EHD_Rank', 'properties_Env_SEF_Rank',
       'properties_Total_Population', 'properties_Num_Living_in_Poverty',
       'properties_Percent_Living_in_Poverty'],
      dtype='object')


In [7]:
# Coerce the poverty percentage column to numeric; entries that cannot be
# parsed become NaN.
poverty_col = 'properties_Percent_Living_in_Poverty'
df[poverty_col] = pd.to_numeric(df[poverty_col], errors='coerce')

# Min-max rescale the poverty percentage to a 0-100 range: the smallest
# observed value maps to 0 and the largest to 100.
poverty_min = df[poverty_col].min()
poverty_max = df[poverty_col].max()
df['Normalized_Percent_Living_in_Poverty'] = (
    (df[poverty_col] - poverty_min) / (poverty_max - poverty_min) * 100
)

# Show the first rows as a quick sanity check
print(df.head())

      type  id geometry_type  \
0  Feature   1       Polygon   
1  Feature   2       Polygon   
2  Feature   3       Polygon   
3  Feature   4       Polygon   
4  Feature   5       Polygon   

                                geometry_coordinates  properties_OBJECTID  \
0  [[[-122.407697325044, 47.2258904770488], [-122...                    1   
1  [[[-122.420943214232, 47.1786642375196], [-122...                    2   
2  [[[-122.282534781502, 47.1178639615889], [-122...                    3   
3  [[[-122.407697325044, 47.2258904770488], [-122...                    4   
4  [[[-122.407676307464, 47.2191904455033], [-122...                    5   

  properties_Census_Tract  properties_IBL_Rank  properties_EHD_Rank  \
0             53053062000                 10.0                 10.0   
1             53053071601                  9.0                 10.0   
2             53053073111                  5.0                  7.0   
3             53053940007                  9.0              

In [8]:
# Write every row (original fields plus the normalized column) as a list of
# JSON records, pretty-printed with a 4-space indent.
poverty_output_path = './assets/Normalized_Poverty_Population_Washington.json'
df.to_json(poverty_output_path, orient='records', indent=4)

# Only reached when the write above did not raise
print("JSON file has been saved as 'Normalized_Poverty_Population_Washington.json'")

JSON file has been saved as 'Normalized_Poverty_Population_Washington.json'


In [2]:
# Load the No High School Diploma dataset. The encoding is pinned to UTF-8 so
# the read does not depend on the platform's default locale encoding.
with open('./assets/No_High_School_Diploma_Washington.json', encoding='utf-8') as file:
    data = json.load(file)

# Extract the list of features
features = data['features']

# Flatten one level of nesting: each feature's 'geometry' and 'properties'
# dicts become 'geometry_*' / 'properties_*' columns.
df = pd.json_normalize(features, sep='_', max_level=1)

# Print the columns of the DataFrame to see the structure
print(df.columns)

Index(['type', 'id', 'geometry_type', 'geometry_coordinates',
       'properties_OBJECTID', 'properties_Census_Tract', 'properties_IBL_Rank',
       'properties_EHD_Rank', 'properties_Env_SEF_Rank',
       'properties_Num_Without_Diploma', 'properties_Population_Over24',
       'properties_Percent_Without_Diploma', 'properties_Lower_ME',
       'properties_Upper_ME'],
      dtype='object')


In [5]:
# Coerce the without-diploma percentage column to numeric; entries that cannot
# be parsed become NaN.
diploma_col = 'properties_Percent_Without_Diploma'
df[diploma_col] = pd.to_numeric(df[diploma_col], errors='coerce')

# Min-max rescale the without-diploma percentage to a 0-100 range: the
# smallest observed value maps to 0 and the largest to 100.
diploma_min = df[diploma_col].min()
diploma_max = df[diploma_col].max()
df['Normalized_Percent_Without_Diploma'] = (
    (df[diploma_col] - diploma_min) / (diploma_max - diploma_min) * 100
)

# Show the first rows as a quick sanity check
print(df.head())

      type  id geometry_type  \
0  Feature   1       Polygon   
1  Feature   2       Polygon   
2  Feature   3       Polygon   
3  Feature   4       Polygon   
4  Feature   5       Polygon   

                                geometry_coordinates  properties_OBJECTID  \
0  [[[-122.407697325044, 47.2258904770488], [-122...                    1   
1  [[[-122.420943214232, 47.1786642375196], [-122...                    2   
2  [[[-122.282534781502, 47.1178639615889], [-122...                    3   
3  [[[-122.407697325044, 47.2258904770488], [-122...                    4   
4  [[[-122.407676307464, 47.2191904455033], [-122...                    5   

  properties_Census_Tract  properties_IBL_Rank  properties_EHD_Rank  \
0             53053062000                 10.0                 10.0   
1             53053071601                 10.0                 10.0   
2             53053073111                  5.0                  7.0   
3             53053940007                  9.0              

In [6]:
# Write every row (original fields plus the normalized column) as a list of
# JSON records, pretty-printed with a 4-space indent.
diploma_output_path = './assets/Normalized_No_High_School_Diploma_Washington.json'
df.to_json(diploma_output_path, orient='records', indent=4)

# Only reached when the write above did not raise
print("JSON file has been saved as 'Normalized_No_High_School_Diploma_Washington.json'")

JSON file has been saved as 'Normalized_No_High_School_Diploma_Washington.json'


In [2]:
# Load the Low Birth Weight dataset. The encoding is pinned to UTF-8 so the
# read does not depend on the platform's default locale encoding.
with open('./assets/Low_Birth_Weight.json', encoding='utf-8') as file:
    data = json.load(file)

# Extract the list of features
features = data['features']

# Flatten one level of nesting: each feature's 'geometry' and 'properties'
# dicts become 'geometry_*' / 'properties_*' columns.
df = pd.json_normalize(features, sep='_', max_level=1)

# Print the columns of the DataFrame to see the structure
print(df.columns)

Index(['type', 'id', 'geometry_type', 'geometry_coordinates',
       'properties_OBJECTID', 'properties_Census_Tract', 'properties_IBL_Rank',
       'properties_EHD_Rank', 'properties_Env_SP_Rank', 'properties_Count_',
       'properties_Population', 'properties_Percent_Singleton_Live_Births',
       'properties_Lower_CI', 'properties_Upper_CI'],
      dtype='object')


In [3]:
# Clean the low-birth-weight percentage column into plain floats.
# The clean-up runs on a string view (astype(str)) so that numeric entries
# survive the text handling and the cell can be re-run after the column is
# already numeric. Calling .str directly raises on a float column and turns
# non-string entries of an object column into NaN.
birth_pct_col = 'properties_Percent_Singleton_Live_Births'
birth_pct_text = df[birth_pct_col].astype(str)

# Handle "(NR)" by removing it (and any whitespace before it) from the string
birth_pct_text = birth_pct_text.str.replace(r'\s*\(NR\)', '', regex=True)

# Replace "**" with NaN (indicating missing or confidential data)
birth_pct_text = birth_pct_text.replace('**', np.nan)

# Convert to numeric; anything still unparseable (including the text form of
# missing values) becomes NaN
df[birth_pct_col] = pd.to_numeric(birth_pct_text, errors='coerce')

# Display the cleaned data
print(df[[birth_pct_col]].head())

   properties_Percent_Singleton_Live_Births
0                                      3.79
1                                      1.59
2                                       NaN
3                                      2.32
4                                      2.33


In [5]:
# Make sure the low-birth-weight percentage column is numeric before scaling;
# entries that cannot be parsed become NaN.
lbw_col = 'properties_Percent_Singleton_Live_Births'
df[lbw_col] = pd.to_numeric(df[lbw_col], errors='coerce')

# Min-max rescale the low-birth-weight percentage to a 0-100 range: the
# smallest observed value maps to 0 and the largest to 100. Rows whose
# percentage is NaN stay NaN in the normalized column.
lbw_min = df[lbw_col].min()
lbw_max = df[lbw_col].max()
df['Normalized_Percent_Singleton_Live_Births'] = (
    (df[lbw_col] - lbw_min) / (lbw_max - lbw_min) * 100
)

# Show the first rows as a quick sanity check
print(df.head())

      type  id geometry_type  \
0  Feature   1       Polygon   
1  Feature   2       Polygon   
2  Feature   3       Polygon   
3  Feature   4       Polygon   
4  Feature   5       Polygon   

                                geometry_coordinates  properties_OBJECTID  \
0  [[[-122.407676307464, 47.2191904455033], [-122...                    1   
1  [[[-122.644709586342, 47.0877419600512], [-122...                    2   
2  [[[-122.457150238437, 47.1701871668543], [-122...                    3   
3  [[[-122.272361625678, 47.0895547615697], [-122...                    4   
4  [[[-122.413510921853, 47.0674807226684], [-122...                    5   

  properties_Census_Tract  properties_IBL_Rank  properties_EHD_Rank  \
0             53053940006                 10.0                 10.0   
1             53053072800                  4.0                  3.0   
2             53053071703                  6.0                  9.0   
3             53053073113                  7.0              

In [6]:
# Write every row (original fields plus the normalized column) as a list of
# JSON records, pretty-printed with a 4-space indent.
lbw_output_path = './assets/Normalized_Low_Birth_Weight.json'
df.to_json(lbw_output_path, orient='records', indent=4)

# Only reached when the write above did not raise
print("JSON file has been saved as 'Normalized_Low_Birth_Weight.json'")

JSON file has been saved as 'Normalized_Low_Birth_Weight.json'
