In [1]:
import pandas as pd
import json
import numpy as np

# Disable truncation when DataFrames are rendered: None removes the limit on
# both the number of columns and the number of rows shown.
pd.options.display.max_columns = None
pd.options.display.max_rows = None



In [2]:
# Load the districts CSV; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='AAGmediaDistrictsAggregate.csv')

In [3]:
# Columns to carry forward from the loaded CSV: the two identifying columns
# first, then the three measures.
needed_columns = ['District ID', 'District Name'] + [
    'Student Enrollment',
    'Students with Disabilities',
    'Students Experiencing Poverty',
]

In [4]:
# Take only the columns named in needed_columns. The .copy() makes
# selected_df an independent frame, so later edits to it leave df untouched.
selected_df = df.loc[:, needed_columns].copy()

print(selected_df)

     District ID                     District Name  Student Enrollment  \
0           2063                        Adel SD 21                11.0   
1           2113                      Adrian SD 61               282.0   
2           1899                       Alsea SD 7J               252.0   
3           2252                       Amity SD 4J               725.0   
4           2111                       Annex SD 29               104.0   
5           2005                    Arlington SD 3               130.0   
6           2115                       Arock SD 81                11.0   
7           2041                      Ashland SD 5              2506.0   
8           2051                      Ashwood SD 8                 NaN   
9           1933                      Astoria SD 1              1746.0   
10          2208             Athena-Weston SD 29RJ               526.0   
11          1894                       Baker SD 5J              5013.0   
12          1969                      

In [5]:
# Turn '*' placeholder cells into real missing values.
# np.nan is used rather than None on purpose: per the DataFrame.replace docs,
# pandas releases before 1.4 ignore an explicit `None` and instead
# forward-fill the cell from the previous row, which would silently copy a
# neighbouring district's value into the placeholder cell.
selected_df = selected_df.replace('*', np.nan)

# Rewrite '<5%' cells as '5%' so they can be parsed as numbers later on.
# NOTE: this records the stated upper bound, not an exact measurement.
selected_df = selected_df.replace('<5%', '5%')

In [6]:
# Process percentage columns: strip the '%' sign and store each value as a
# fraction of 1 (for example '12%' becomes 0.12).
#
# NOTE: this cell overwrites the columns it reads, so running it a second
# time divides the already-converted values by 100 again.
percentage_columns = ['Students with Disabilities', 'Students Experiencing Poverty']


def _percent_to_fraction(value):
    """Convert a single percentage cell (e.g. '12%') to a fraction (0.12).

    Returns None when the cell is missing or cannot be parsed as a number.
    """
    try:
        if pd.isna(value):
            return None
        # Remove % and convert to float
        return float(str(value).replace('%', '')) / 100
    except (ValueError, TypeError):
        # Only conversion failures are treated as "no value". A bare
        # `except:` would also swallow KeyboardInterrupt and hide real bugs.
        return None


for col in percentage_columns:
    # Replace the column with the converted values
    selected_df[col] = [_percent_to_fraction(value) for value in selected_df[col]]

In [7]:
# Append ' %' to the names of the two percentage columns.
column_mapping = {
    name: f'{name} %'
    for name in ('Students with Disabilities', 'Students Experiencing Poverty')
}
selected_df = selected_df.rename(columns=column_mapping)

In [8]:
# Convert to dictionary format with District ID (as a string) as the key.
# Missing values (NaN) are stored as None so that json.dump writes `null`.
# A float NaN would otherwise be emitted as a bare `NaN` token, which is not
# valid JSON and is rejected by strict parsers.
result = {}
for _, row in selected_df.iterrows():
    district_id = row['District ID']
    result[str(district_id)] = {
        column: None if pd.isna(row[column]) else row[column]
        for column in (
            'District ID',
            'District Name',
            'Student Enrollment',
            'Students with Disabilities %',
            'Students Experiencing Poverty %',
        )
    }

In [9]:
# Write the result dictionary to district_percents.json, pretty-printed with
# a two-space indent. Opening in 'w' mode overwrites any existing file.
with open('district_percents.json', 'w') as json_file:
    json.dump(result, json_file, indent=2)