# Welcome to the School Achievement Dashboard! 
##### *Produced by Colin Howard and Luke Moffitt for CSCI 77800 -- EthiCS, Fall 2024*

#### For the best user experience, please start with the first cell in CleanDash.ipynb

# Part 1. Poverty and Achievement

In [11]:
#imports

import ipywidgets as widgets
import pandas as pd
import urllib.request as rq
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import Markdown, display
import json

In [14]:
#visualization 1



# variables to populate dropdowns
# Display label -> jurisdiction code sent as the `jurisdiction` query parameter.
levels = {
    "New York City": "XN",
    "New York State": "NY",
    "United States": "NT"
}

# Jurisdiction the initial figure is built for.
level = "NT"

# Preselect the same jurisdiction the initial figure shows; without an explicit
# value the dropdown starts on its first option ("New York City"), so the
# control and the chart would disagree until the first change.
level_selector = widgets.Dropdown(options=levels, value=level, description="Level:")

def set_level(choice):
    """Store ``choice`` as the module-level ``level``.

    The ``global`` declaration is required: without it the assignment only
    binds a function-local name and the call has no visible effect.
    """
    global level
    level = choice



# Subjects offered in the "Subject" dropdown.
subjects = ["Mathematics", "Reading", "Science"]

# Subject used for the initial figure.
subject = "Mathematics"

# Subject -> code sent as the `subscale` query parameter.
subscales = {
    "Mathematics": "MRPCM",
    "Reading": "RRPCM",
    "Science": "SRPUV"
}

# Keep the default subscale in step with the default subject
# (it was hard-coded to the Reading code while the subject is Mathematics).
subscale = subscales[subject]

# Years offered per subject, newest first. Each year appears once: the
# repeated 1996 / 1998 entries produced indistinguishable dropdown options.
years = {
    "Mathematics": [2022, 2019, 2017, 2015, 2013, 2011, 2009, 2007, 2005, 2003, 2000, 1996, 1992, 1990],
    "Reading": [2022, 2019, 2017, 2015, 2013, 2011, 2009, 2007, 2005, 2003, 2002, 1998, 1994, 1992],
    "Science": [2019, 2015, 2009]
}

# Year used for the initial figure.
year = 2019

# Grades listed per subject.
grades = {
    "Mathematics": [4, 8],
    "Reading": [4, 8, 12],
    "Science": [4, 8, 12]
}

# Grade sent as the `grade` query parameter.
grade = 8

# Code sent as the `variable` query parameter.
variable = "SLUNCH3"

# Statistic code -> legend label; one bar trace is drawn per entry, in this order.
stattypes = {
    "MN:MN": "Mean",
    "PC:P1": "10th Percentile",
    "PC:P2": "25th Percentile",
    "PC:P5": "50th Percentile",
    "PC:P7": "75th Percentile",
    "PC:P9": "90th Percentile"
}

# Comma-separated statistic codes for the `stattype` query parameter,
# derived from `stattypes` so the request and the traces cannot drift apart.
stattype = ",".join(stattypes)

# Bar colour per statistic code, taken by index from Plotly's qualitative
# "Prism" colour list.
marker_colors = {
    stat: px.colors.qualitative.Prism[index]
    for stat, index in (
        ("MN:MN", 10),
        ("PC:P1", 7),
        ("PC:P2", 5),
        ("PC:P5", 4),
        ("PC:P7", 1),
        ("PC:P9", 0),
    )
}


# method to build the dataframe
def build_frame(subject="Mathematics", year=2019, level=level):
    """Request NAEP data for one subject/year/jurisdiction and return it as a frame.

    Parameters
    ----------
    subject : str
        Key of ``subscales``; its lower-cased form is sent as ``subject``.
    year : int
        Sent as the ``year`` query parameter.
    level : str
        Sent as the ``jurisdiction`` query parameter. The default is the value
        the module-level ``level`` had when this function was defined.

    Returns
    -------
    pandas.DataFrame or str
        A frame built from the ``result`` entry of the JSON reply, or the raw
        response text when the reply cannot be turned into one. Callers must
        check the type before indexing.

    Notes
    -----
    ``grade``, ``variable``, ``stattype`` and ``subscales`` are read from module
    globals at call time, so rebinding them later (the Part 3 cell does) changes
    what this function requests.
    """
    subscale = subscales[subject]
    url = (
        f"https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data"
        f"&subject={subject.lower()}"
        f"&grade={grade}"
        f"&subscale={subscale}"
        f"&variable={variable}"
        f"&jurisdiction={level}"
        f"&stattype={stattype}"
        f"&year={year}"
    )
    # Close the HTTP response as soon as the body has been read.
    with rq.urlopen(url) as response:
        response_string = response.read().decode('utf-8')
    try:
        return pd.DataFrame(json.loads(response_string)['result'])
    except (ValueError, KeyError, TypeError):
        # Not JSON (JSONDecodeError is a ValueError), no 'result' entry, or a
        # payload that cannot form a frame: hand back the raw text instead.
        return response_string

# method to build the figure with graph objects (plotly)
def build_figure(df):
    """Return a list with one ``go.Bar`` trace per key of ``stattypes``.

    For each statistic code, the rows of ``df`` whose ``stattype`` column
    equals that code supply ``varValueLabel`` as x and ``value`` as y; the
    trace is named and coloured via ``stattypes`` and ``marker_colors``.
    """
    def _bar_for(stat):
        rows = df.loc[df['stattype'] == stat]
        return go.Bar(
            x=rows["varValueLabel"],
            y=rows["value"],
            name=stattypes[stat],
            marker={"color": marker_colors[stat]},
        )

    return [_bar_for(stat) for stat in stattypes]

# Fetch the data for the default subject and year, then wrap the resulting
# bar traces in a grouped-bar FigureWidget.
df = build_frame(subject, year)
fw = go.FigureWidget(
    data=build_figure(df),
    layout=go.Layout(
        barmode="group",
        # levels maps label -> code; the zip inverts it to look the label up by code
        title=(
            f"{grade}th Grade {subject} NAEP Scores and "
            f"Federal Lunch Program Eligibility, {year}, "
            f"{dict(zip(levels.values(), levels.keys()))[level]}"
        ),
    ),
)

# method to update the figure
def update_figure(change):
    """Observer callback: refetch the data for the current selections and redraw ``fw``.

    ``change`` is the notification passed by the widget and is not inspected;
    the current values are read from the three dropdowns instead.
    """
    subject = subject_widget.value
    year = year_widget.value
    level = level_selector.value
    df = build_frame(subject, year, level)
    # build_frame returns the raw response text when the reply could not be
    # turned into a frame; keep the current figure instead of failing on df.loc.
    if not isinstance(df, pd.DataFrame):
        return
    # Apply every trace change and the new title as a single update.
    with fw.batch_update():
        for trace, stat in zip(fw.data, stattypes):
            rows = df.loc[df['stattype'] == stat]
            trace.x = rows['varValueLabel']
            trace.y = rows['value']
        fw.update_layout(title=f"{grade}th Grade {subject} NAEP Scores and Federal Lunch Program Eligibility, {year}, {dict(zip(levels.values(), levels.keys()))[level]}")

# create the widgets

# Preselect the subject and year the initial figure was built with; otherwise
# the year dropdown starts on its first option (2022) while the chart shows
# the default `year`.
subject_widget = widgets.Dropdown(options=subjects, value=subject, description="Subject:")
year_widget = widgets.Dropdown(options=years[subject], value=year, description="Year:")

# method to update the years dropdown
# this is called when the subject is changed
def update_years(*args):
    """Replace the year dropdown's options with the years of the selected subject.

    Any arguments passed by the observer machinery are ignored.
    """
    selected_subject = subject_widget.value
    year_widget.options = years[selected_subject]


# Redraw the figure whenever the selected jurisdiction changes.
level_selector.observe(update_figure, 'value')
# A subject change first swaps the list of years, then redraws the figure;
# update_years is registered ahead of update_figure for that reason.
subject_widget.observe(update_years, 'value')
subject_widget.observe(update_figure, 'value')
# Redraw the figure whenever the selected year changes.
year_widget.observe(update_figure, 'value')

# Row of the three dropdowns stacked above the figure; the bare `container`
# expression at the end of the cell is what displays it.
container = widgets.VBox([widgets.HBox([level_selector, subject_widget,year_widget]),fw])
container

# interact(update_figure,subject=subject_widget,year=year_widget);

VBox(children=(HBox(children=(Dropdown(description='Level:', options={'New York City': 'XN', 'New York State':…

# Part 2. Poverty and Funding

In [None]:
#visualization 2, funding by state over time
import numpy

df = pd.read_csv("FundingData.csv")

# one scatter trace per state on a shared figure
f = go.FigureWidget()
f.update_layout(
    title="Per Pupil Funding by State over Time",
    xaxis_title="Year",
    yaxis_title="Funding",
)

# x axis: the years 2021 down to 1987
x = numpy.array(range(2021, 1986, -1))

for state in df["State Name"].unique():
    # y is the first row matching the state with its first column dropped;
    # presumably the remaining columns hold funding per year in the same
    # descending order as x -- TODO confirm against FundingData.csv
    f.add_trace(
        go.Scatter(
            x=x,
            y=df.loc[df["State Name"] == state].to_numpy()[0][1:],
            name=state,
        )
    )
f

#line graph over time, all 50 states

### Although funding has gone up over time, more funding does not always lead to better outcomes.

In [None]:
#visualization 3, funding vs poverty
#scatterplot that shows school poverty vs funding
#we already know that poverty impacts achievement, from previous graph
#now we want to show the correlation between poverty and funding 
#we are going to do this using data from NCES (?)
from numpy import int64
from pandas import Int64Dtype
import numpy as np

# Display label -> jurisdiction code for the Part 2 "Level" dropdown.
levels2 = {
    "New York State": "NY",
    "United States": "NT"
}

# Jurisdiction the initial scatter plot represents (all districts).
level2 = "NT"

def set_level2(choice):
    """Store ``choice`` as the module-level ``level2``.

    The ``global`` declaration is required: without it the assignment only
    binds a function-local name and the call has no visible effect.
    """
    global level2
    level2 = choice

# Preselect `level2` so the dropdown matches the initial scatter plot, which is
# drawn from every district; without an explicit value the dropdown starts on
# its first option ("New York State").
level_selector2 = widgets.Dropdown(options=levels2, value=level2, description="Level:")
#interact(set_level,choice=level_selector2);

#Source: https://nces.ed.gov/ccd/elsi/tableGenerator.aspx
povertyDF = pd.read_csv("DistrictFundingData.csv")
#Source: https://www.census.gov/data/datasets/2022/demo/saipe/2022-school-districts.html
censusDF = pd.read_csv("ussd22.csv")

# Build the join key on the census side: the state FIPS code zero-padded to
# 2 digits followed by the district ID zero-padded to 5 digits, as an integer,
# stored under the agency-ID column name used for the merge.
censusDF["Agency ID - NCES Assigned [District] Latest available year"] = (
    censusDF["State FIPS Code"].astype(str).str.zfill(2)
    + censusDF["District ID"].astype(str).str.zfill(5)
).astype(int64)

# With no `on=`, pd.merge joins on every column name the two frames share.
df = pd.merge(censusDF,povertyDF)

# children in poverty divided by the total 5-17 population, as a percentage
# (both columns are text with thousands separators)
df["Percentage in poverty"] = (
    df["Estimated number of relevant children 5 to 17 years old in poverty who are related to the householder"].str.replace(",","").astype(int)
    / df["Estimated Population 5-17"].str.replace(",","").astype(int)
    * 100
)

# Keep only rows whose per-pupil expenditure is purely numeric text, then
# convert it to int. The .copy() makes the filtered frame independent, so the
# column assignment below is not a write to a slice of the merged frame.
df = df[df["Total Expenditures (TOTALEXP) per Pupil (V33) [District Finance] 2021-22"].str.isnumeric()].copy()
df["Expenditures per student"] = df["Total Expenditures (TOTALEXP) per Pupil (V33) [District Finance] 2021-22"].astype(int)

# One marker per district: poverty percentage against per-pupil expenditure.
f = go.FigureWidget()
f.layout.title = "Per Pupil Funding by Districts vs. Childhood Poverty"
f.layout.xaxis.title = "Percentage of Childhood Poverty"
f.layout.yaxis.title = "Funding"
scatter = go.Scatter(x=df["Percentage in poverty"],y=df["Expenditures per student"],mode="markers",text=df["Agency Name"])
f.add_trace(scatter)



def update_poverty(change):
    """Observer callback: restrict the scatter plot to the selected jurisdiction.

    ``change`` is not inspected; the jurisdiction code is read from
    ``level_selector2``. "NT" shows every row of ``df``, "NY" the rows whose
    state abbreviation contains "NY", and "XN" the rows whose agency name
    contains "NEW YORK CITY" (``levels2`` does not currently offer "XN").
    Any other code leaves the figure untouched.
    """
    level2 = level_selector2.value
    #need to restrict data based on the area
    if level2 == "NT":
        tempdf = df
    elif level2 == "NY":
        tempdf = df[df["State Abbr [District] Latest available year"].str.contains("NY")]
    elif level2 == "XN":
        #NEW YORK CITY per pupil data not available
        tempdf = df[df["Agency Name [District] 2021-22"].str.contains("NEW YORK CITY")]
    else:
        # No subset is defined for this code; returning avoids referencing an
        # unassigned `tempdf` below.
        return
    # Swap x, y and hover text together in a single update.
    with f.batch_update():
        f.data[0].x = tempdf["Percentage in poverty"]
        f.data[0].y = tempdf["Expenditures per student"]
        f.data[0].text = tempdf["Agency Name"]
        


# Re-filter the scatter plot whenever the selected jurisdiction changes.
level_selector2.observe(update_poverty, 'value')

# Dropdown stacked above the figure; the bare `container` expression at the
# end of the cell is what displays it.
container = widgets.VBox([widgets.HBox([level_selector2]),f])
container

### At both the national and state levels, funding per student tends to go down as the percentage of poverty goes up, although there are many outliers in the data.

# Part 3. Factors Affecting Achievement

In [None]:
#visualization 4 (part 3)
#Use the same level and subject selector, make a new variable selector

# NOTE: this cell rebinds module-level names (subscales, grade, variable, ...)
# that the Part 1 functions also read when they are called.
subjects = ["Mathematics", "Reading", "Science"]

subject = "Mathematics"

# Subject -> code sent as the `subscale` query parameter.
subscales = {
    "Mathematics": "MWPCM",
    "Reading": "RRPCM",
    "Science": "SRPUV"
}

# Keep the default subscale in step with the default subject
# (it was hard-coded to the Reading code while the subject is Mathematics).
subscale = subscales[subject]

year = 2019
grade = 12

# Display label -> code sent as the `variable` query parameter.
variables = {
    "Percent of Teachers Absent on average day": "C036501",
    "School type is independent charter": "C0863J1",
    "Talk about studies at home": "B017451",
    "Use laptop or desktop computer during class": "B034701",
    "Use tablet during class": "B034801",
    "Days absent from school in the last month": "B018101"
}

# Variable code used for the initial Part 3 figure.
factor = "C036501"
variable = factor

# Statistic code -> legend label; one bar trace is drawn per entry, in this order.
stattypes = {
    "MN:MN": "Mean",
    "PC:P1": "10th Percentile",
    "PC:P2": "25th Percentile",
    "PC:P5": "50th Percentile",
    "PC:P7": "75th Percentile",
    "PC:P9": "90th Percentile"
}

# Comma-separated statistic codes for the `stattype` query parameter,
# derived from `stattypes` so the request and the traces cannot drift apart.
stattype = ",".join(stattypes)

# Bar colour per statistic code: the codes are paired, in order, with entries
# of Plotly's qualitative "Prism" colour list chosen by index.
marker_colors = dict(
    zip(
        ("MN:MN", "PC:P1", "PC:P2", "PC:P5", "PC:P7", "PC:P9"),
        (px.colors.qualitative.Prism[i] for i in (10, 7, 5, 4, 1, 0)),
    )
)


def build_frame3(subject="Mathematics",factor="Percent of Teachers Absent on average day", level=level):
    """Request NAEP data for one subject/factor/jurisdiction and return it as a frame.

    Parameters
    ----------
    subject : str
        Key of ``subscales``; its lower-cased form is sent as ``subject``.
    factor : str
        Either a display label from ``variables`` or a raw variable code.
        Labels are translated to their code; anything else is sent unchanged.
    level : str
        Sent as the ``jurisdiction`` query parameter. The default is the value
        the module-level ``level`` had when this function was defined.

    Returns
    -------
    pandas.DataFrame or str
        A frame built from the ``result`` entry of the JSON reply, or the raw
        response text when the reply cannot be turned into one. Callers must
        check the type before indexing.

    ``grade``, ``stattype`` and ``year`` are read from module globals at call time.
    """
    # The default argument is a label, which previously went into the URL
    # verbatim; map labels to codes and pass codes through untouched.
    variable = variables.get(factor, factor)
    subscale = subscales[subject]
    url = (
        f"https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data"
        f"&subject={subject.lower()}"
        f"&grade={grade}"
        f"&subscale={subscale}"
        f"&variable={variable}"
        f"&jurisdiction={level}"
        f"&stattype={stattype}"
        f"&year={year}"
    )
    # Close the HTTP response as soon as the body has been read.
    with rq.urlopen(url) as response:
        response_string = response.read().decode('utf-8')
    try:
        return pd.DataFrame(json.loads(response_string)['result'])
    except (ValueError, KeyError, TypeError):
        # Not JSON (JSONDecodeError is a ValueError), no 'result' entry, or a
        # payload that cannot form a frame: hand back the raw text instead.
        return response_string

def build_figure3(df):
    """Return a list with one ``go.Bar`` trace per key of ``stattypes``.

    Rows of ``df`` are selected by equality on the ``stattype`` column; each
    selection supplies ``varValueLabel`` as x and ``value`` as y, and the trace
    takes its name from ``stattypes`` and its colour from ``marker_colors``.
    """
    rows_by_stat = {stat: df.loc[df['stattype'] == stat] for stat in stattypes}
    return [
        go.Bar(
            x=rows["varValueLabel"],
            y=rows["value"],
            name=stattypes[stat],
            marker={"color": marker_colors[stat]},
        )
        for stat, rows in rows_by_stat.items()
    ]

# Fetch the data for the default subject and variable, then wrap the resulting
# bar traces in a grouped-bar FigureWidget.
df3 = build_frame3(subject, variable)
fw3 = go.FigureWidget(
    data=build_figure3(df3),
    layout=go.Layout(
        barmode="group",
        # levels maps label -> code; the zip inverts it to look the label up by code
        title=(
            f"{grade}th Grade {subject} NAEP Scores and {variable}, "
            f"{year}, "
            f"{dict(zip(levels.values(), levels.keys()))[level]}"
        ),
    ),
)


def update_figure3(change):
    """Observer callback: refetch the data for the current selections and redraw ``fw3``.

    ``change`` is not inspected; subject, variable and jurisdiction are read
    from their dropdowns. The selected variable code (not ``year``) is passed
    to ``build_frame3``, and the Part 3 figure ``fw3`` (not the Part 1 figure
    ``fw``) is the one updated.
    """
    subject = subject_widget.value
    variable = variable_selector.value
    level = level_selector.value
    df = build_frame3(subject, variable, level)
    # build_frame3 returns the raw response text when the reply could not be
    # turned into a frame; keep the current figure instead of failing on df.loc.
    if not isinstance(df, pd.DataFrame):
        return
    # Apply every trace change and the new title as a single update.
    with fw3.batch_update():
        for trace, stat in zip(fw3.data, stattypes):
            rows = df.loc[df['stattype'] == stat]
            trace.x = rows['varValueLabel']
            trace.y = rows['value']
        # Title mirrors the initial fw3 title, using the selected variable code.
        fw3.update_layout(title=f"{grade}th Grade {subject} NAEP Scores and {variable}, {year}, {dict(zip(levels.values(), levels.keys()))[level]}")
    
# Dropdown over `variables` (display label -> variable code).
variable_selector = widgets.Dropdown(options=variables,description="Variable:")


# level_selector and subject_widget are the Part 1 widgets, reused here; the
# observers registered in Part 1 stay attached, so this adds update_figure3
# alongside them.
level_selector.observe(update_figure3, 'value')
subject_widget.observe(update_figure3, 'value')
variable_selector.observe(update_figure3, 'value')


# Row of the three dropdowns stacked above the Part 3 figure; the bare
# `container` expression at the end of the cell is what displays it.
container = widgets.VBox([widgets.HBox([level_selector, subject_widget,variable_selector]),fw3])
container

# interact(update_figure,subject=subject_widget,year=year_widget);

### With the understanding that higher funding does not necessarily lead to better outcomes, care must be taken to understand how the money is being spent.

#### One of the biggest takeaways from this project was the surprise that some schools are spending around 500,000 dollars per year on an individual student. However, even with more than 10x the funding, these schools are not performing 10x as well.

#### Ultimately, policy makers need to have better information about how school budgets are broken up. Publicly available data would be more useful if it included additional data points regarding how the money was spent. As is, it is very difficult to authoritatively discern which factors do and do not contribute to student outcomes.


#TO DO LIST
1. Put this into a binder
    a. will the collapsed fields stay collapsed, or do we need another solution?
2. Plan presentation for class
    1. Background on your ethical topic, including a debate on the various dimensions/sides of it. 
        a. What is a quick overview of this issue? 
            We wanted to establish a link between educational outcomes and relative funding and poverty levels to help inform policy making individuals about the best ways to support education.
        b. Why should we be addressing this issue?  (Why is it important?) 
            This is important because we are invested in ensuring a quality education for all students, regardless of their income levels.
        c. What are the ethical concerns? 
            The concern is that, as a society, our education system is biased and provides much better service for individuals of means when compared to folks living in poverty.
        d. What are the ethical justifications for both sides? 
            While one side claims that all children have an equal right to education, the other side affirms their right to provide the best education possible to them, using whatever means they have at their disposal.
        e. What are the possible solutions? 
            While there are no easy answers to this debate, the truth is that funding does not automatically lead to better results. Instead, the decisions made within school districts about how to spend the money may be more informative. 
    2. References that informed your debate. 
        We took information from the NAEP (the Nation's Report Card) and from the US Census Bureau to help us make our case.
    3. Introduction of your software solution. 
        a. How does your software solution relate to the ethical topic? 
            This software makes it easy for a user to look at the disparities in funding and educational outcomes in our nation at a variety of levels. It invites thought and discussion about how to best fund our schools, and encourages policy makers to think outside of the box when creating solutions.
        b. A demonstration of how the software can be used to weigh in on aspects of the ethical topic; perhaps reinforcing the debate in #1. 

    4. Design of your software. 
        a. A deeper dive into interesting aspects of the code, the dataset, or the development process that would be educational to others learning about coding in Python, coding with data, or coding for ethics. 
            Plotly has a bunch of cool stuff. You can also see how we had to manipulate the data to merge the two sets.
        b. Show us the most complex portions of your code and explain the implementation. 
            See visualization #3, function update_poverty()