In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load the states dataset and derive two helper columns:
#   - expenditure_per_student: TOTAL_EXPENDITURE divided by GRADES_ALL_G
#   - above_avg_math8: 'Above Avg' / 'Below Avg' relative to the mean of
#     AVG_MATH_8_SCORE, left missing where the score itself is missing
education = (
    pd.read_csv('states_all.csv')
    .assign(
        expenditure_per_student=lambda df: df['TOTAL_EXPENDITURE'] / df['GRADES_ALL_G'],
        # `NaN > mean` evaluates to False, so without the trailing mask every
        # row that has no grade-8 math score would be labelled 'Below Avg'.
        above_avg_math8=lambda df: pd.Series(
            np.where(
                df['AVG_MATH_8_SCORE'] > df['AVG_MATH_8_SCORE'].mean(),
                'Above Avg',
                'Below Avg',
            ),
            index=df.index,
        ).where(df['AVG_MATH_8_SCORE'].notna()),
    )
)

education

In [None]:
# Scatter of grade-8 math vs. reading scores built with plotly graph objects,
# one trace per above_avg_math8 group.

# Marker colour per group; dict order determines the order traces are added.
group_colors = {'Below Avg': 'orange', 'Above Avg': 'green'}

fig = go.Figure()

for group_name, marker_color in group_colors.items():
    in_group = education['above_avg_math8'] == group_name
    fig.add_scatter(
        x=education.loc[in_group, 'AVG_MATH_8_SCORE'],
        y=education.loc[in_group, 'AVG_READING_8_SCORE'],
        mode="markers",
        marker={'color': marker_color},
        name=group_name,
    )

# NOTE(review): the legend is titled "Funding" but the traces are split on
# above_avg_math8 (a math-score grouping) - confirm which one is intended.
fig.update_layout(
    title='Relationship between Math, Reading Scores & Student Funding',
    legend_title="Funding",
    xaxis_title='Math',
    yaxis_title='Reading',
)

fig.show()

In [None]:
# Plotly express version of the math vs. reading scatter, coloured by the
# above_avg_math8 group.
# NOTE(review): the legend is titled "Funding" but the colour comes from
# above_avg_math8 (a math-score grouping) - confirm which one is intended.
score_scatter = px.scatter(
    education,
    x='AVG_MATH_8_SCORE',
    y='AVG_READING_8_SCORE',
    color="above_avg_math8",
    title="Relationship between Math, Reading Scores and student funding.",
    color_discrete_map={"Below Avg": "orange", "Above Avg": "green"},
    labels={"AVG_MATH_8_SCORE": "Math", "AVG_READING_8_SCORE": "Reading"},
)

# Global font colour first, then a legend title with its own colour and family.
score_scatter.update_layout(
    font_color="Green",
    legend_title_text="Funding",
    legend_title_font_color="Red",
    legend_title_font_family="Lato",
)

# Last expression of the cell, so the notebook renders the figure.
score_scatter

In [None]:
# line chart
# Total revenue per year, for years strictly between 1992 and 2017.
# Only TOTAL_REVENUE is aggregated: a bare .sum() on the grouped frame would
# also aggregate every other column, including text ones such as STATE and
# above_avg_math8, which is wasted work and behaves differently across pandas
# versions. The chart only reads YEAR and TOTAL_REVENUE.
yearly_revenue = (
    education
    .query("1992 < YEAR < 2017")
    .groupby('YEAR', as_index=False)[['TOTAL_REVENUE']]
    .sum()
)

px.line(
    yearly_revenue,
    x='YEAR',
    y='TOTAL_REVENUE',
    title="EDUCATION REVENUE OVER TIME"
)

In [6]:
# multiple line charts
# Total revenue per year and state (California and Massachusetts only), for
# years strictly between 1992 and 2017, drawn as one line per state.
# Only TOTAL_REVENUE is aggregated: a bare .sum() on the grouped frame would
# also aggregate every other column, including text ones such as
# above_avg_math8, which is wasted work and behaves differently across pandas
# versions. The chart only reads YEAR, STATE and TOTAL_REVENUE.
state_yearly_revenue = (
    education
    .query("1992 < YEAR < 2017 and STATE in ['CALIFORNIA','MASSACHUSETTS'] ")
    .groupby(['YEAR', 'STATE'], as_index=False)[['TOTAL_REVENUE']]
    .sum()
)

px.line(
    state_yearly_revenue,
    x='YEAR',
    y='TOTAL_REVENUE',
    color='STATE',
    title='EDUCATION REVENUE OVER TIME'
)