In [27]:
import pandas as pd

# Load the "2. Data" worksheet of the score workbook into a DataFrame.
df = pd.read_excel(
    "../data/states-score-data.xlsx",
    sheet_name="2. Data",
)

In [28]:
# Keep only the column labels that do not contain "Unnamed".
categories = (label for label in df.columns if 'Unnamed' not in label)
indicatorGroups = list(categories)
indicatorGroups

['Launching Future Ready Youth',
 'Navigating into Adulthood 1: Postsecondary outcomes',
 'Navigating into Adulthood 2: Young Adults Deserve a Brighter Future (19-27yrs, 2021)',
 'Social mobility 2021 (34-36yrs)',
 'OVERALL SCORE']

In [29]:
# Drop the rows labelled 0 and 1; the values of row 1 become the column titles.
data = df.drop([0, 1])

# Any title that merely contains "CR Score (Standardized)" is collapsed to
# exactly that label; every other title is kept unchanged.
newColNames = [
    "CR Score (Standardized)" if "CR Score (Standardized)" in title else title
    for title in list(df.iloc[1])
]

data.columns = newColNames

In [30]:
# Spreadsheet column titles -> short snake_case names, grouped by theme.
# Both "RANK" and "Rank" map to the same short name.
_ID_AND_RANK_COLS = {
    "50 States": "states",
    "RANK": "rank",
    "Rank": "rank",
    "Rank (based on CR score 1 and 2)": "rank_cr12",
    "Rank (based on CR score 3)": "rank_cr3",
}

# Counselor ratios, AP, FAFSA, high-school completion and placement.
_SCHOOL_COLS = {
    "Elementary/Middle School Counselor Ratio 2020-21": "ele_mid_counselor_ratio",
    "High School Counselor Ratio 2020-21": "high_counselor_ratio",
    "AP Test Performance of 3 or Higher (2020) (%)": "ap",
    "Fafsa Completion Rate (%) 2021-22": "fafsa",
    "HS Completion Rate (%) 2018-19 (Cohort based)": "hs_completion",
    "Post HS Placement - college (%) 2018-19 (Cohort based)": "post_hs_college",
}

# Postsecondary retention and completion.
_POSTSECONDARY_COLS = {
    "Retention rate PT (%) 2019-20": "retention_pt",
    "Retention rate FT (%) 2019-20": "retention_ft",
    "PS completion (2 years) (%) 2019-20": "ps_completion_2y",
    "PS completion (4 years) (%) 2019-20": "ps_completion_4y",
}

# Youth disconnection and Non-NEET wages.
_YOUTH_WAGE_COLS = {
    "Disconnected youth (%)": "disconnected",
    "Median hourly wage of Non-NEET youth($)": "non_neet_wage",
    "Non-NEET earning more than state's living wage (%)": "non_neet_living_wage",
}

# Adult wages and full-time work.
_ADULT_WAGE_COLS = {
    "Median hourly wage of all adult ($)": "adult_wage",
    "Adults working full time (1820h) (%)": "adult_full_time",
    "Full time adults earning more than MIT wage (%)": "adult_mit_wage",
}

# CR score columns.
_CR_SCORE_COLS = {
    "CR Score (Standardized)": "cr_score",
    "CR Score (0~100 score)": "cr_score100",
    "CR Score 1 (average of 4 standardized scores)": "cr_score1",
    "CR Score 2 (0~100 score from CR score 1)": "cr_score2",
    "CR Score 3 (0~100 score by averaging four 0~100 scores)": "cr_score3",
}

# Merge the groups in their original order into the single lookup table.
colMap = {
    **_ID_AND_RANK_COLS,
    **_SCHOOL_COLS,
    **_POSTSECONDARY_COLS,
    **_YOUTH_WAGE_COLS,
    **_ADULT_WAGE_COLS,
    **_CR_SCORE_COLS,
}

In [31]:
# Apply the short column names from colMap; titles without an entry are left
# unchanged. Rebinding to the returned frame avoids mutating in place.
data = data.rename(columns=colMap)

### Split data into different categories

In [32]:
# Positional column slices of `data`, one frame per category.
# Column 0 leads every frame so each one carries the state name.
dfReadyYouth = data.iloc[:, :10]
dfAdulthood1 = data.iloc[:, [0, *range(10, 17)]]
dfAdulthood2 = data.iloc[:, [0, *range(17, 23)]]
dfSocial = data.iloc[:, [0, *range(23, 29)]]
dfOverall = data.iloc[:, [0, *range(29, 34)]]


In [33]:
# Preview the first rows of the first category frame.
dfReadyYouth.head()

Unnamed: 0,states,rank,ele_mid_counselor_ratio,high_counselor_ratio,ap,fafsa,hs_completion,post_hs_college,cr_score,cr_score100
2,Massachusetts,4,546.6,191.15,0.73,0.693,0.88,0.68,0.780322,85.960714
3,Connecticut,3,533.27,182.07,0.75,0.7,0.89,0.71,0.997157,95.082895
4,New Jersey,5,1048.43,296.13,0.75,0.729,0.91,0.68,0.643388,80.199971
5,New Hampshire,1,215.44,159.67,0.74,0.643,0.88,0.6,1.114037,100.0
6,Rhode Island,11,1486.21,160.94,0.65,0.711,0.84,0.68,0.375622,68.935111


In [34]:
def getProp(df):
    """Build a nested ``{state: {column: value}}`` lookup from a frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"states"`` column. Every column after the first
        one is exported as a property.

    Returns
    -------
    dict
        One entry per distinct value in ``df["states"]``. Each entry maps
        the names of ``df.columns[1:]`` to that state's values. If a state
        occurs on several rows, the first row is used.

    Raises
    ------
    KeyError
        If ``df`` has no ``"states"`` column.
    """
    # Read the state names from the frame that was passed in, not from a
    # notebook-level variable, so the function depends only on its argument.
    states = df["states"].tolist()

    # Convert each value column to a plain list once, instead of filtering
    # the whole frame again for every (state, column) pair.
    valueCols = list(df.columns[1:])
    colValues = [df[col].tolist() for col in valueCols]

    prop = {}
    for rowPos, state in enumerate(states):
        if state in prop:
            # A repeated state keeps the values of its first row.
            continue
        prop[state] = {
            col: values[rowPos] for col, values in zip(valueCols, colValues)
        }

    return prop

# Build the per-state property lookup for each category frame, in order.
(
    propReadyYouth,
    propAdultHood1,
    propAdultHood2,
    propSocial,
    propOverall,
) = map(
    getProp,
    (dfReadyYouth, dfAdulthood1, dfAdulthood2, dfSocial, dfOverall),
)

In [36]:
# Combine the five category lookups into one properties record per state.
props = {
    stateName: {
        "name": stateName,
        "youth": propReadyYouth[stateName],
        "adulthood1": propAdultHood1[stateName],
        "adulthood2": propAdultHood2[stateName],
        "social": propSocial[stateName],
        "overall": propOverall[stateName],
    }
    for stateName in data["states"].tolist()
}


### Get States Polygon Coordinates

In [37]:
import json

# Parse the states file inside a context manager so the file handle is
# closed as soon as the JSON has been read.
with open("../data/states-coors.json", "r") as file:
    jsonData = json.load(file)

# Map each feature's properties["NAME"] to its geometry object.
statesCoors = {
    feat["properties"]["NAME"]: feat["geometry"]
    for feat in jsonData["features"]
}


### Convert Data to GeoJSON

In [38]:
# One Feature per state: the combined properties plus the geometry looked up
# by state name.
geoJson = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": props[stateName],
            "geometry": statesCoors[stateName],
        }
        for stateName in data["states"].tolist()
    ],
}


In [40]:
# Serialize the FeatureCollection and write it to the output file.
with open("../data/states-careers-score.json", "w") as outfile:
    outfile.write(json.dumps(geoJson))