### Structure data for web tool

This is just a quick and dirty approach because a deadline looms. We're going to go with the same structure as last year's table, so I need to take the spreadsheet the way I've formatted it up until this point ("from-google.csv") and quickly convert it to the proper JSON structure, so that if I have to make any more manual edits to the spreadsheet, I can quickly update the graphic.

In [1]:
import pandas as pd
import json

In [2]:
# Column headers the viz expects, in display order.
columns = [
    'School',
    'Applying',              # this should really be "grade"
    'Residence',             # Hartford or Suburban
    'Odds (%)',
    'Number of Applicants',
    'Offers',                # As a string
]

# The spreadsheet therefore has to be broken out into Hartford and
# suburban rows (one row per group), and the overall number discarded.

In [3]:
# Load the spreadsheet export ("from-google.csv") and preview the first rows.
# Every later cell reads from this frame.
from_google = pd.read_csv("from-google.csv")
from_google.head()

Unnamed: 0,operator,school,grade,n_apps,n_apps_hartford,n_apps_suburban,n_offers,n_offers_hartford,n_offers_suburban
0,BPS,Global Experience Magnet,10,9.0,,,11.0,,11.0
1,BPS,Global Experience Magnet,11,,,,,,
2,BPS,Global Experience Magnet,12,,,,,,
3,BPS,Global Experience Magnet,6,32.0,7.0,25.0,72.0,20.0,52.0
4,BPS,Global Experience Magnet,7,13.0,,,9.0,,9.0


In [4]:
import numpy as np

In [5]:
def get_group(group_name="", source=None, out_columns=None):
    """Build the web-table rows for one residence group.

    Selects the school, grade, applicant-count and offer-count columns for
    ``group_name``, adds a residence label and a whole-number odds
    percentage, and renames the columns to the headers the viz expects.

    Parameters
    ----------
    group_name : str
        Suffix of the ``n_apps_*`` / ``n_offers_*`` columns to use,
        e.g. ``"hartford"`` or ``"suburban"``. Its title-cased form becomes
        the residence label.
    source : DataFrame, optional
        Frame to read from. Defaults to the notebook-level ``from_google``.
    out_columns : list of str, optional
        Six output headers, in the order school, grade, residence, odds,
        applicants, offers. Defaults to the notebook-level ``columns``.

    Returns
    -------
    DataFrame
        One row per input row, with the input index preserved. All values
        stay numeric (missing ones as NaN) so the web table sorts properly.
        The odds column is always float: offers * 100 / applicants,
        truncated to a whole number and capped at 100. It is NaN when
        either count is missing or when there are zero applicants.

    Raises
    ------
    KeyError
        If ``source`` lacks the columns for ``group_name``.
    """
    if source is None:
        source = from_google
    if out_columns is None:
        out_columns = columns

    apps_col = "n_apps_" + group_name
    offers_col = "n_offers_" + group_name

    ret = source[["school", "grade", apps_col, offers_col]].copy()
    ret["residence"] = group_name.title()

    apps = ret[apps_col]
    offers = ret[offers_col]

    # Vectorised form of min(int(offers * 100 / apps), 100). NaN in either
    # count propagates to NaN. np.trunc matches int()'s truncation.
    odds = np.trunc(offers * 100 / apps).clip(upper=100)

    # Odds are undefined with zero applicants; report them as missing rather
    # than letting a division by zero surface as 100% (or an exception).
    ret["odds"] = odds.where(apps != 0)

    ret = ret[["school", "grade", "residence", "odds", apps_col, offers_col]]
    ret.columns = out_columns

    return ret

get_group("hartford")

Unnamed: 0,School,Applying,Residence,Odds (%),Number of Applicants,Offers
0,Global Experience Magnet,10,Hartford,,,
1,Global Experience Magnet,11,Hartford,,,
2,Global Experience Magnet,12,Hartford,,,
3,Global Experience Magnet,6,Hartford,100.0,7.0,20.0
4,Global Experience Magnet,7,Hartford,,,
5,Global Experience Magnet,8,Hartford,,,
6,Global Experience Magnet,9,Hartford,100.0,10.0,11.0
7,Wintonbury Early Childhood Magnet,K,Hartford,,,
8,Wintonbury Early Childhood Magnet,PK3,Hartford,100.0,9.0,33.0
9,Wintonbury Early Childhood Magnet,PK4,Hartford,,,


In [6]:
def to_arrays(d):
    """Return the rows of DataFrame ``d`` as a list of plain Python lists.

    The frame is serialised with pandas' JSON writer (values only, no
    index or headers) and parsed back, so the result holds only
    JSON-native values.
    """
    serialized = d.to_json(orient="values")
    rows = json.loads(serialized)
    return rows

to_arrays(get_group("hartford"))

[[u'Global Experience Magnet', u'10', u'Hartford', None, None, None],
 [u'Global Experience Magnet', u'11', u'Hartford', None, None, None],
 [u'Global Experience Magnet', u'12', u'Hartford', None, None, None],
 [u'Global Experience Magnet', u'6', u'Hartford', 100.0, 7.0, 20.0],
 [u'Global Experience Magnet', u'7', u'Hartford', None, None, None],
 [u'Global Experience Magnet', u'8', u'Hartford', None, None, None],
 [u'Global Experience Magnet', u'9', u'Hartford', 100.0, 10.0, 11.0],
 [u'Wintonbury Early Childhood Magnet', u'K', u'Hartford', None, None, None],
 [u'Wintonbury Early Childhood Magnet', u'PK3', u'Hartford', 100.0, 9.0, 33.0],
 [u'Wintonbury Early Childhood Magnet', u'PK4', u'Hartford', None, None, None],
 [u'Academy of Aerospace and Engineering',
  u'1',
  u'Hartford',
  None,
  27.0,
  None],
 [u'Academy of Aerospace and Engineering',
  u'10',
  u'Hartford',
  None,
  19.0,
  None],
 [u'Academy of Aerospace and Engineering',
  u'11',
  u'Hartford',
  None,
  15.0,
  None],


### Load the old data

In [7]:

# Last year's published table config, reused as the template for this year's.
LAST_YEARS = "last_years.json"

# Read through a context manager so the file handle is closed right away.
with open(LAST_YEARS) as last_years_file:
    obj = json.load(last_years_file)
#obj["column_names"][1]["title"] = "Grade level"

# NOTE(review): "col" / "desc_asc" look like the table's default sort
# (column 3 is "Odds (%)" below, descending) -- confirm against the viz code.
obj["col"] = "3"
obj["desc_asc"] = "desc"

# Headers shown in the web table; same order as the data rows.
obj["column_names"] = [{u'title': u'School'},
  {u'title': 'Grade level'},
  {u'title': u'Residence'},
  {u'title': u'Odds (%)'},
  {u'title': u'Applicants'},
  {u'title': u'Offers'}]

# Earlier headline/subhead, kept for reference:
# obj["headline"] = "The odds: School choice lottery results, 2017-18"
# obj["subhead"] = "When the number of offers outnumber applicants, "\
# + "that means some students who chose that school for a second or third "\
# + "choice were offered enrollment. The number of applicants listed "\
# + "below only counts students for which that school was the first choice."

obj["headline"] = "Look up the odds yourself"
obj["subhead"] = ""
obj["height"] = "250"

# Keep last year's rows around for comparison, then blank the slot so it
# can be filled with this year's data.
old_data = obj["data"]
obj["data"] = None


In [8]:
obj

{u'byline': u'www.CtMirror.org',
 u'col': '3',
 u'column_names': [{u'title': u'School'},
  {u'title': 'Grade level'},
  {u'title': u'Residence'},
  {u'title': u'Odds (%)'},
  {u'title': u'Applicants'},
  {u'title': u'Offers'}],
 u'data': None,
 u'desc_asc': 'desc',
 u'headline': 'Look up the odds yourself',
 u'height': '250',
 u'paging': False,
 u'sourceline': u'CT State Department of Education',
 u'subhead': ''}

In [9]:
# Here's the old data, looks pretty much like the one we just built...

old_data

[[u'Classical Magnet School ', u'12', u'Suburban', u'1', u'11', u'100%'],
 [u'International Magnet School for Global Citizenship ',
  u'2',
  u'Hartford',
  u'2',
  u'19',
  u'100%'],
 [u'International Magnet School for Global Citizenship ',
  u'3',
  u'Hartford',
  u'1',
  u'9',
  u'100%'],
 [u'Journalism and Media Academy Magnet School',
  u'11',
  u'Suburban',
  u'3',
  u'20',
  u'100%'],
 [u'Metropolitan Learning Center Global/International Studies',
  u'10',
  u'Suburban',
  u'4',
  u'22',
  u'100%'],
 [u'Journalism and Media Academy Magnet School',
  u'9',
  u'Suburban',
  u'12',
  u'63',
  u'100%'],
 [u'CREC Greater Hartford Academy of the Arts [Half-Day]',
  u'10',
  u'Hartford',
  u'2',
  u'10',
  u'100%'],
 [u'International Magnet School for Global Citizenship ',
  u'K',
  u'Hartford',
  u'5',
  u'25',
  u'100%'],
 [u'Global Experience Magnet School', u'12', u'Hartford', u'2', u'9', u'100%'],
 [u'Global Experience Magnet School', u'11', u'Hartford', u'1', u'4', u'100%'],
 [u'

In [10]:
# Stack the Hartford and suburban rows into one frame and sort them by
# school, grade and residence before converting to arrays (rather than
# converting each group separately and appending the lists).
obj["data"] = to_arrays(
    pd.concat([get_group("hartford"), get_group("suburban")])
      .sort_values(by=["School", "Applying", "Residence"])
)


In [11]:
obj

{u'byline': u'www.CtMirror.org',
 u'col': '3',
 u'column_names': [{u'title': u'School'},
  {u'title': 'Grade level'},
  {u'title': u'Residence'},
  {u'title': u'Odds (%)'},
  {u'title': u'Applicants'},
  {u'title': u'Offers'}],
 u'data': [[u'Academy of Aerospace and Engineering',
   u'1',
   u'Hartford',
   None,
   27.0,
   None],
  [u'Academy of Aerospace and Engineering',
   u'1',
   u'Suburban',
   None,
   149.0,
   None],
  [u'Academy of Aerospace and Engineering',
   u'10',
   u'Hartford',
   None,
   19.0,
   None],
  [u'Academy of Aerospace and Engineering',
   u'10',
   u'Suburban',
   None,
   73.0,
   None],
  [u'Academy of Aerospace and Engineering',
   u'11',
   u'Hartford',
   None,
   15.0,
   None],
  [u'Academy of Aerospace and Engineering',
   u'11',
   u'Suburban',
   None,
   31.0,
   None],
  [u'Academy of Aerospace and Engineering',
   u'12',
   u'Hartford',
   None,
   None,
   None],
  [u'Academy of Aerospace and Engineering',
   u'12',
   u'Suburban',
   None,

In [12]:
# Write the finished table config for the web tool. The context manager
# guarantees the file is flushed and closed before the cell finishes.
# (An earlier export used the name "TheOdds-2018-12345.js".)
with open("TheOdds-2018-123456.js", "w") as out_file:
    out_file.write(json.dumps(obj))

In [13]:
from_google.sum()

operator             BPSBPSBPSBPSBPSBPSBPSBPSBPSBPSCRECCRECCRECCREC...
school               Global Experience MagnetGlobal Experience Magn...
grade                1011126789KPK3PK411011122346789KPK3PK410111267...
n_apps                                                           19076
n_apps_hartford                                                   5657
n_apps_suburban                                                  12279
n_offers                                                          7446
n_offers_hartford                                                 2459
n_offers_suburban                                                 3755
dtype: object

In [14]:
from_google["operator"].value_counts()

HPS        152
CREC       111
Goodwin     11
BPS         10
EHPS         2
Name: operator, dtype: int64