In [5]:
import sys
import os
from collections import defaultdict
import numpy as np
import pandas as pd
import re

In [6]:
# Location of the cleaned GradCafe CS results, relative to the notebook.
# Built with os.path.join so the path also resolves on POSIX systems, where a
# backslash is an ordinary filename character rather than a separator.
cs_file = os.path.join("data", "input", "gradcafe_data-master", "cs", "cs_clean.csv")

# The file is read with header=None, so the first line is treated as data and
# the column names are assigned explicitly below.
df = pd.read_csv(cs_file, header=None)

all_columns = ["rowid", "uni_name", "major", "degree", "season", "decision",
               "decision_method", "decision_date", "decision_timestamp", 
               "ugrad_gpa", "gre_verbal", "gre_quant", "gre_writing", "is_new_gre", "gre_subject", 
               "status", "post_data", "post_timestamp", "comments"]

# Raises ValueError if the file does not have exactly len(all_columns) columns.
df.columns = all_columns

In [7]:
# Subset of all_columns describing the applicant and the outcome; it leaves out
# rowid, the decision method/date/timestamp, the post date/timestamp and the
# free-text comments.
columns = [
    'uni_name',
    'major',
    'degree',
    'season',
    'decision',
    'ugrad_gpa',
    'gre_verbal',
    'gre_quant',
    'gre_writing',
    'is_new_gre',
    'gre_subject',
    'status',
]

# Preview the first rows of the full frame.
df.head()

Unnamed: 0,rowid,uni_name,major,degree,season,decision,decision_method,decision_date,decision_timestamp,ugrad_gpa,gre_verbal,gre_quant,gre_writing,is_new_gre,gre_subject,status,post_data,post_timestamp,comments
0,0,Tufts University,CS,PhD,S16,Accepted,E-mail,16-11-2015,1447650000.0,3.6,166.0,163.0,4.5,True,,American,16-11-2015,1447650000,Met with professor beforehand. 1 year academic...
1,1,SUNY Stony Brook,CS,MS,S16,Rejected,E-mail,14-11-2015,1447477000.0,3.5,144.0,167.0,3.0,True,,International,14-11-2015,1447477200,non CS background
2,2,Columbia University,CS,MS,S16,Other,Other,14-11-2015,1447477000.0,,,,,,,International,14-11-2015,1447477200,So there was no actual Nov 15 deadline? Or did...
3,3,Columbia University,CS,PhD,S16,Other,Other,13-11-2015,1447391000.0,,,,,,,International,13-11-2015,1447390800,I haven't found the website with the deadline ...
4,4,Columbia University,CS,MS,S16,Other,Other,13-11-2015,1447391000.0,,,,,,,Other,13-11-2015,1447390800,Thanks guys! Good to know I'm not the only one...


In [8]:
# Summary statistics for the numeric columns.
# NOTE(review): in the recorded output, gre_verbal and gre_quant peak at 800
# while their medians sit in the 160s, and ugrad_gpa peaks at 9.99 - looks like
# different GRE and GPA scales are mixed in the raw data; confirm before
# comparing raw values across rows.
df.describe()

Unnamed: 0,rowid,decision_timestamp,ugrad_gpa,gre_verbal,gre_quant,gre_writing,gre_subject,post_timestamp
count,27822.0,27805.0,7222.0,8493.0,8493.0,8248.0,262.0,27822.0
mean,13910.5,1325873000.0,3.698129,288.676439,343.097021,3.863227,800.305344,1326125000.0
std,8031.663931,78625550.0,0.646575,213.479164,277.912011,0.725768,90.468673,78422950.0
min,0.0,401342400.0,0.9,133.0,136.0,2.0,200.0,1139375000.0
25%,6955.25,1267679000.0,3.5,154.0,163.0,3.5,750.0,1267679000.0
50%,13910.5,1331096000.0,3.7,161.0,168.0,4.0,820.0,1331226000.0
75%,20865.75,1394510000.0,3.87,480.0,740.0,4.5,860.0,1394770000.0
max,27821.0,1576386000.0,9.99,800.0,800.0,6.0,990.0,1447650000.0


In [9]:
def print_admission_stat(tmp):
    """Print summary statistics for the GPA and GRE columns of ``tmp``.

    For each of ``ugrad_gpa``, ``gre_verbal``, ``gre_quant`` and
    ``gre_writing`` a titled section is written to stdout containing the
    minimum, maximum, quartiles, mean and standard deviation of that column.

    Args:
        tmp: object exposing the four columns as attributes, each providing
            ``min``, ``max``, ``quantile``, ``mean`` and ``std``
            (e.g. a pandas DataFrame).

    Returns:
        None. The report is only printed.
    """
    # (section heading, column attribute); headings after the first carry a
    # leading newline so that sections are separated by a blank line.
    sections = (
        ("Undergrad GPA", "ugrad_gpa"),
        ("\nGRE Verbal", "gre_verbal"),
        ("\nGRE Quant", "gre_quant"),
        ("\nGRE Writing", "gre_writing"),
    )
    # (line template, statistic); each statistic is evaluated right before its
    # line is printed.
    summaries = (
        ("Min: {}", lambda scores: scores.min()),
        ("Max: {}", lambda scores: scores.max()),
        ("25%: {}", lambda scores: scores.quantile(0.25)),
        ("50%: {}", lambda scores: scores.quantile(0.5)),
        ("75%: {}", lambda scores: scores.quantile(0.75)),
        ("Mean: {}", lambda scores: scores.mean()),
        ("Std: {}", lambda scores: scores.std()),
    )

    for heading, attribute in sections:
        print(heading)
        print("-"*30)
        for template, statistic in summaries:
            print(template.format(statistic(getattr(tmp, attribute))))

In [16]:
def admission_stat_p(tmp, my_ugrad_gpa, my_gre_verbal, my_gre_quant, my_gre_writing, x=None):
    """Return the highest tier that all four of my scores clear within ``tmp``.

    A tier is cleared only when *every* score (GPA, GRE verbal, GRE quant,
    GRE writing) is greater than or equal to that tier's threshold in the
    corresponding column of ``tmp``. Tiers are tried in this order and the
    first one cleared is returned:

        75    all scores >= the column's 0.75 quantile
        50    all scores >= the column's 0.5 quantile (median)
        50.0  all scores >= the column's mean
        25    all scores >= the column's 0.25 quantile
        1     all scores >= the column's minimum
        0     none of the above

    The mean tier returns the float ``50.0``, which compares equal to the
    median tier's ``50``; this is kept as-is so existing callers see the same
    values as before.

    Args:
        tmp: DataFrame with ``ugrad_gpa``, ``gre_verbal``, ``gre_quant`` and
            ``gre_writing`` columns.
        my_ugrad_gpa: my undergraduate GPA.
        my_gre_verbal: my GRE verbal score.
        my_gre_quant: my GRE quantitative score.
        my_gre_writing: my GRE writing score.
        x: unused. Retained (now optional) for backward compatibility; its
            value never influenced the result, and it is no longer fed to
            ``quantile``, so an out-of-range value no longer raises.

    Returns:
        One of 75, 50, 50.0, 25, 1 or 0 as described above. When a column is
        empty its thresholds are NaN, every comparison is false, and 0 is
        returned.
    """
    my_scores = (
        ("ugrad_gpa", my_ugrad_gpa),
        ("gre_verbal", my_gre_verbal),
        ("gre_quant", my_gre_quant),
        ("gre_writing", my_gre_writing),
    )

    # (value to return, function giving the per-column threshold), in the
    # order the tiers are tried.
    tiers = (
        (75, lambda column: column.quantile(0.75)),
        (50, lambda column: column.quantile(0.5)),
        (50.0, lambda column: column.mean()),
        (25, lambda column: column.quantile(0.25)),
        (1, lambda column: column.min()),
    )

    for tier, threshold_of in tiers:
        # all() stops at the first column whose threshold is not met.
        if all(score >= threshold_of(tmp[name]) for name, score in my_scores):
            return tier
    return 0

In [107]:
from scipy import stats


def admission_stat_percent(tmp, my_ugrad_gpa, my_gre_verbal, my_gre_quant, my_gre_writing):
    """Return the percentile rank of each of my scores within ``tmp``.

    Each score is ranked against the corresponding column of ``tmp`` using
    ``scipy.stats.percentileofscore`` with its default ``kind``.

    Missing values in a column are dropped before ranking, so the percentile
    is relative to the scores that were actually reported. A column with no
    reported values yields NaN rather than a version-dependent result.

    Args:
        tmp: DataFrame with ``ugrad_gpa``, ``gre_verbal``, ``gre_quant`` and
            ``gre_writing`` columns.
        my_ugrad_gpa: my undergraduate GPA.
        my_gre_verbal: my GRE verbal score.
        my_gre_quant: my GRE quantitative score.
        my_gre_writing: my GRE writing score.

    Returns:
        Tuple ``(ugrad_gpa_x, gre_verbal_x, gre_quant_x, gre_writing_x)`` of
        percentiles on a 0-100 scale (NaN where a column has no values).
    """
    my_scores = (
        ("ugrad_gpa", my_ugrad_gpa),
        ("gre_verbal", my_gre_verbal),
        ("gre_quant", my_gre_quant),
        ("gre_writing", my_gre_writing),
    )

    percentiles = []
    for column, my_score in my_scores:
        reported = tmp[column].dropna()
        if reported.empty:
            # Nothing to rank against; NaN fails every cut-off comparison.
            percentiles.append(float("nan"))
        else:
            percentiles.append(stats.percentileofscore(reported, my_score))
    return tuple(percentiles)

In [108]:
# Reference lists of category values; np.nan stands for a missing entry.
# NOTE(review): presumably the distinct values of the `status` and `decision`
# columns - confirm against the data.
status_list = ['American', 'International', 'International with US Degree', 'Other', np.nan]
decision_list = ['Accepted', 'Interview', 'Other', 'Rejected', 'Wait listed', np.nan]

# Alphabetically sorted distinct university names. Missing names are dropped
# first: a NaN (float) mixed in with strings would make the sort raise TypeError.
uni_names = sorted(df["uni_name"].dropna().unique())

In [178]:
# --- Which reported results to compare against ------------------------------
# uni_name = "Stanford University"
decision = "Accepted"
degree = "PhD"
major = "CS"
is_new_gre = True
# `season` and `status` are defined here, but the corresponding conditions in
# the filtering cell are commented out.
season = "F15"
status = 'International with US Degree'

# --- My own scores ------------------------------------------------------------
my_ugrad_gpa = 3.8
my_gre_writing = 3.0
my_gre_verbal = 152
my_gre_quant = 162

In [179]:
# When False, the loop below prints nothing.
details = True

# Lowest percentile (0-100) my score may have within a university's matching
# rows, per metric, for that university to be listed.
ugrad_gpa_cut_off = 10
gre_verbal_cut_off = 10
gre_quant_cut_off = 10
gre_writing_cut_off = 10

# Universities with this many matching rows or fewer are not reported.
min_reports = 5

# Everything except the university is identical for every iteration, so the
# profile filter and the score-range filter are applied once, up front.
# The range conditions keep GPA within 0-4.0, GRE verbal/quant within 130-170
# and GRE writing within 0-6.0; rows with a missing score fail the comparisons
# and are dropped as well.
profile_rows = df[(df["major"]==major)
                  & (df["degree"]==degree)
                  & (df["decision"]==decision)
                  & (df["is_new_gre"]==is_new_gre)
#                   & (df["status"]==status)
#                   & (df["season"]==season)
                  & (df["ugrad_gpa"]>=0)
                  & (df["ugrad_gpa"]<=4.0)
                  & (df["gre_verbal"]>=130)
                  & (df["gre_verbal"]<=170)
                  & (df["gre_quant"]>=130)
                  & (df["gre_quant"]<=170)
                  & (df["gre_writing"]>=0.0)
                  & (df["gre_writing"]<=6.0)
                  ]

for uni in uni_names:
    tmp = profile_rows[profile_rows["uni_name"]==uni]

    # Skip before ranking: such groups are never printed.
    if not details or len(tmp) <= min_reports:
        continue

    ugrad_gpa_x, gre_verbal_x, gre_quant_x, gre_writing_x = admission_stat_percent(
        tmp, my_ugrad_gpa, my_gre_verbal, my_gre_quant, my_gre_writing)

    # A NaN percentile fails these comparisons, so that university is skipped.
    if (ugrad_gpa_cut_off <= ugrad_gpa_x <= 100
            and gre_verbal_cut_off <= gre_verbal_x <= 100
            and gre_quant_cut_off <= gre_quant_x <= 100
            and gre_writing_cut_off <= gre_writing_x <= 100):
        print(uni)
        print("-"*50)
        print("Count          : {}      ".format(len(tmp)))
        print("-"*50)
        print("Undergrad GPA  : {0:.2f}%".format(ugrad_gpa_x))
        print("GRE Verbal     : {0:.2f}%".format(gre_verbal_x))
        print("GRE Quant      : {0:.2f}%".format(gre_quant_x))
        print("GRE Writing    : {0:.2f}%".format(gre_writing_x))
        print("-"*50)
        print("\n\n")
    

Cornell University
--------------------------------------------------
Count          : 14      
--------------------------------------------------
Undergrad GPA  : 21.43%
GRE Verbal     : 14.29%
GRE Quant      : 14.29%
GRE Writing    : 10.71%
--------------------------------------------------



Duke University
--------------------------------------------------
Count          : 6      
--------------------------------------------------
Undergrad GPA  : 66.67%
GRE Verbal     : 33.33%
GRE Quant      : 50.00%
GRE Writing    : 16.67%
--------------------------------------------------



Ecole Polytechnique Federale De Lausanne (EPFL)
--------------------------------------------------
Count          : 8      
--------------------------------------------------
Undergrad GPA  : 62.50%
GRE Verbal     : 62.50%
GRE Quant      : 43.75%
GRE Writing    : 37.50%
--------------------------------------------------



George Mason University (GMU)
--------------------------------------------------
Coun

University Of Utah (UoU)
--------------------------------------------------
Count          : 7      
--------------------------------------------------
Undergrad GPA  : 71.43%
GRE Verbal     : 57.14%
GRE Quant      : 35.71%
GRE Writing    : 14.29%
--------------------------------------------------



Virginia Tech
--------------------------------------------------
Count          : 9      
--------------------------------------------------
Undergrad GPA  : 66.67%
GRE Verbal     : 44.44%
GRE Quant      : 33.33%
GRE Writing    : 33.33%
--------------------------------------------------



Worcester Polythchnic Institute (WPI)
--------------------------------------------------
Count          : 7      
--------------------------------------------------
Undergrad GPA  : 85.71%
GRE Verbal     : 35.71%
GRE Quant      : 28.57%
GRE Writing    : 35.71%
--------------------------------------------------





In [138]:
# Sanity check of stats.percentileofscore with its default settings: the score 3
# within [1, 2, 3, 4, 5, 6] evaluates to 50.0 in the recorded output.
a=stats.percentileofscore([1,2,3,4,5,6], 3)
a

50.0