In [2]:
import csv
import math

import numpy as np
import pandas as pd

import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.models as models
from biogeme.expressions import Beta, DefineVariable

# Load the SMTO 2015 input file and drop respondents whose level is 'Other'.
# .copy() makes the filtered frame independent, so the column assignments
# below do not write into a view of the original (SettingWithCopyWarning).
df = pd.read_csv('../../Data/SMTO_2015/SMTO_2015_Complete_Input.csv')
df = df[df['Level'] != 'Other'].copy()
school_codes = ['SG', 'SC', 'MI', 'YK','YG', 'RY','OC']

# Encode the chosen school as its position in school_codes. Fail loudly (with
# the offending values) if the data contains a code that is not listed.
unknown_schools = set(df['School']) - set(school_codes)
if unknown_schools:
    raise ValueError('Unknown school code(s) in data: ' + ', '.join(sorted(map(str, unknown_schools))))
df['School'] = df['School'].map({code: position for position, code in enumerate(school_codes)})

# Enrol.<code> is the UG.<code> value for rows with Level == 'UG' and the
# Grad.<code> value for every other row.
is_undergrad = df['Level'] == 'UG'
for code in school_codes:
    df['Enrol.' + code] = df['UG.' + code].where(is_undergrad, df['Grad.' + code])

# Pristine snapshot; later cells reset df from it before each model run.
full_df = df.copy()

In [8]:
def run_model(name, with_ASCs, combined = True):
    """Estimate a logit school-choice model with Biogeme and return its betas.

    Reads the module-level ``df`` and ``school_codes``; only the numeric
    columns of ``df`` are handed to Biogeme.

    Parameters
    ----------
    name : str
        Model name; assigned to ``modelName`` and used in the printed header.
    with_ASCs : bool
        When true, an alternative-specific constant is estimated for every
        school except 'SG'. Otherwise all constants are held fixed at 0.
    combined : bool, default True
        When true, each utility also includes the interaction
        ``B_FAM_DIST * Dist.<code> * Family``.

    Returns
    -------
    The value of ``results.getBetaValues()`` for the estimated model.
    """
    database = db.Database("SMTO", df.select_dtypes(include = 'number'))
    ASCs, V, av = [], {}, {}

    # Last Beta argument is the status flag: 0 = estimated, 1 = held fixed.
    B_DIST = Beta('B_DIST', 0, None, None, 0)
    # B_ENROL is always fixed at 1. The original expression was
    # `1 if with_ASCs else 1`, i.e. the same value in both branches.
    B_ENROL = Beta('B_ENROL', 1, None, None, 1)
    B_FAM_DIST = Beta('B_FAM_DIST', 0, None, None, 0)
    #B_CLOSEST_SCHOOL = Beta('B_CLOSEST_SCHOOL', 0, None, None, 0)

    for alt, code in enumerate(school_codes):
        # 'SG' never gets an estimated constant; without ASCs none do.
        asc_is_fixed = 0 if with_ASCs and code != 'SG' else 1
        ASCs.append(Beta('ASC_' + code, 0, None, None, asc_is_fixed))

        distance = database.variables['Dist.' + code]
        utility = ASCs[alt] + B_ENROL * database.variables["Enrol." + code] + B_DIST * distance
        if combined:
            utility = utility + B_FAM_DIST * distance * database.variables["Family"]
        V[alt] = utility
        av[alt] = 1  # every alternative is available to every respondent

    logprob = models.loglogit(V, av, database.variables["School"])
    # Observations are weighted by the Exp_Segment column.
    formulas = {'loglike': logprob, 'weight': database.variables["Exp_Segment"]}
    estimator = bio.BIOGEME(database, formulas, numberOfThreads=1)
    estimator.modelName = name
    results = estimator.estimate(saveIterations=True)
    betas = results.getBetaValues()

    print("Results for " + name + " model:")
    print(betas)
    print()
    return betas

In [3]:
# Show the school codes; a code's position in this list is the integer stored in df['School'].
school_codes

['SG', 'SC', 'MI', 'YK', 'YG', 'RY', 'OC']

In [10]:
def get_cm(name, betas, with_ASCs, combined = True):
    """Print soft-max and hard-max confusion matrices for an estimated model.

    Works on the module-level ``df`` and ``school_codes``. As a side effect it
    writes one ``V_<code>`` (utility) and one ``P_<code>`` (choice probability)
    column per school into ``df``.

    Parameters
    ----------
    name : str
        Model name used in the printed headers.
    betas : mapping
        Estimated coefficients; must contain 'B_DIST', 'B_FAM_DIST' when
        ``combined`` is true, and 'ASC_<code>' for every school except 'SG'
        when ``with_ASCs`` is true.
    with_ASCs : bool
        Whether alternative-specific constants are added to the utilities.
    combined : bool, default True
        Whether the ``B_FAM_DIST * Dist.<code> * Family`` term is included.

    Returns
    -------
    None. Everything is reported through ``print``.
    """
    utility_cols = ['V_' + code for code in school_codes]
    prob_cols = ['P_' + code for code in school_codes]
    n_schools = len(school_codes)

    # Systematic utility of each school. The terms are added in the same order
    # as the original four copy-pasted formulas, so values are unchanged.
    # NOTE(review): run_model estimates on 'Enrol.<code>' while the size term
    # here is 'Total.<code>' (entering with a coefficient of 1) - confirm that
    # this difference is intended.
    for code in school_codes:
        distance = df['Dist.' + code]
        utility = betas['B_DIST'] * distance
        if with_ASCs and code != 'SG':
            # 'SG' has no estimated constant, so betas has no 'ASC_SG' entry.
            utility = betas['ASC_' + code] + utility
        if combined:
            utility = utility + betas['B_FAM_DIST'] * distance * df['Family']
        df['V_' + code] = utility + df['Total.' + code]

    # Select the utility columns by name rather than by position, so the
    # result does not depend on them being the last columns of df.
    utilities = df[utility_cols]
    # Soft-max over alternatives, computed once for all rows. Subtracting the
    # row maximum leaves the probabilities unchanged but keeps exp() from
    # overflowing on large utilities.
    shifted = utilities.sub(utilities.max(axis=1), axis=0)
    exp_utilities = np.exp(shifted)
    # skipna=False: a missing utility makes the whole row's probabilities NaN,
    # as it did with the original per-row computation.
    denominators = exp_utilities.sum(axis=1, skipna=False)
    probabilities = exp_utilities.div(denominators, axis=0)
    for code in school_codes:
        df['P_' + code] = probabilities['V_' + code]

    actual = df['School']
    predicted_probs = df[prob_cols]

    # Row = observed school, column = summed predicted probability per school.
    print("Softmax confusion matrix for " + name + " model:")
    softmax_cm = []
    for school in range(n_schools):
        expected_counts = predicted_probs[actual == school].sum().values
        softmax_cm.append(expected_counts.tolist())
        print(*expected_counts)

    print(softmax_cm)
    total = sum(sum(row) for row in softmax_cm)
    correct = sum(softmax_cm[i][i] for i in range(n_schools))
    # Avoid a ZeroDivisionError when no row matches any school.
    accuracy = correct / total if total else float('nan')
    print('\nAccuracy: ' + str(accuracy))

    # Row = observed school, column = number of rows whose most probable
    # alternative is that school. The arg-max is computed once and reused.
    print("\nHardmax confusion matrix for " + name + " model:")
    predicted = predicted_probs.idxmax(axis=1)
    for school in range(n_schools):
        chosen = predicted[actual == school]
        print(*[(chosen == 'P_' + code).sum() for code in school_codes])

    print()

In [4]:
# Disabled experiment: the loop below is wrapped in a string literal, so none
# of it runs; the cell only echoes the string. Inside it, each (name, with_ASCs)
# pair would be estimated with run_model and evaluated with get_cm on a fresh
# copy of full_df, with optional Family / Non-Family subsets commented out.
'''
for (x, y) in (('Eric', True),('Proposed', False)):
    df = full_df.copy()
    print("----------- Combined ----------")
    get_cm(x, run_model(x, y), y)
    
    #print("----------- Family ----------")
    #df = full_df[full_df['Family'] == 1]
    #get_cm(x, run_model(x, y, False), y, False)
    
    #print("----------- Non-Family ----------")
    #df = full_df[full_df['Family'] == 0]
    #get_cm(x, run_model(x, y, False), y, False)
'''

'\nfor (x, y) in ((\'Eric\', True),(\'Proposed\', False)):\n    df = full_df.copy()\n    print("----------- Combined ----------")\n    get_cm(x, run_model(x, y), y)\n    \n    #print("----------- Family ----------")\n    #df = full_df[full_df[\'Family\'] == 1]\n    #get_cm(x, run_model(x, y, False), y, False)\n    \n    #print("----------- Non-Family ----------")\n    #df = full_df[full_df[\'Family\'] == 0]\n    #get_cm(x, run_model(x, y, False), y, False)\n'

In [11]:
# Estimate and evaluate the 'Proposed' specification: no alternative-specific
# constants, family-distance interaction left at its default (on).
df = full_df.copy()  # reset to the snapshot so columns added by earlier runs are gone
print("----------- Combined ----------")
get_cm(
    'Proposed',
    run_model('Proposed', with_ASCs=False),
    with_ASCs=False,
)

----------- Combined ----------
Results for Proposed model:
{'B_DIST': -0.13475000404502052, 'B_FAM_DIST': 0.06520057534791564}

Softmax confusion matrix for Proposed model:
2762.3145817186514 242.68737848017508 296.78443240017333 995.9139242013098 75.35036598480714 1369.4416187632023 169.5076984516492
340.0421421758302 244.73785970551108 29.031502448625687 232.20974981521346 23.933729067630303 183.24417976639728 20.800837020791484
289.7081690812989 18.28910492277937 260.2246722240281 196.7741579911264 8.144937867618069 138.1136461672136 18.745311745935535
983.2537556531387 165.29098422194528 233.03801714021287 1110.5502476563101 46.749340267359145 484.96720762346905 60.15044743757616
115.48317306725038 21.629641109970162 20.55869352945785 83.1328085073063 6.909393615960424 60.30111474840854 6.98517542164639
1043.4266453978978 171.96296959196357 207.64587052688952 630.0236373440589 41.0079870219161 547.9988536333345 65.93403648393881
196.78339465747308 23.799607735945237 27.45087821553