In [1]:
import pandas as pd

df = pd.read_csv('../../Data/SMTO_2015/Formatted.csv')

campus_info = pd.read_csv('../../Data/SMTO_2015/Campus_Info.csv', index_col=1)
codes = campus_info.index.tolist()

In [2]:
from math import log

df['School_Num'] = df['School'].apply(lambda x: codes.index(x))
for c in codes:
    df['Enrol.' + c] = df.apply(lambda x: log(x[x['Level']+ '.' + c]), axis = 1)

cols_to_keep = ['School_Num', 'Segment'] + ['Dist.' + c for c in codes] + ['Enrol.' + c for c in codes]
df = df[cols_to_keep]
df.head()

Unnamed: 0,School_Num,Segment,Dist.SG,Dist.SC,Dist.MI,Dist.YK,Dist.YG,Dist.RY,Dist.OC,Enrol.SG,Enrol.SC,Enrol.MI,Enrol.YK,Enrol.YG,Enrol.RY,Enrol.OC
0,1,1,10.25606,14.88098,37.71907,22.59214,9.218413,9.580635,11.24173,10.578802,9.34958,9.44983,10.525756,7.758333,10.16689,8.107117
1,0,5,1.132351,23.0392,28.1554,15.87906,11.21115,2.675173,2.723838,9.591308,5.620401,6.383507,8.263333,4.75359,7.664816,5.153292
2,0,1,33.03563,53.38017,6.943656,35.22834,39.65776,34.08243,32.38063,10.578802,9.34958,9.44983,10.525756,7.758333,10.16689,8.107117
3,0,2,0.699414,24.11954,27.95182,16.81186,12.83041,2.314008,1.541276,10.578802,9.34958,9.44983,10.525756,7.758333,10.16689,8.107117
4,0,5,1.132351,23.0392,28.1554,15.87906,11.21115,2.675173,2.723838,9.591308,5.620401,6.383507,8.263333,4.75359,7.664816,5.153292


In [3]:
def print_results(results, with_ASCs, with_B_Enrol, level):
    out = []
    stats = results.getGeneralStatistics()
    betas = results.getBetaValues()
    if with_ASCs:
        out.extend([betas['ASC_' + c] if c != 'SG' else 0 for c in codes])
    else:
        out.extend([log(campus_info.loc[c][level]) for c in codes])
    out.append(betas['B_DIST'])    
    out.append(betas['B_ENROL'] if with_B_Enrol else (0 if with_ASCs else 1))
    out.append(stats['Final log likelihood'][0])
    return out

In [4]:
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta, DefineVariable

for j in range(1, 7):
    print("Segment", j)
    level = 'UG' if j <= 4 else 'Grad'
    temp = df[df['Segment'] == j]
    database = db.Database("SMTO", temp)

    for (with_ASCs, with_B_Enrol) in ((True, False), (False, False), (False, True)):
        B_DIST = Beta('B_DIST', 0, None, None, 0)
        B_ENROL = Beta('B_ENROL', 0 if with_ASCs else 1, None, None, 0 if with_B_Enrol else 1)
        ASCs, V, av = [], {}, {}
        for i in range(len(codes)):
            c = codes[i]
            ASCs.append(Beta('ASC_' + c, 1, None, None, 0 if with_ASCs and i != 0 else 1))
            V[i] = ASCs[i] + database.variables['Enrol.' + c] * B_ENROL + database.variables['Dist.' + c] * B_DIST
            av[i] = 1
        
        logprob = models.loglogit(V, av, database.variables['School_Num'])
        biogeme = bio.BIOGEME(database, logprob)
        res = biogeme.estimate()
        for k in print_results(res, with_ASCs, with_B_Enrol, level):
            print(k)
        print()

Segment 1
0
0.18000374062411648
0.01244936185715921
0.860227363995559
-1.2900518542827304
1.0124478438923379
-1.1132365930965111
-0.061481877214120406
0
-10757.420435529057

10.578801664944281
9.349580439067687
9.449829601826478
10.525756156212303
7.75833346749091
10.1668897397139
8.10711747075039
-0.060623311583886744
1
-10919.378899268573

10.578801664944281
9.349580439067687
9.449829601826478
10.525756156212303
7.75833346749091
10.1668897397139
8.10711747075039
-0.058617693639853655
0.7958275657711226
-10853.44790738021

Segment 2
0
0.08941723474203186
-0.28291669268837394
0.9758472278893843
-1.0090708947027476
0.09517736703280168
-1.1773670651914099
-0.16042506938201115
0
-3595.155126527525

10.578801664944281
9.349580439067687
9.449829601826478
10.525756156212303
7.75833346749091
10.1668897397139
8.10711747075039
-0.1579534093343367
1
-3701.4218983450505

10.578801664944281
9.349580439067687
9.449829601826478
10.525756156212303
7.75833346749091
10.1668897397139
8.10711747075039
-0