# Predictions for incoming students

In [1]:
import json
import pandas as pd
import pickle
import numpy as np
from sklearn.externals import joblib



In [2]:
# Course code and academic-year pair that select which input/output files are used.
# Only the second year of the pair appears in the file names built in this notebook section.
_course = 'ca116'
_academic_year = (2018, 2019)

In [3]:
# Features file for the configured course (upper-cased) and the second year
# of the academic-year pair.
filename = '../../data/features/features_%s_%s.json' % (
    _course.upper(),
    _academic_year[1],
)

# Read the whole file and parse it into `data` (indexed by position and
# iterated as a sequence of per-student, per-week records further down).
with open(filename) as f:
    data = json.loads(f.read())

In [4]:
# Number of feature records, shown with a thousands separator.
format(len(data), ',')

'1,560'

In [5]:
# Show the first record to inspect which fields each feature row carries.
data[0]

{'course': 'ca116',
 'academic_year_0': 2018,
 'academic_year_1': 2019,
 'week': 1,
 'exam': 4,
 'student': 'abdulla6',
 'cao_points': 395.0,
 'route': 'CAO',
 'math_lc': 71.0,
 'domicile': 8766.13663348,
 'grade': 50,
 'program_correct_W1': 0.6666666666666666,
 'cum_programs_W1': 0.6666666666666666,
 'campus_rate_W1': 1.0,
 'week_rate_W1': 0.0,
 'coverage_W1': 0.4}

In [6]:
# Exam identifiers; one X_test_<exam>.npy file is loaded per entry below.
# NOTE(review): presumably the weeks in which the lab exams take place - confirm.
exams = [4, 8, 12]

In [7]:
# NOTE(review): not referenced by the visible cells; presumably the pass mark
# applied to `grade` - confirm before relying on it.
THRESHOLD = 40

In [8]:
# Map each exam identifier to the array stored in its X_test_<exam>.npy file
# (used further down to select the students to test).
X_test_students = {}

# NOTE: this is a plain module-level loop, so `exam` stays bound to the last
# entry of `exams` once it finishes; avoid relying on that value in later cells.
for exam in exams:
    test_students_path = '../../data/features/X_test_{}.npy'.format(exam)
    X_test_students[exam] = np.load(test_students_path)

In [9]:
# Columns dropped from the feature frame before it is passed to the model.
cols_to_remove = ['academic_year_0', 'academic_year_1', 'course', 'route', # For now route is removed
                  'student', 'week', 'exam', 'grade', 'domicile']

# Test students per exam as plain sets, for cheap membership checks.
test_students_by_exam = {
    exam_id: set(np.ravel(students).tolist())
    for exam_id, students in X_test_students.items()
}

# For every week (1..12): load that week's model, predict for the test
# students of that week and write one JSON file of predictions.
for week in range(1, 13):

    print('** Week %s **' % (week))

    week_data = pd.DataFrame([d for d in data if d['week'] == week])

    if len(week_data) == 0:
        # No feature records for this week: nothing to predict.
        continue

    print('Generating predictions')

    # Load Model
    # NOTE: joblib files are pickle-based; only load models from a trusted location.
    model_path = '../../data/models/model_week_%s.pkl' % (week)
    print('Using model: %s' % (model_path))
    model = joblib.load(model_path)

    # test data
    # Each record is matched against the test students of its OWN exam (the
    # record's 'exam' field) instead of a loop variable left over from the
    # cell that loaded X_test_students, which always held the last exam.
    is_test_student = np.array([
        student in test_students_by_exam.get(exam_id, ())
        for student, exam_id in zip(week_data['student'], week_data['exam'])
    ], dtype=bool)
    test_data = week_data[is_test_student]

    if test_data.empty:
        # Estimators cannot predict on zero rows; skip instead of crashing.
        print('No test students found for week %s, skipping' % (week))
        continue

    # Missing feature values are replaced by 0 on a fresh frame (no in-place edit).
    X_test = test_data.drop(cols_to_remove, axis=1).fillna(0)
    # Kept available for inspection; it is not written to the predictions file.
    y_test = test_data['grade']

    # Predict
    predicted_labels = model.predict(X_test)
    # Probabilities
    probs = model.predict_proba(X_test)

    # Students
    # Names must come from the same filtered frame as X_test; taking them from
    # the unfiltered week_data pairs predictions with the wrong students.
    student_names = list(test_data['student'])

    # Predictions per student and week
    predictions = [ {
        'student': name,
        'prediction': bool(label),
        'probability': [float(p) for p in prob],
        'week': week
    } for name, label, prob in zip(student_names, predicted_labels, probs) ]

    # Write them!
    output_path = '../../data/predictions/predictions_%s_%s_week_%s.json' % (_course.upper(), _academic_year[1], week)
    print('Saving them to: %s' % (output_path))
    with open(output_path, 'w') as outfile:
        json.dump(predictions, outfile)

** Week 1 **
Generating predictions
Using model: ../../data/models/model_week_1.pkl
Saving them to: ../../data/predictions/predictions_CA116_2019_week_1.json
** Week 2 **
Generating predictions
Using model: ../../data/models/model_week_2.pkl
Saving them to: ../../data/predictions/predictions_CA116_2019_week_2.json
** Week 3 **
Generating predictions
Using model: ../../data/models/model_week_3.pkl
Saving them to: ../../data/predictions/predictions_CA116_2019_week_3.json
** Week 4 **
Generating predictions
Using model: ../../data/models/model_week_4.pkl
Saving them to: ../../data/predictions/predictions_CA116_2019_week_4.json
** Week 5 **
Generating predictions
Using model: ../../data/models/model_week_5.pkl
Saving them to: ../../data/predictions/predictions_CA116_2019_week_5.json
** Week 6 **
Generating predictions
Using model: ../../data/models/model_week_6.pkl
Saving them to: ../../data/predictions/predictions_CA116_2019_week_6.json
** Week 7 **
Generating predictions
Using model: ../