In [3]:
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

# Only surface TensorFlow log messages of severity ERROR.
tf.logging.set_verbosity(tf.logging.ERROR)

# Notebook-wide pandas display settings: show at most 20 rows and render
# floats with a single decimal.
pd.set_option('display.max_rows', 20)
pd.set_option('display.float_format', '{:.1f}'.format)

# Load the tab-separated semester table and preview the first rows.
# NOTE(review): the 'Unnamed: 0' column in the preview looks like a saved row
# index -- confirm whether it should be read with index_col=0 instead.
semesters = pd.read_csv('terminsstatus_17.txt', sep='\t')
semesters.head()


Unnamed: 0,program,lopnr,female,kull,termin,tnr,status,poang_p,kullnr,ptnr
0,E,1,0,20002,20002,15,-7,6.0,15,1
1,W,2,1,20112,20112,37,-7,15.0,37,1
2,D,3,1,20032,20032,21,-7,16.5,21,1
3,N,4,1,20112,20112,37,-7,30.0,37,1
4,W,5,1,20112,20112,37,-7,0.0,37,1


# Add columns for new features

## Points in the previous semester (for a student's first semester, use the overall mean)

In [4]:
# Feature `prev_points`: the points (poang_p) from the row directly above,
# provided that row belongs to the same student (lopnr); otherwise the mean of
# poang_p over the whole table.
#
# Each row is compared with its direct predecessor only, so this relies on a
# student's rows being stored consecutively and in chronological order
# -- NOTE(review): presumably guaranteed by the input file; confirm.
mean_points = semesters['poang_p'].mean()

# True where the row above has the same lopnr. The very first row is compared
# against NaN and is therefore False.
same_student_as_row_above = semesters['lopnr'].eq(semesters['lopnr'].shift(1))

# Keep the shifted points where the row above is the same student; everywhere
# else (a student's first row) fall back to the overall mean.
semesters['prev_points'] = semesters['poang_p'].shift(1).where(
    same_student_as_row_above, mean_points
)
semesters.head()


Unnamed: 0,program,lopnr,female,kull,termin,tnr,status,poang_p,kullnr,ptnr,prev_points
0,E,1,0,20002,20002,15,-7,6.0,15,1,16.1
1,W,2,1,20112,20112,37,-7,15.0,37,1,16.1
2,D,3,1,20032,20032,21,-7,16.5,21,1,16.1
3,N,4,1,20112,20112,37,-7,30.0,37,1,16.1
4,W,5,1,20112,20112,37,-7,0.0,37,1,16.1


## Points in the first semester

In [5]:
# Feature `points_first_semester`: the points (poang_p) of the first row of
# each student's block of rows, repeated on every row of that block.
#
# A block is a run of consecutive rows sharing the same lopnr
# -- NOTE(review): assumes each student's rows are contiguous and start with
# their first semester; confirm against the input file.

# True on the first row of every run (the very first row compares against NaN,
# so it always counts as a start -- no sentinel lopnr value is needed).
student_start = semesters['lopnr'].ne(semesters['lopnr'].shift(1))
# Consecutive run number: 1 for the first run, 2 for the next, ...
student_run = student_start.cumsum()

# Blank out everything except the first row of each run, then forward-fill
# inside each run. Filling per run (not globally) keeps a missing first value
# from being replaced by the previous student's points.
semesters['points_first_semester'] = (
    semesters['poang_p']
    .where(student_start)
    .groupby(student_run)
    .ffill()
)
semesters.head()

Unnamed: 0,program,lopnr,female,kull,termin,tnr,status,poang_p,kullnr,ptnr,prev_points,points_first_semester
0,E,1,0,20002,20002,15,-7,6.0,15,1,16.1,6.0
1,W,2,1,20112,20112,37,-7,15.0,37,1,16.1,15.0
2,D,3,1,20032,20032,21,-7,16.5,21,1,16.1,16.5
3,N,4,1,20112,20112,37,-7,30.0,37,1,16.1,30.0
4,W,5,1,20112,20112,37,-7,0.0,37,1,16.1,0.0


## Accumulated points

In [10]:
# Feature `accumulated_points`: running total of poang_p within each student's
# block of rows, including the current row.
#
# A block is a run of consecutive rows sharing the same lopnr
# -- NOTE(review): assumes each student's rows are contiguous and in
# chronological order; confirm against the input file.

# True on the first row of every run (the very first row compares against NaN,
# so it always counts as a start -- no sentinel lopnr value is needed).
student_start = semesters['lopnr'].ne(semesters['lopnr'].shift(1))
# Consecutive run number: 1 for the first run, 2 for the next, ...
student_run = student_start.cumsum()

# NOTE(review): assumes poang_p has no missing values. A grouped cumsum leaves
# NaN on a missing row and keeps accumulating afterwards, whereas adding row
# by row would turn every later total for that student into NaN.
semesters['accumulated_points'] = (
    semesters['poang_p'].groupby(student_run).cumsum()
)
semesters.head(40)

Unnamed: 0,program,lopnr,female,kull,termin,tnr,status,poang_p,kullnr,ptnr,prev_points,points_first_semester,accumulated_points
0,E,1,0,20002,20002,15,-7,6.0,15,1,16.1,6.0,6.0
1,W,2,1,20112,20112,37,-7,15.0,37,1,16.1,15.0,15.0
2,D,3,1,20032,20032,21,-7,16.5,21,1,16.1,16.5,16.5
3,N,4,1,20112,20112,37,-7,30.0,37,1,16.1,30.0,30.0
4,W,5,1,20112,20112,37,-7,0.0,37,1,16.1,0.0,0.0
5,F,6,0,20082,20082,31,-7,0.0,31,1,16.1,0.0,0.0
6,D,7,0,20102,20102,35,-7,5.0,35,1,16.1,5.0,5.0
7,V,8,0,20092,20092,33,-7,0.0,33,1,16.1,0.0,0.0
8,B,9,1,20022,20022,19,-7,0.0,19,1,16.1,0.0,0.0
9,K,10,0,20122,20122,39,-7,18.5,39,1,16.1,18.5,18.5


## Number of zero-point semesters

In [16]:
pd.options.display.max_rows = 40

# Feature `zero_semesters`: length of the current streak of consecutive
# zero-point rows (poang_p == 0) for the same student (lopnr), counted up to
# and including the current row. Rows with non-zero (or missing) points get 0.
#
# NOTE(review): the section heading reads like a running total of zero-point
# semesters, but the established values restart at 0 after any non-zero
# semester; that behaviour is kept here. If a running total per student is
# what the model should see, group `is_zero` by a per-student run id instead
# of `streak_id` below. Confirm the intended definition.
is_zero = semesters['poang_p'].eq(0)

# True on the first row of every run of identical lopnr values (the very first
# row compares against NaN, so it always counts as a start).
student_start = semesters['lopnr'].ne(semesters['lopnr'].shift(1))

# A new streak segment begins at every student change and at every non-zero
# row; all remaining rows of a segment are zero-point rows of the same
# student, so counting zeros inside the segment gives the streak length.
streak_restart = student_start | ~is_zero
streak_id = streak_restart.cumsum()

semesters['zero_semesters'] = (
    is_zero.astype('int64').groupby(streak_id).cumsum()
)
semesters.head(1000)

Unnamed: 0,program,lopnr,female,kull,termin,tnr,status,poang_p,kullnr,ptnr,prev_points,points_first_semester,accumulated_points,zero_semesters
0,E,1,0,20002,20002,15,-7,6.0,15,1,16.1,6.0,6.0,0
1,W,2,1,20112,20112,37,-7,15.0,37,1,16.1,15.0,15.0,0
2,D,3,1,20032,20032,21,-7,16.5,21,1,16.1,16.5,16.5,0
3,N,4,1,20112,20112,37,-7,30.0,37,1,16.1,30.0,30.0,0
4,W,5,1,20112,20112,37,-7,0.0,37,1,16.1,0.0,0.0,1
5,F,6,0,20082,20082,31,-7,0.0,31,1,16.1,0.0,0.0,1
6,D,7,0,20102,20102,35,-7,5.0,35,1,16.1,5.0,5.0,0
7,V,8,0,20092,20092,33,-7,0.0,33,1,16.1,0.0,0.0,1
8,B,9,1,20022,20022,19,-7,0.0,19,1,16.1,0.0,0.0,1
9,K,10,0,20122,20122,39,-7,18.5,39,1,16.1,18.5,18.5,0
