# Handcrafting Features

Importing libraries:

In [1]:
import pandas as pd
import datetime
import json
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Programming work

Computer programming work students submit to a grading platform

In [2]:
df_programs = pd.read_json('../../data/raw/programming.json')

In [3]:
# df_programs.head(2)

In [4]:
min(df_programs['date']), max(df_programs['date'])

(Timestamp('2015-12-16 16:35:56'), Timestamp('2018-12-16 08:46:56'))

In [5]:
def get_programs(course, start, end):
    """
    Select the programming submissions of one course within a date window.

    :param course: Value matched against the 'module' column of df_programs
    :param start: Earliest submission date kept (inclusive)
    :param end: Latest submission date kept (inclusive)
    :return: Rows of df_programs for `course` dated between `start` and `end`
    """
    is_course = df_programs['module'] == course
    not_before_start = df_programs['date'] >= start
    not_after_end = df_programs['date'] <= end
    return df_programs[is_course & not_before_start & not_after_end]

## Demographic & Academics data

In [6]:
df_demographics = pd.read_csv('../../data/demographics/summary.csv')

In [7]:
# df_demographics.head(2)

In [8]:
df_demographics.dtypes

academic_year_0        int64
academic_year_1        int64
cao_points           float64
domicile              object
math_leaving_cert    float64
student               object
dtype: object

In [9]:
def get_demographic_data(academic_year):
    """
    Select the demographic records of one academic year.

    :param academic_year: Pair (first_year, second_year), e.g. (2016, 2017)
    :return: Rows of df_demographics whose 'academic_year_0' and 'academic_year_1'
             columns equal the two years of `academic_year`
    """
    first_year = academic_year[0]
    second_year = academic_year[1]
    same_first_year = df_demographics['academic_year_0'] == first_year
    same_second_year = df_demographics['academic_year_1'] == second_year
    return df_demographics[same_first_year & same_second_year]

In [10]:
len(get_demographic_data((2016, 2017)))

125

## Web data

In [11]:
df_web = pd.read_csv('../../data/raw/web_data.csv')

In [12]:
df_web.head(2)

Unnamed: 0,date,module,ip,resource,user
0,150921-10:02.49,ca116,136.206.102.206,/,gctest2
1,150921-10:57.31,ca116,136.206.202.40,/,baranj2


In [13]:
df_web['date'] = pd.to_datetime(df_web['date'], format='%y%m%d-%H:%M.%S')

In [14]:
df_web.head(2)

Unnamed: 0,date,module,ip,resource,user
0,2015-09-21 10:02:49,ca116,136.206.102.206,/,gctest2
1,2015-09-21 10:57:31,ca116,136.206.202.40,/,baranj2


In [15]:
# Keep only requests for HTML pages.
# regex=False matches the literal text ".html"; as a regular expression the dot
# would match any character. na=False drops rows whose resource is missing
# instead of producing a NaN mask that cannot be used for boolean indexing.
df_web = df_web[df_web['resource'].str.contains(".html", regex=False, na=False)]

In [16]:
def get_web_data(course, start, end):
    """
    Select the web events of one course within a date window.

    :param course: Value matched against the 'module' column of df_web
    :param start: Earliest event date kept (inclusive)
    :param end: Latest event date kept (inclusive)
    :return: Rows of df_web for `course` dated between `start` and `end`
    """
    is_course = df_web['module'] == course
    not_before_start = df_web['date'] >= start
    not_after_end = df_web['date'] <= end
    return df_web[is_course & not_before_start & not_after_end]

## Grades

Results on the laboratory computer-based examinations:

In [17]:
df_grades = pd.read_json('../../data/raw/grades.json')

In [18]:
# df_grades.head(2)

In [19]:
def get_grades(course, academic_year):
    """
    Select the exam grades of one course in one academic year.

    :param course: Value matched against the 'module' column of df_grades
    :param academic_year: Pair (first_year, second_year), e.g. (2016, 2017)
    :return: Rows of df_grades matching both years and the course
    """
    first_year = academic_year[0]
    second_year = academic_year[1]
    in_academic_year = ((df_grades['academic_year_0'] == first_year) &
                        (df_grades['academic_year_1'] == second_year))
    is_course = df_grades['module'] == course
    return df_grades[in_academic_year & is_course]

In [20]:
grades_students = get_grades('ca116', (2016, 2017))

In [21]:
# grades_students[grades_students['user'] == 'agnewm4']

In [22]:
student_names = grades_students.user.unique()

In [23]:
len(student_names)

126

In [24]:
exam_weeks = sorted(grades_students.exam_week.unique())

Weeks of the exams:

In [25]:
exam_weeks

[4, 8, 12]

Dictionary that maps each week to the next exam we should be predicting:

In [26]:
def get_week_to_exam(exam_weeks, num_weeks=12):
    """
    Map every week of the semester to the exam week it leads up to.

    Weeks are walked backwards from `num_weeks` down to 1; each week is assigned
    the closest exam week that is greater than or equal to it. Weeks after the
    last exam are assigned the last element of `exam_weeks`.

    :param exam_weeks: Non-empty sequence of exam week numbers in ascending order
    :param num_weeks: Number of weeks in the semester (defaults to 12)
    :return: Dictionary {week: exam week} for weeks 1..num_weeks
    :raises IndexError: If `exam_weeks` is empty
    """
    week_to_exam = {}
    # Until an exam week is met while walking backwards, the target is the last exam
    upcoming_exam = exam_weeks[-1]
    for week in range(num_weeks, 0, -1):
        if week in exam_weeks:
            upcoming_exam = week
        week_to_exam[week] = upcoming_exam
    return week_to_exam

In [27]:
get_week_to_exam(exam_weeks)

{12: 12, 11: 12, 10: 12, 9: 12, 8: 8, 7: 8, 6: 8, 5: 8, 4: 4, 3: 4, 2: 4, 1: 4}

In [28]:
len(get_grades('ca116', (2018, 2019)))

248

## Get data about the course

Start and end of the semester based on the academic year:

In [29]:
# Semester boundaries per academic year.
# Key: (first_year, second_year) of the academic year.
# Value: (start, end) dates as 'YYYY-MM-DD' strings, the format parsed by to_datetime.
semester_dates = {
    (2015, 2016): ('2015-09-21', '2015-12-12'),
    (2016, 2017): ('2016-09-19', '2016-12-10'),
    (2017, 2018): ('2017-09-18', '2017-12-09'),
    (2018, 2019): ('2018-09-24', '2018-12-15'),
}

In [30]:
course = 'ca116'

In [31]:
semester = '2015/2016'

The year is taken to be the second year of the academic year:

In [32]:
year = int(semester.split('/')[1])
year

2016

In [33]:
dates = semester_dates[(2015, 2016)]
dates

('2015-09-21', '2015-12-12')

In [34]:
def to_datetime(dates):
    """
    Parse a pair of 'YYYY-MM-DD' strings into datetimes.

    :param dates: Sequence whose first two items are the start and end date strings
    :return: Tuple (start, end) of datetime.datetime objects at midnight
    """
    date_format = "%Y-%m-%d"
    parsed = [datetime.datetime.strptime(dates[position], date_format)
              for position in (0, 1)]
    return parsed[0], parsed[1]

In [35]:
start, end = to_datetime(dates)
start, end

(datetime.datetime(2015, 9, 21, 0, 0), datetime.datetime(2015, 12, 12, 0, 0))

Get **timetable** for lectures, labs and exams:

In [36]:
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
import matplotlib.dates as mdates

def get_lectures_labs_exams(course, year):
    """
    Read the timetable file of a course and return its lectures, labs and lab exams.

    The file read is ``../../data/programs/<COURSE>.<year>.lectures``. Blank lines
    and lines starting with ``#`` are skipped. Every other line is comma-separated
    and its first field selects the record type:

    * ``Lecture, <count>, <days>, <hour>, <duration>``
    * ``Lab, <count>, <weekday name>, <days>, <hour>, <duration>``
    * ``Exam, <exam week>, <weeks>, <days>, <hour>, <duration>``

    Lines with any other first field are ignored, as are weekday names other
    than the seven English day names.

    :param course: Course code; it is upper-cased to build the file name
    :param year: Year used in the file name
    :return: Tuple (lectures, labs, lab_exams, lab_week_days) where
             lectures is {count: [days, hour, duration]},
             labs is {count: [days, hour, duration]},
             lab_exams is {exam week: [weeks, days, hour, duration]} and
             lab_week_days is a matplotlib WeekdayLocator over the lab weekdays
    """
    lectures_file = '../../data/programs/%s.%s.lectures' % (course.upper(), year)

    # Weekday names accepted in "Lab" records, mapped to matplotlib weekday constants
    weekday_constants = {
        "Monday": MO,
        "Tuesday": TU,
        "Wednesday": WE,
        "Thursday": TH,
        "Friday": FR,
        "Saturday": SA,
        "Sunday": SU,
    }

    lectures = {}
    labs = {}
    lab_exams = {}
    labs_byweekday = []

    with open(lectures_file, 'r') as f:
        for line in f:
            line = line.strip()
            if line == "" or line.startswith("#"):
                continue
            fields = [field.strip() for field in line.split(",")]
            record_type = fields[0]
            if record_type == "Lecture":
                # Lecture, 1, 1, 10, 2
                # Tuesday, 10 AM, 2-hours long
                count = int(fields[1])
                days = int(fields[2])
                hour = int(fields[3])
                duration = int(fields[4])
                lectures[count] = [days, hour, duration]
            elif record_type == "Lab":
                # Lab, 1, Tuesday, 1, 14, 2
                count = int(fields[1])
                day_week = fields[2]
                days = int(fields[3])
                hour = int(fields[4])
                duration = int(fields[5])
                labs[count] = [days, hour, duration]
                if day_week in weekday_constants:
                    labs_byweekday.append(weekday_constants[day_week])
            elif record_type == "Exam":
                # Exam, 6, 5, 1, 14, 2
                week_exam = int(fields[1])
                weeks = int(fields[2])
                days = int(fields[3])
                hour = int(fields[4])
                duration = int(fields[5])
                lab_exams[week_exam] = [weeks, days, hour, duration]

    lab_week_days = mdates.WeekdayLocator(byweekday=labs_byweekday)

    return lectures, labs, lab_exams, lab_week_days

In [37]:
lectures, labs, lab_exams, lab_week_days = get_lectures_labs_exams('ca116', 2016)

Lectures were on Tuesday at 10 AM for 2 hours, Wednesday at 10 AM for 1 hour and Thursday at 10 AM for 1 hour:

In [38]:
lectures

{1: [1, 10, 2], 2: [2, 10, 1], 3: [3, 10, 1]}

Labs were on Tuesday and Thursday at 2 PM for 2 hours:

In [39]:
labs

{1: [1, 14, 2], 2: [3, 14, 2]}

Lab exams were in weeks 6 and 12, in the Thursday lab slot:

In [40]:
lab_exams

{6: [5, 3, 14, 2], 12: [11, 3, 14, 2]}

Get **periods** (time intervals):

In [41]:
def get_periods(start, end, lectures, labs, lab_exams):
    """
    Build the time intervals of a semester.

    Weeks are numbered from 1 and each lasts seven days starting at `start`;
    a new week is opened as long as its first day is before `end`.

    :param start: Start date of the semester
    :param end: End date of the semester
    :param lectures: {count: [days, hour, duration]} weekly lecture slots
    :param labs: {count: [days, hour, duration]} weekly lab slots
    :param lab_exams: {exam week: [weeks, days, hour, duration]}, offsets taken from `start`
    :return: Tuple of dictionaries (weeks, lectures, labs, exams):
             {week: [start, end]}, {week: [[start, end], ...]},
             {week: [[start, end], ...]} and {exam week: [start, end]}
    """
    def _weekly_slots(week_start, timetable):
        # One [start, end] pair per timetable entry, offset from the start of the week
        slots = []
        for days, hours, duration in timetable.values():
            begins = week_start + datetime.timedelta(days=days, hours=hours)
            slots.append([begins, begins + datetime.timedelta(hours=duration)])
        return slots

    one_week = datetime.timedelta(days=7)
    week_dts, lectures_dts, labs_dts = {}, {}, {}

    week_number = 1
    week_start = start
    while week_start < end:
        following_week_start = week_start + one_week
        week_dts[week_number] = [week_start, following_week_start]
        lectures_dts[week_number] = _weekly_slots(week_start, lectures)
        labs_dts[week_number] = _weekly_slots(week_start, labs)
        week_number += 1
        week_start = following_week_start

    # Exams are placed relative to the start of the semester, not to a week
    lab_exams_dts = {}
    for exam_week, (weeks, days, hours, duration) in lab_exams.items():
        begins = start + datetime.timedelta(weeks=weeks, days=days, hours=hours)
        lab_exams_dts[exam_week] = [begins, begins + datetime.timedelta(hours=duration)]

    return week_dts, lectures_dts, labs_dts, lab_exams_dts

In [42]:
week_dts, lectures_dts, labs_dts, lab_exam_dts = get_periods(start, end, lectures, labs, lab_exams)

Dictionary of intervals for each week e.g. week 1 

In [43]:
week_dts[1]

[datetime.datetime(2015, 9, 21, 0, 0), datetime.datetime(2015, 9, 28, 0, 0)]

Dictionary of intervals for each lecture e.g. lectures in week 2

In [44]:
lectures_dts[2]

[[datetime.datetime(2015, 9, 29, 10, 0),
  datetime.datetime(2015, 9, 29, 12, 0)],
 [datetime.datetime(2015, 9, 30, 10, 0),
  datetime.datetime(2015, 9, 30, 11, 0)],
 [datetime.datetime(2015, 10, 1, 10, 0),
  datetime.datetime(2015, 10, 1, 11, 0)]]

Dictionary of intervals for each lab e.g. labs in week 4

In [45]:
labs_dts[4]

[[datetime.datetime(2015, 10, 13, 14, 0),
  datetime.datetime(2015, 10, 13, 16, 0)],
 [datetime.datetime(2015, 10, 15, 14, 0),
  datetime.datetime(2015, 10, 15, 16, 0)]]

Dictionary of lab exams:

In [46]:
lab_exam_dts

{6: [datetime.datetime(2015, 10, 29, 14, 0),
  datetime.datetime(2015, 10, 29, 16, 0)],
 12: [datetime.datetime(2015, 12, 10, 14, 0),
  datetime.datetime(2015, 12, 10, 16, 0)]}

Get lab hours:

In [47]:
def get_lab_times(labs_dts):
    """
    Expand the lab intervals of every week into hourly time stamps.

    Each [start, end] interval is stepped through one hour at a time, the end
    hour included, and every step is recorded as [year, month, day, hour].

    :param labs_dts: {week: [[start, end], ...]} lab intervals
    :return: {week: [[year, month, day, hour], ...]}
    """
    one_hour = datetime.timedelta(hours=1)
    lab_times = {}
    for week, sessions in labs_dts.items():
        hours_of_week = lab_times.setdefault(week, [])
        for session_start, session_end in sessions:
            tick = session_start
            while tick <= session_end:
                hours_of_week.append([tick.year, tick.month, tick.day, tick.hour])
                tick = tick + one_hour
    return lab_times

In [48]:
lab_times = get_lab_times(labs_dts)

Lab times for week 5:

In [49]:
lab_times[5]

[[2015, 10, 20, 14],
 [2015, 10, 20, 15],
 [2015, 10, 20, 16],
 [2015, 10, 22, 14],
 [2015, 10, 22, 15],
 [2015, 10, 22, 16]]

Get **labs** and **programs**:

In [50]:
def get_lab_and_programs(course, year):
    """
    Read the labs of a course and the programs that belong to each lab.

    The file read is ``../../data/programs/<COURSE>.<year>.programs``. Blank lines
    and comment lines (first non-blank character ``#``) are skipped. Every other
    line is whitespace-separated: the first field is the lab, the second the
    program. A lab written with an ``.html`` suffix is cut at that suffix.

    :param course: Course code; it is upper-cased to build the file name
    :param year: Year used in the file name
    :return: Tuple (programs_per_lab, lab_names, lab_programs):
             {lab: [program, ...]}, the labs in order of first appearance and
             the distinct programs in order of first appearance
    :raises IndexError: If a data line has fewer than two fields
    """

    programs_file = '../../data/programs/%s.%s.programs' % (course.upper(), year)

    with open(programs_file, 'r') as f:
        programs_file_contents = f.read()

    programs_per_lab = {}
    lab_programs = []
    lab_names = []
    seen_programs = set()  # constant-time membership test for lab_programs

    for raw_line in programs_file_contents.splitlines():

        # Strip before testing for a comment so that indented comments are skipped too
        line = raw_line.strip()
        if line == "" or line.startswith("#"):
            continue

        fields = line.split()
        lab, program = fields[0], fields[1]

        if ".html" in lab:
            lab = lab.split(".html")[0]

        if lab not in programs_per_lab:
            programs_per_lab[lab] = []
            lab_names.append(lab)
        programs_per_lab[lab].append(program)

        if program not in seen_programs:
            seen_programs.add(program)
            lab_programs.append(program)

    return programs_per_lab, lab_names, lab_programs

In [51]:
programs_per_lab, all_lab_names, all_lab_programs = get_lab_and_programs('ca116', 2016)

In [52]:
print("Number of labs:", len(programs_per_lab))

Number of labs: 20


In [53]:
print("Labs:", ", ".join(str(lab) for lab in all_lab_names))

Labs: /labsheet-04, /labsheet-05, /labsheet-06, /labsheet-07, /labsheet-08, /labsheet-09, /labsheet-10, /labsheet-11-boot-camp, /labsheet-12-more-boot-camp, /labsheet-13, /labsheet-14, /labsheet-15, /labsheet-16, /labsheet-17, /labsheet-18, /labsheet-19, /labsheet-20, /labsheet-pt-01, /labsheet-pt-02, /labsheet-pt-03


In [54]:
print("Number of distinct programs:", len(all_lab_programs))

Number of distinct programs: 112


Dictionary of programs per lab:

In [55]:
programs_per_lab['/labsheet-04']

['counting-up.py',
 'counting-up-2.py',
 'counting-down.py',
 'counting-even.py',
 'counting-odd.py',
 'counting-seven.py',
 'counting-multiples.py',
 'fizz-buzz.py',
 'fib-n.py']

Get **resources** (material):

In [56]:
def get_resources(course, year, start_semester):
    """
    Read the resources (course material) students can access.

    The file read is ``../../data/programs/<COURSE>.<year>.resources``. Blank lines
    and lines starting with ``#`` are skipped. Every other line is comma-separated
    and its first field selects the record type:

    * ``Week, <week>, <resource>, <days>`` - a weekly resource, posted
      ``<week> - 1`` weeks and ``<days>`` days after the start of the semester
    * ``Main, <resource>, <weeks>, <days>`` - a resource available throughout the
      semester, posted ``<weeks>`` weeks and ``<days>`` days after its start

    Lines with any other first field are ignored.

    :param course: Course code; it is upper-cased to build the file name
    :param year: Year used in the file name
    :param start_semester: Start of the semester
    :return: Tuple (resources_per_week, resource_post_dt, main_resources,
             all_resources, week_resources): {week: [resource, ...]},
             {resource: posting datetime}, the "Main" resources, every resource
             in file order, and the "Week" resources in file order
    """

    resources_file = '../../data/programs/%s.%s.resources' % (course.upper(), year)

    resources_per_week = {}
    resource_post_dt = {}
    main_resources = []
    all_resources = []
    week_resources = []

    with open(resources_file, 'r') as f:
        for line in f:
            line = line.strip()
            if line == "" or line.startswith("#"):
                continue
            fields = [field.strip() for field in line.split(",")]
            record_type = fields[0]
            if record_type == "Week":
                # Weekly resources
                # Week, 1, /01-intro, 1
                week = int(fields[1])
                resource = fields[2]
                days = int(fields[3])
                resources_per_week.setdefault(week, []).append(resource)
                resource_post_dt[resource] = start_semester + datetime.timedelta(weeks=week - 1, days=days)
                # Record this resource once; extending with the whole week's list
                # would re-add every resource already seen for that week
                all_resources.append(resource)
                week_resources.append(resource)
            elif record_type == "Main":
                # Resources available throughout the semester
                # Main, /index, 0, 0
                resource = fields[1]
                weeks = int(fields[2])
                days = int(fields[3])
                main_resources.append(resource)
                resource_post_dt[resource] = start_semester + datetime.timedelta(weeks=weeks, days=days)
                all_resources.append(resource)

    return resources_per_week, resource_post_dt, main_resources, all_resources, week_resources

In [57]:
resources_per_week, resource_post_dt, main_resources, all_resources, week_resources = \
    get_resources('ca116', 2016, start)

Material for week 4:

In [58]:
resources_per_week[4]

['/extra-tasks-04',
 '/08-truthiness',
 '/09-strings',
 '/labsheet-06',
 '/10-argv',
 '/11-string-methods',
 '/labsheet-07']

Dictionary of when each resource was posted:

In [59]:
resource_post_dt['/01-intro']

datetime.datetime(2015, 9, 22, 0, 0)

## Feature Engineering

Handcrafting features per student per week based on their programming work, web events and characteristics:

In [60]:
# Marker looked for in the 'ip' of a web event to count it as on-campus
# (presumably the address range of the DCU network - TODO confirm)
DCU_IP = "136.206."
# datetime.weekday() values counted as weekend: 5 is Saturday, 6 is Sunday
WEEKEND = [5, 6]

In [61]:
# Lookup table {code: distance} read from a comma-separated file.
# handcraft_features indexes it with a student's 'domicile' value, so the codes
# are presumably domicile codes - TODO confirm the unit of the distances.
distances = {}

distances_properties = "../../data/demographics/distances"
with open(distances_properties) as f:

    for line in f:

        # Skip blank lines and comment lines
        line = line.strip()
        if line == "" or line.startswith("#"):
            continue
        # First field is the code, second field the distance as a number
        l = line.split(",")
        code = l[0].strip()
        distance = float(l[1].strip())

        distances[code] = distance

In [62]:
MAX_DIST = max(distances.values())

In [63]:
# Entry routes keyed by an inclusive (low, high) range of the 'cao_points' value.
# get_route returns the label of the first range containing the value and 'CAO'
# when no range matches. The ranges look like sentinel codes stored in place of
# real points - NOTE(review): confirm with the data owner.
cao_dict = {
    (878, 878): 'Mature',
    (999, 999): 'Deferrals',
    (777, 777): 'FETAC',
    (888, 888): 'McAlleese',
    (978, 979): 'HEAR',
    (968, 969): 'DARE',
    (976, 976): 'Access',
    (778, 778): 'Colaiste Dhulaigh Science',
    (669, 669): 'ELITE Sports',
}

In [64]:
def get_route(points):
    """
    Translate a 'cao_points' value into the name of an entry route.

    :param points: Value to look up in the inclusive ranges of cao_dict
    :return: Label of the first range of cao_dict containing `points`,
             or 'CAO' when no range contains it
    """
    matching_labels = (label for (low, high), label in cao_dict.items()
                       if low <= points <= high)
    return next(matching_labels, 'CAO')

In [65]:
get_route(300), get_route(976), get_route(978)

('CAO', 'Access', 'HEAR')

In [66]:
def handcraft_features(course, academic_year):
    """
    Build one feature dictionary per student per week for a course and academic year.

    For each of the 12 weeks and each student with a grade record, the following
    values are computed and accumulated, so the dictionary of week N also holds
    the weekly values of weeks 1..N-1:

    * ``program_correct_W<week>``: fraction of the week's programs solved correctly
    * ``cum_programs_W<week>``: fraction of all programs released so far solved correctly
    * ``campus_rate_W<week>``: fraction of the week's web events coming from a DCU_IP address
    * ``week_rate_W<week>``: fraction of the week's web events on WEEKEND days
      (note: despite its name this is the weekend share)
    * ``coverage_W<week>``: distinct resources visited / resources of the week
    * ``exam_W<week>``: grade of an exam held the previous week

    In an exam week only activity before the start of the exam is counted.
    Generation stops, returning what was built so far, at the first week that
    has not finished yet.

    :param course: Course code, e.g. 'ca116'
    :param academic_year: Pair (first_year, second_year), a key of semester_dates
    :return: List of feature dictionaries (JSON-serialisable scalars)
    :raises KeyError: If the semester has fewer than 12 weeks, or the timetable has
                      no exam entry for one of the exam weeks
    """

    print('---- Handcrafting features ----')
    print('Course: %s' % (course))
    print('Academic year: %s/%s' % (academic_year[0], academic_year[1]))

    features = []
    now = datetime.datetime.now()

    # Semester dates: start & end
    dates = semester_dates[academic_year]
    start, end = to_datetime(dates)
    print('Semester Start: %s, End: %s \n' % (start, end))

    # PROGRAMS
    _programs = get_programs(course, start, end)

    # Web logs
    _web = get_web_data(course, start, end)

    # GRADES
    _grades = get_grades(course, academic_year)

    # Students & Exam Weeks
    student_names = _grades.user.unique()
    print('# Students: %d' % (len(student_names)))
    # NOTE: exam weeks are fixed rather than read from the grades
    exam_weeks = [4, 8, 12] # sorted(_grades.exam_week.unique())
    print('Exam weeks: ' + ', '.join([str(e) for e in exam_weeks]))
    week_to_exam = get_week_to_exam(exam_weeks)

    # DEMOGRAPHICS AND ACADEMICS
    _demographics = get_demographic_data(academic_year)
    _student_demographics = {}
    for student_name in student_names:
        demos = _demographics[_demographics['student'] == student_name]
        # Defaults for students without a demographic record
        cao_points = 0
        route = ''
        math_lc = 0
        domicile = MAX_DIST
        if len(demos) > 0:
            row = demos.iloc[0, :]
            cao_points = float(row['cao_points'])
            route = str(get_route(cao_points))
            math_lc = float(row['math_leaving_cert'])
            # pd.notnull detects every missing-value marker; an identity test
            # against np.nan only matches that one float object
            if pd.notnull(row['domicile']):
                domicile = float(distances[row['domicile']])
            else:
                domicile = MAX_DIST
        _student_demographics[student_name] = dict(cao_points=cao_points, route=route,
                                                   math_lc=math_lc, domicile=domicile)

    # Lectures, Labs, Exams
    lectures, labs, lab_exams, _ = get_lectures_labs_exams(course, academic_year[1])

    # Timestamp Periods (only the week and exam intervals are needed here)
    week_periods, _, _, exam_periods = get_periods(start, end, lectures, labs, lab_exams)

    # Labs and Programs
    programs_per_lab, _, _ = get_lab_and_programs(course, academic_year[1])
    programs_cumulative = []

    # Resources
    resources_per_week = get_resources(course, academic_year[1], start)[0]

    # (student, year) -> list of (feature name, value) accumulated week after week
    past_features = {}

    for week in range(1, 13):

        print('** Week %s **' % (week))

        # NEXT EXAM: Target
        next_exam = week_to_exam[week]
        print('Target exam: %s' % (next_exam))

        # Start and End week
        start_week, end_week = week_periods[week]
        if end_week > now:
            print('This class and week is now going on, exiting...')
            return features

        print('Start: %s, End: %s' % (start_week, end_week))
        if week in exam_weeks: # Exam happens this week
            # Only activity before the exam may be used to predict it
            start_exam, end_exam = exam_periods[week]
            end_week = start_exam
            print('EXAM')

        # Resources week (a week without any resource yields an empty list)
        resources_week = resources_per_week.get(week, [])
        print('# Resources: %s' % (len(resources_week)))
        # Labs week
        labs_week = [r for r in resources_week if r in programs_per_lab]
        # Programs week
        programs_week = [program for lab, lab_programs in programs_per_lab.items()
                         if lab in labs_week for program in lab_programs]
        print('# Programs week: %s' % (len(programs_week)))
        programs_cumulative.extend(programs_week)
        print('# Programs cumulative: %s' % (len(programs_cumulative)))

        for student_name in student_names:

            # DEMOGRAPHICS
            cao_points = _student_demographics[student_name]['cao_points']
            route = _student_demographics[student_name]['route']
            math_lc = _student_demographics[student_name]['math_lc']
            domicile = _student_demographics[student_name]['domicile']

            # PROGRAMS
            programs = _programs[(_programs['user'] == student_name) &
                                 (_programs['date'] >= start_week) &
                                 (_programs['date'] <= end_week) &
                                 (_programs['task'].isin(programs_week))]
            program_correct = _fraction_solved(programs, programs_week)

            # PROGRAMS CUMULATIVE
            cum_programs = _programs[(_programs['user'] == student_name) &
                                     (_programs['date'] >= start) &
                                     (_programs['date'] <= end_week) &
                                     (_programs['task'].isin(programs_cumulative))]
            cum_program_correct = _fraction_solved(cum_programs, programs_cumulative)

            # WEB
            web = _web[(_web['user'] == student_name) &
                       (_web['date'] >= start_week) &
                       (_web['date'] <= end_week) &
                       (_web['resource'].str.split('.').str[0].isin(resources_week))]

            # On-campus / Off-campus
            campus_rate = 0.
            # Week / Weekend
            week_rate = 0.
            # Resource Coverage
            coverage = 0.
            if len(web) > 0:
                # An on-campus address starts with DCU_IP; a substring test would also
                # accept addresses that merely contain it further along
                on_campus = web['ip'].str.startswith(DCU_IP, na=False)
                campus_rate = float(on_campus.sum()) / len(web)
                on_weekend = web['date'].dt.weekday.isin(WEEKEND)
                week_rate = float(on_weekend.sum()) / len(web)
                visited = web['resource'].str.split('.').str[0]
                coverage = float(visited.nunique()) / len(resources_week)

            # GRADES
            grades = _grades[(_grades['user'] == student_name) &
                             (_grades['exam_week'] == next_exam)]
            grade = 0
            if len(grades) > 0:
                grade = int(grades.iloc[0, :]['grade'])

            # Add features to dictionary for each student and academic year
            student_key = (student_name, academic_year[1])
            past_features.setdefault(student_key, [])
            past_features[student_key].extend([
                ('program_correct_W' + str(week), program_correct),
                ('cum_programs_W' + str(week), cum_program_correct),
                ('campus_rate_W' + str(week), campus_rate),
                ('week_rate_W' + str(week), week_rate),
                ('coverage_W' + str(week), coverage),
            ])
            # Previous exam grades
            last_week = week - 1
            if last_week in exam_weeks: # Exam happened last week
                grades = _grades[(_grades['user'] == student_name) &
                                 (_grades['exam_week'] == last_week)]
                last_grade = 0
                if len(grades) > 0:
                    last_grade = int(grades.iloc[0, :]['grade'])
                past_features[student_key].append(
                    ('exam_W' + str(last_week), last_grade),
                )

            # FEATURE
            feature = {
                'course': course,
                'academic_year_0': academic_year[0],
                'academic_year_1': academic_year[1],
                'week': week,
                'exam': next_exam,
                'student': student_name,
                'cao_points': cao_points,
                'route': route,
                'math_lc': math_lc,
                'domicile': domicile,
                'grade': grade,
            }
            for name, value in past_features[student_key]:
                feature[name] = value

            features.append(feature)

    return features


def _fraction_solved(submissions, tasks):
    """
    Fraction of `tasks` with at least one correct submission in `submissions`.

    :param submissions: DataFrame with 'task' and 'correct' columns
    :param tasks: List of task names; a name listed twice is counted twice
    :return: Float between 0 and 1; 0. when `tasks` is empty
    """
    if len(tasks) == 0:
        return 0.
    solved = set(submissions.loc[submissions['correct'] == True, 'task'])
    return float(sum(1 for task in tasks if task in solved)) / len(tasks)

### Course: CA116 - Academic Year: 2016/2017

In [67]:
_course = 'ca116'
_academic_year = (2016, 2017)

features = handcraft_features(_course, _academic_year)

---- Handcrafting features ----
Course: ca116
Academic year: 2016/2017
Semester Start: 2016-09-19 00:00:00, End: 2016-12-10 00:00:00 

# Students: 126
Exam weeks: 4, 8, 12
** Week 1 **
Target exam: 4
Start: 2016-09-19 00:00:00, End: 2016-09-26 00:00:00
# Resources: 7
# Programs week: 0
# Programs cumulative: 0
** Week 2 **
Target exam: 4
Start: 2016-09-26 00:00:00, End: 2016-10-03 00:00:00
# Resources: 5
# Programs week: 0
# Programs cumulative: 0
** Week 3 **
Target exam: 4
Start: 2016-10-03 00:00:00, End: 2016-10-10 00:00:00
# Resources: 4
# Programs week: 22
# Programs cumulative: 22
** Week 4 **
Target exam: 4
Start: 2016-10-10 00:00:00, End: 2016-10-17 00:00:00
EXAM
# Resources: 4
# Programs week: 11
# Programs cumulative: 33
** Week 5 **
Target exam: 8
Start: 2016-10-17 00:00:00, End: 2016-10-24 00:00:00
# Resources: 5
# Programs week: 17
# Programs cumulative: 50
** Week 6 **
Target exam: 8
Start: 2016-10-24 00:00:00, End: 2016-10-31 00:00:00
# Resources: 3
# Programs week: 11
#

In [68]:
len(features)

1512

In [69]:
'{:,}'.format(len(features))

'1,512'

In [70]:
filename = '../../data/features/features_%s_%s.json' % (_course.upper(), _academic_year[1])

with open(filename, 'w') as outfile:
    json.dump(features, outfile)

### Course: CA116 - Academic Year: 2017/2018

In [71]:
_course = 'ca116'
_academic_year = (2017, 2018)

features = handcraft_features(_course, _academic_year)

---- Handcrafting features ----
Course: ca116
Academic year: 2017/2018
Semester Start: 2017-09-18 00:00:00, End: 2017-12-09 00:00:00 

# Students: 156
Exam weeks: 4, 8, 12
** Week 1 **
Target exam: 4
Start: 2017-09-18 00:00:00, End: 2017-09-25 00:00:00
# Resources: 6
# Programs week: 18
# Programs cumulative: 18
** Week 2 **
Target exam: 4
Start: 2017-09-25 00:00:00, End: 2017-10-02 00:00:00
# Resources: 4
# Programs week: 21
# Programs cumulative: 39
** Week 3 **
Target exam: 4
Start: 2017-10-02 00:00:00, End: 2017-10-09 00:00:00
# Resources: 4
# Programs week: 23
# Programs cumulative: 62
** Week 4 **
Target exam: 4
Start: 2017-10-09 00:00:00, End: 2017-10-16 00:00:00
EXAM
# Resources: 5
# Programs week: 14
# Programs cumulative: 76
** Week 5 **
Target exam: 8
Start: 2017-10-16 00:00:00, End: 2017-10-23 00:00:00
# Resources: 3
# Programs week: 21
# Programs cumulative: 97
** Week 6 **
Target exam: 8
Start: 2017-10-23 00:00:00, End: 2017-10-30 00:00:00
# Resources: 3
# Programs week: 

In [72]:
'{:,}'.format(len(features))

'1,872'

In [73]:
filename = '../../data/features/features_%s_%s.json' % (_course.upper(), _academic_year[1])

with open(filename, 'w') as outfile:
    json.dump(features, outfile)

### Course: CA116 - Academic Year: 2018/2019

In [74]:
_course = 'ca116'
_academic_year = (2018, 2019)

features = handcraft_features(_course, _academic_year)

---- Handcrafting features ----
Course: ca116
Academic year: 2018/2019
Semester Start: 2018-09-24 00:00:00, End: 2018-12-15 00:00:00 

# Students: 130
Exam weeks: 4, 8, 12
** Week 1 **
Target exam: 4
Start: 2018-09-24 00:00:00, End: 2018-10-01 00:00:00
# Resources: 5
# Programs week: 18
# Programs cumulative: 18
** Week 2 **
Target exam: 4
Start: 2018-10-01 00:00:00, End: 2018-10-08 00:00:00
# Resources: 4
# Programs week: 27
# Programs cumulative: 45
** Week 3 **
Target exam: 4
Start: 2018-10-08 00:00:00, End: 2018-10-15 00:00:00
# Resources: 6
# Programs week: 26
# Programs cumulative: 71
** Week 4 **
Target exam: 4
Start: 2018-10-15 00:00:00, End: 2018-10-22 00:00:00
EXAM
# Resources: 4
# Programs week: 12
# Programs cumulative: 83
** Week 5 **
Target exam: 8
Start: 2018-10-22 00:00:00, End: 2018-10-29 00:00:00
# Resources: 4
# Programs week: 24
# Programs cumulative: 107
** Week 6 **
Target exam: 8
Start: 2018-10-29 00:00:00, End: 2018-11-05 00:00:00
# Resources: 5
# Programs week:

In [75]:
'{:,}'.format(len(features))

'1,560'

In [76]:
filename = '../../data/features/features_%s_%s.json' % (_course.upper(), _academic_year[1])

with open(filename, 'w') as outfile:
    json.dump(features, outfile)