In [3]:
# Data libraries
import pandas as pd
import numpy as np
import random

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

import os
import subprocess
import statistics
import csv
import re

In [6]:
# Read Data
df = pd.read_csv('courses_data.csv', delimiter=',', encoding='unicode_escape')
df2 = pd.read_csv('Days_tutors.csv', delimiter=',', encoding='unicode_escape', header=1)

In [41]:
# Clean data
# Get rid of courses with year above 4 and missing data
df = df[(df.NYT!='P')&(df.NYT!='5')&(df.NYT.notnull())]
df['NYT'].replace('1', '0', inplace=True)
df['NYT'].replace('2', '0', inplace=True)

# Get rid of courses with Semester 1
df = df[df.Semester!='Semester 1']

# Get rid of courses with no tutors needed
df = df[df.Frequency.notnull()]

# Replace the weeks taught from weekly/odd/even/specific numbers to list of numbers
df['Frequency'].replace('Weekly', '1,1,1,1,1,1,1,1,1,1', inplace=True)
df['Frequency'].replace('Odd', '1,0,1,0,1,0,1,0,1,0', inplace=True)
df['Frequency'].replace('odd', '1,0,1,0,1,0,1,0,1,0', inplace=True)
df['Frequency'].replace('Even', '0,1,0,1,0,1,0,1,0,1', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['NYT'].replace('1', '0', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['NYT'].replace('2', '0', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always be

In [43]:
# An alternative way to obtain timetabling data, if available

def get_days_times(course):
    # Indexing days of the week
    day_to_index = {"Monday": 0, "Tuesday": 1, "Wednesday": 2, "Thursday": 3, "Friday": 4}
    
    # Format for the days
    days = [0,0,0,0,0]
    times = []

    # Find the column containing the course code
    course_column = None
    for col in df.columns:
        if course.lower() in str(df[col].iloc[0]).lower():
            course_column = col
            break

    # If a certain day appears in the column, add 1 to its corresponding 0
    for entry in df[course_column].iloc[1:].dropna():
        match = re.match(r"(\w+) (\d{1,2}):\d{2} - (\d{1,2}):\d{2}", entry)
    if match:
        day, start_hour, end_hour = match.groups()
        if day in day_to_index:
            days[day_to_index[day]] += 1

            # For that day, log the workshop times 
            start_time = int(start_hour)
            duration = int(end_hour) - start_time
                
            times.append([start_time, duration])

    return days, times

In [52]:
def generate_days_times():
    total = random.randint(1, 2)  # at most 5 tutorial slots a week (2 here for FICO to run)
    days = [0,0,0,0,0,0,0,0,0,0]

    # Randomly distribute the days of the slots
    while total > 0:
        day = random.randint(0, 4)
        if days[day] < 2:  # at most 3 tutorials in a day (2 here for FICO to run)
            add_value = random.randint(1, min(2 - days[day], total))
            days[day] += add_value
            total -= add_value

    times = []
    for day_count in days:
        used_times = set()  # Track if a timeslot is already used that day
        for _ in range(day_count):
            while True:
                start_time = random.randint(9, 17)  # Fit uni schedules
                duration = random.choice([1, 2]) if start_time <= 16 else 1
                if all(start_time + d not in used_times for d in range(duration)):  # Check overlap
                    for d in range(duration):
                        used_times.add(start_time + d)
                    times.append([start_time, duration+start_time])
                    break

    # New format for FICO 
    session_lists = []
    index = 0
    for day, count in enumerate(days):
        for _ in range(count):
            if index < len(times):
                start, end = times[index]
                session_list = [0] * 10  
                session_list[day*2] = start 
                session_list[day*2 + 1] = end  
                session_lists.append(session_list)  
                index += 1

    return index, session_lists

In [53]:
# Extract course data

# a = Course Code
a = []
for code in df['Code']:
    a.append(code)

# b = Discipline. Format (1,0,0,0,1,0,0,0)
# As path requires student ID, done manually
# algebra 1, analysis 2, applied maths 3, finance 4, geometry 5, physics 6, OR 7, stats 8
b=[[0,0,1,0,0,0,0,0],[1,0,0,0,1,0,0,0],[0,1,0,0,0,0,0,0],[0,0,1,0,0,0,0,0],
  [0,0,1,0,0,1,0,0], [0,0,0,1,0,0,0,0],[0,1,0,0,0,0,0,0],[1,1,0,0,0,0,0,0],
  [1,0,0,0,0,0,0,0], [1,0,0,0,0,0,0,0],[1,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0],
  [0,1,0,0,0,0,0,0], [1,0,0,0,0,0,0,0],[0,0,0,0,0,0,1,0],[0,0,0,0,0,0,1,0],
  [0,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0],[0,0,0,0,0,0,0,1], [0,0,1,0,0,0,0,0],
  [0,0,0,1,0,0,0,0],[1,1,0,0,0,0,0,0],[0,0,0,0,0,0,0,1],[0,0,0,0,0,0,0,1],
  [0,0,0,0,0,0,0,1],[0,0,1,0,1,1,0,0]]


# c = Competency (years 1, 2, 3, 4)
c = []
for year in df['NYT']:
    c.append(int(year))
    
# d = Weeks there are workshops in in format (0,1,0,1,etc)
d = []
for weeks in df['Frequency']:
    lst = [int(s) for s in weeks.split(',')]
    d.append(lst)

# f = nb of tutors needed
f = []
for tutors in df['No T Required']:
    f.append(int(tutors))

In [61]:
new_a = []
new_b = []
new_c = []
new_d = []
new_e = []

# Running through each course
# Making list smaller for FICO to be able to run
for i in range(int(len(a)/2)):
    index, sessions = generate_days_times()
    # Running through each workshop
    for j in range(index):
        new_a.append(a[i])
        new_b.append(b[i])
        new_c.append(c[i])
        new_d.append(d[i])
        new_e.append(sessions[j])

# Ensuring f is at the new length
new_f = f[:int(len(a)/2)]

In [62]:
lists = {
    "Course_code": new_a,
    "CourseIDWorkshops": new_a, # Duplicate to ease FICO indexing
    "Disciplines": new_b,
    "Difficulty": new_c,
    "Week": new_d,
    "StartEnd": new_e,
    "Min_tutors": new_f
}

with open('Courses_data.dat', 'w', newline='') as file3:
    for key, data in lists.items():
        file3.write(f"{key}: [")  # Write the header
        # Ensure data is properly formatted
        if all(isinstance(row, (list, tuple)) for row in data):  # If list of lists
            for row in data:
                file3.write(" ".join(map(str, row)) + " ")
        else:  # If list of integers
            file3.write(" ".join(map(str, data)) + " ")
        file3.write("]\n")  # Add closing bracket and new line