In [1]:
import numpy as np
import pandas as pd
from math import lcm
from collections import OrderedDict, defaultdict
import os

## Tools

In [2]:
def export_to_txt(arr, folder="solutions", filename="solution.txt"):
    # Ensure the folder exists
    os.makedirs(folder, exist_ok=True)

    # Full file path
    filepath = os.path.join(folder, filename)

    # Convert array to string without clipping or wrapping
    with np.printoptions(threshold=np.inf, linewidth=10000):
        arr_str = np.array2string(arr, separator=', ')

    # Write to file
    with open(filepath, 'w') as f:
        f.write(arr_str)

def import_from_txt(folder="output", filename="output.txt"):
    filepath = os.path.join(folder, filename)

    with open(filepath, 'r') as f:
        content = f.read()

    # Convert string back to numpy array
    arr = np.array(eval(content), dtype=np.int16)  # Use eval because it's safe with known input like this
    return arr

In [None]:
def get_session_durations(df: pd.DataFrame) -> pd.DataFrame:
    """
    For each curriculum_id, return a DataFrame with session_1 and session_2 durations.
    Ensures session_1 >= session_2 by swapping values if needed.
    
    Parameters:
    - df (pd.DataFrame): Must include 'curriculum_id', 'session', and 'duration' columns.
    
    Returns:
    - pd.DataFrame: with columns ['curriculum_id', 'session_1', 'session_2']
    """
    session_df = (
        df.pivot_table(index="curriculum_id", columns="session", values="duration", aggfunc="first")
        .fillna(0)
        .astype(int)
        .rename(columns={1: "session_1", 2: "session_2"})
        .reset_index()
    )
    
    # Ensure session_1 >= session_2
    session_df[["session_1", "session_2"]] = session_df[["session_1", "session_2"]].apply(
        lambda row: sorted(row, reverse=True), axis=1, result_type="expand"
    )

    return session_df

In [139]:
def get_curriculum_classes_map(df: pd.DataFrame) -> dict:
    """
    Returns a dictionary where the key is curriculum_id (id),
    and the value is classes, for rows where session_1 == 2.
    
    Parameters:
    - df (pd.DataFrame): Must include 'id', 'session_1', and 'classes' columns.
    
    Returns:
    - dict: {curriculum_id: classes}
    """
    return (
        df[df["session_1"] == 2]
        .set_index("id")["classes"]
        .to_dict()
    )

In [None]:
export_to_txt(generate_valid_guess(df, T, R, curriculum), "solutions", f"solution.txt")

In [None]:
def generate_valid_guess_2(course_df, time_slot_indexes, room_indexes, curriculum_df, fill_value=0, dtype=np.int16):
    """
    Generate a valid solution using your 1D approach with transpose.
    Parameters:
    - course_df (pd.DataFrame): with 'course_id' and 'val' (duration)
    - T (int): rows
    - R (int): columns
    - fill_value (int): fill for empty slots

    Returns:
    - np.ndarray shape (T, R)
    """
    T = len(time_slot_indexes)
    R = len(room_indexes)

    curriculum_classes = get_curriculum_classes_map(curriculum_df)

    durations = course_df.set_index('course_id')['duration'].to_dict()
    course_ids = course_df['course_id'].tolist()

    total_slots = T * R
    required_slots = sum(durations.values())
    if required_slots > total_slots:
        raise ValueError("Not enough slots for all courses")

    # arr_1d = np.full(total_slots, fill_value, dtype=dtype)
    arr_2d = np.full((T, R), fill_value, dtype=dtype)

    # Find indices where 2-hour course cannot start:
    # invalid_starts = {i for i in range(T * R) if (i % T) % (T//5) == T//5 - 1}

    # Compute valid starting indices for 2-hour courses
    # valid_starts = [i for i in range(total_slots) if i not in invalid_starts]

    # Find valid time slots for 2-hour course
    time_indices = [t for t in range(T - 1) if t % 10 != 9]

    # Place 2-hour courses first
    used = set()
    for curr_id, n_classes in curriculum_classes.items():
        placed = False

        for t in time_indices:
            # Try to find 'n_classes' rooms where both t and t+1 are free
            available_rooms = []
            for r in range(R):
                if (t, r) not in used and (t + 1, r) not in used:
                    available_rooms.append(r)
                if len(available_rooms) == n_classes:
                    break

            if len(available_rooms) == n_classes:
                # Place the 2-hour classes for this curriculum
                for r in available_rooms:
                    arr_2d[t, r] = curr_id
                    arr_2d[t + 1, r] = curr_id
                    used.update({(t, r), (t + 1, r)})
                placed = True
                break

        if not placed:
            raise RuntimeError(f"Cannot place curriculum {curr_id} with {n_classes} classes")

    # Place 1-hour courses
    one_hour_courses = [cid for cid in course_ids if durations[cid] == 1]
    free_positions = [i for i in range(total_slots) if i not in used_indices]
    np.random.shuffle(one_hour_courses)

    np.random.shuffle(one_hour_courses)
    np.random.shuffle(free_positions)

    for cid, pos in zip(one_hour_courses, free_positions):
        arr_1d[pos] = cid
        used_indices.add(pos)

    # Reshape to (R, T), then transpose to (T, R)
    arr_2d = arr_1d.reshape(R, T).T

    return arr_2d


In [None]:
def generate_valid_guess(course_df, T, R, fill_value=0, dtype=np.int16):
    """
    Generate a valid solution using your 1D approach with transpose.
    Parameters:
    - course_df (pd.DataFrame): with 'course_id' and 'val' (duration)
    - T (int): rows
    - R (int): columns
    - fill_value (int): fill for empty slots

    Returns:
    - np.ndarray shape (T, R)
    """
    durations = course_df.set_index('course_id')['duration'].to_dict()
    course_ids = course_df['course_id'].tolist()

    total_slots = T * R
    required_slots = sum(durations.values())
    if required_slots > total_slots:
        raise ValueError("Not enough slots for all courses")

    arr_1d = np.full(total_slots, fill_value, dtype=dtype)

    # Find indices where 2-hour course cannot start:
    invalid_starts = {i for i in range(T * R) if (i % T) % (T//5) == T//5 - 1}

    # Compute valid starting indices for 2-hour courses
    valid_starts = [i for i in range(total_slots) if i not in invalid_starts]

    # Place 2-hour courses first
    used_indices = set()
    for cid in course_ids:
        dur = durations[cid]
        if dur == 2:
            placed = False
            np.random.shuffle(valid_starts)
            for start_idx in valid_starts:
                # Ensure both start and next slot are unused
                if start_idx in used_indices or (start_idx + 1) in used_indices:
                    continue
                # Place the course at start_idx and start_idx + 1
                arr_1d[start_idx] = cid
                arr_1d[start_idx + 1] = cid
                used_indices.update({start_idx, start_idx + 1})
                placed = True
                break
            if not placed:
                raise RuntimeError(f"Cannot place 2-hour course {cid}")

    # Place 1-hour courses
    one_hour_courses = [cid for cid in course_ids if durations[cid] == 1]
    free_positions = [i for i in range(total_slots) if i not in used_indices]
    np.random.shuffle(one_hour_courses)

    np.random.shuffle(one_hour_courses)
    np.random.shuffle(free_positions)

    for cid, pos in zip(one_hour_courses, free_positions):
        arr_1d[pos] = cid
        used_indices.add(pos)

    # Reshape to (R, T), then transpose to (T, R)
    arr_2d = arr_1d.reshape(R, T).T

    return arr_2d


In [7]:
def array_to_dict_guess(arr, ignore_value=0):
    """
    Create a dictionary mapping each non-ignored value to a list of (row, col) positions
    where it appears in the 2D array.

    Parameters:
    - arr (np.ndarray): Input 2D array
    - ignore_value (int): Value to ignore (default=0)

    Returns:
    - dict: {value: [(row1, col1), (row2, col2), ...]}
    """
    from collections import defaultdict

    value_positions = defaultdict(list)

    for row in range(arr.shape[0]):
        for col in range(arr.shape[1]):
            val = arr[row, col]
            if val != ignore_value:
                value_positions[int(val)].append((row+1, col+1))

    return dict(sorted(value_positions.items()))


In [8]:
def is_valid_guess(guess_arr, course_df, ignore_value=0):
    """
    Validate a guess array by checking:
    - Each course appears the correct number of times.
    - 2-hour courses are placed in consecutive time slots in the same column.

    Parameters:
    - guess_arr (np.ndarray): 2D guess array representing course placements.
    - course_df (pd.DataFrame): DataFrame with 'course_id' and duration information.
    - ignore_value (int): Value to ignore in the array (default = 0).

    Returns:
    - bool: True if the guess is valid, False otherwise.
    """
    def get_course_duration(df, course_id):
        row = df[df['course_id'] == course_id]
        if row.empty:
            raise ValueError(f"Course ID '{course_id}' not found in DataFrame.")
        return int(row.iloc[0]['duration'])


    guess_dict = array_to_dict_guess(guess_arr, ignore_value)

    for course_id, positions in guess_dict.items():
        expected_count = get_course_duration(course_df, course_id)

        # Check if the number of occurrences matches the expected duration
        if len(positions) != expected_count:
            print(f"Failed on duration count: {course_id}")
            return False

        # For 2-hour courses, ensure consecutive placement in the same column
        if expected_count == 2:
            (r1, c1), (r2, c2) = positions
            if c1 != c2 or abs(r1 - r2) != 1:
                print(f"Failed on consecutive placement: {course_id}")
                return False

    return True


In [9]:
def get_class_combinations(mods):
    """
    Get all unique class assignment combinations (1-based), sorted.

    Parameters:
    - mods (list of int): number of classes for each course.

    Returns:
    - list of tuples: sorted list of unique combinations (starting from 1)
    """
    cycle_length = lcm(*mods)
    seen = set()
    for student_id in range(cycle_length):
        combo = tuple((student_id % n) + 1 for n in mods)
        seen.add(combo)
    return sorted(seen)


In [10]:
def filter_classes_by_combination(guess_df, course_order, combination):
    """
    Given a class combination, return the subset of guess_df matching that class assignment.

    Parameters:
    - guess_df (pd.DataFrame): DataFrame containing a 'course_code' column.
    - course_order (list of str): Ordered course names corresponding to the combination.
    - combination (tuple of int): Class assignments (1-based) for each course in course_order.

    Returns:
    - pd.DataFrame: Filtered DataFrame containing only rows matching the class combination.
    """
    result_frames = []

    for course_name, class_number in zip(course_order, combination):
        class_suffix = f"{class_number:02}"  # Format as two digits
        pattern = f"{course_name}-160{class_suffix}"

        matching_rows = guess_df[
            guess_df['course_code'].str.startswith(f"{course_name}-") &
            guess_df['course_code'].str.contains(pattern)
        ]

        result_frames.append(matching_rows)

    return pd.concat(result_frames, ignore_index=True)


In [11]:
def prepare_schedule_df(guess_arr, course_code, time_slots_df, rooms_df):
    """
    Convert a guess array into a structured and enriched schedule DataFrame.

    This function maps a guess array to a DataFrame containing course assignments,
    merges it with time slot and room data, and adds a readable time range column.

    Parameters:
    - guess_arr (np.ndarray): 2D array where each cell encodes a course_id.
    - course_code (list): List of course code strings indexed by course_id - 1.
    - time_slots_df (pd.DataFrame): DataFrame containing time slot metadata, must have 'id' column.
    - rooms_df (pd.DataFrame): DataFrame containing room metadata, must have 'id' and 'room_name' columns.

    Returns:
    - pd.DataFrame: Schedule DataFrame with columns:
        - 'course_id', 'course_code', 'duration', 'time_slot_id', 'room_name', 'time_range', 'day', 'time_slot'
    """

    # Step 1: Convert guess array to a dictionary
    guess_dict = array_to_dict_guess(guess_arr)

    # Step 2: Convert dictionary to DataFrame
    rows = []
    for course_id, slots in guess_dict.items():
        duration = len(slots)
        time_slots = sorted(ts for ts, _ in slots)
        room_ids = set(rid for _, rid in slots)

        start_time_slot = time_slots[0]
        room_id = room_ids.pop() if len(room_ids) == 1 else list(room_ids)

        rows.append({
            'course_id': course_id,
            'course_code': course_code[course_id - 1],
            'duration': duration,
            'time_slot_id': start_time_slot,
            'room_id': room_id
        })

    schedule_df = pd.DataFrame(rows)

    # Step 3: Enrich DataFrame with time slot and room info
    schedule_df = schedule_df.merge(time_slots_df, left_on='time_slot_id', right_on='id', how='left')
    schedule_df = schedule_df.merge(rooms_df[['id', 'room_name']], left_on='room_id', right_on='id', how='left')

    schedule_df = schedule_df.drop(columns=['id_y', 'time_slot_id', 'room_id'])
    schedule_df.rename(columns={'id_x': 'time_slot_id'}, inplace=True)

    # Step 4: Add 'time_range' column directly
    def slot_to_time_range(time_slot, duration):
        start_hour = int(time_slot[1:])
        end_hour = start_hour + duration
        return f"{start_hour:02}.00-{end_hour:02}.00"

    schedule_df['time_range'] = schedule_df.apply(
        lambda row: slot_to_time_range(row['time_slot'], row['duration']), axis=1
    )

    return schedule_df


In [12]:
def group_schedule_by_day(df):
    """
    Group the schedule DataFrame by day (ordered: Senin to Jumat).

    Returns:
        OrderedDict: {
            'Senin': [(time_range, course_code_prefix, room_name), ...],
            ...
        }
    """
    day_order = ['Senin', 'Selasa', 'Rabu', 'Kamis', 'Jumat']
    temp = defaultdict(list)

    for _, row in df.iterrows():
        entry = (row['time_range'], row['course_code'][:6], row['room_name'])
        temp[row['day']].append(entry)

    # Sort each day's entries by start time
    def time_key(item):
        return int(item[0][:2])  # Extract hour from "HH.00-HH.00"

    result = OrderedDict()
    for day in day_order:
        if day in temp:
            result[day] = sorted(temp[day], key=time_key)

    return result


In [13]:
def check_duplicate_time_slot(df):
    return df['time_slot_id'].duplicated().any()

In [14]:
def has_same_subject_multiple_times_per_day(df):
    """
    Returns True if any subject (course prefix) appears more than once on the same day.

    Parameters:
    - df (pd.DataFrame): Must contain 'course_code' and 'day' columns.

    Returns:
    - bool: True if duplicate subject on the same day is found, False otherwise.
    """
    df = df.copy()
    df['course_prefix'] = df['course_code'].str[:6]

    duplicate_groups = (
        df.groupby(['course_prefix', 'day'])
        .filter(lambda group: len(group) > 1)
    )

    return not duplicate_groups.empty


In [15]:
def is_valid_schedule(df):
    """
    Check if a schedule is valid.

    A schedule is invalid if:
    - A student has multiple subjects at the same time slot.
    - The same subject appears multiple times in one day.

    Parameters:
    - df (pd.DataFrame): Filtered schedule for one class combination.

    Returns:
    - bool: True if valid, False otherwise.
    """
    if check_duplicate_time_slot(df):
        return False

    if has_same_subject_multiple_times_per_day(df):
        return False

    return True


In [16]:
def all_schedules_valid(solution_df, course_order, combinations, verbose=False):
    """
    Check if all combinations produce valid schedules.
    Optionally print valid schedules if verbose=True.
    Return False if any schedule is invalid, else True.

    Parameters:
    - combinations (list of tuples): Each tuple represents a class combination.
    - solution_df (pd.DataFrame): The full schedule data.
    - course_order (list): The order of course codes used to map combinations.
    - verbose (bool): Whether to print valid schedules.

    Returns:
    - bool: True if all combinations result in valid schedules, False if at least one is invalid.
    """
    if verbose:
        all_valid = True
        for combo in combinations:
            filtered_df = filter_classes_by_combination(solution_df, course_order, combo)
            valid = is_valid_schedule(filtered_df)
            
            if not valid:
                all_valid = False
            else:
                print("Valid Combination:", combo)
                ordered_schedule = group_schedule_by_day(filtered_df)
                for day, entries in ordered_schedule.items():
                    print(f"{day}:")
                    for entry in entries:
                        print(f"  {entry}")
                print()

        return all_valid
    else:
        for combo in combinations:
            filtered_df = filter_classes_by_combination(solution_df, course_order, combo)
            if not is_valid_schedule(filtered_df):
                return False
        return True


In [17]:
def describe_solution(solution_arr, course_df, course_code, time_slots, rooms, course_order, combinations):
    """
    Validate and describe a given guess array.

    Parameters:
    - solution_arr (np.ndarray): Guess array loaded beforehand.
    - course_df (pd.DataFrame): Course data frame for validation.
    - course_code (pd.DataFrame): Course code dataframe for preparing schedule.
    - time_slots (pd.DataFrame): Time slots dataframe for preparing schedule.
    - rooms (pd.DataFrame): Rooms dataframe for preparing schedule.
    - course_order (list): List of course codes defining course order.
    - combinations (list of tuples): List of class combinations.

    Prints validation results and used rooms count.
    """

    def get_used_rooms(solution_dict):
        used_rooms = set()
        for slots in solution_dict.values():
            if slots:
                _, room_id = slots[0]
                used_rooms.add(room_id)
        return sorted(used_rooms)

    is_valid = is_valid_guess(solution_arr, course_df)
    print("Is guess valid?", is_valid)

    schedule_df = prepare_schedule_df(solution_arr, course_code, time_slots, rooms)
    schedule_valid = all_schedules_valid(schedule_df, course_order, combinations)

    if schedule_valid:
        print("Schedule is good")
        solution_dict = array_to_dict_guess(solution_arr)
        used_rooms = get_used_rooms(solution_dict)
        print("Used rooms:", len(used_rooms))
    else:
        print("Schedule has conflict")

    print()


## Execution

In [116]:
fmipa = pd.read_csv('csv/fmipa.csv')
df = fmipa.copy()
df.drop(columns=["instructor", "day", "time"], inplace=True)

In [137]:
rooms = pd.read_csv('csv/rooms.csv')
big_rooms = rooms[rooms['capacity'] >= 100]
room_indexes = big_rooms['id'].to_list()

subjects = pd.read_csv('csv/subjects.csv')
time_slots = pd.read_csv('csv/time_slots.csv')
time_slot_indexes = time_slots['id'].to_list()

In [118]:
curriculum = pd.read_csv('csv/curriculum.csv')

curriculum_sessions = get_session_durations(df)
curriculum = curriculum.merge(curriculum_sessions, left_on='id', right_on='curriculum_id', how='left')
curriculum.drop(columns=['curriculum_id'], inplace=True)

curriculum = curriculum.merge(subjects, left_on='subject_id', right_on='id', how='left')
curriculum.drop(columns=['id_y'], inplace=True)
curriculum.rename(columns={"id_x": "id", "code": "subject_code", "subject": "subject_name"}, inplace=True)

curriculum

Unnamed: 0,id,faculty_id,subject_id,classes,session_1,session_2,subject_code,subject_name,credits
0,1,160,1,6,2,2,MA1201,Matematika IIA,4
1,2,160,2,6,2,2,FI1201,Fisika IIA,4
2,3,160,3,6,2,1,KI1201,Kimia IIA,3
3,4,160,4,5,2,1,KU1202,Pengantar Rekayasa dan Desain,3
4,5,160,5,5,2,0,KU1024,Bahasa Inggris,2


In [124]:
curriculum_class_map = get_curriculum_classes_map(curriculum)
curriculum_class_map

{1: 6, 2: 6, 3: 6, 4: 5, 5: 5}

In [121]:
curriculum.set_index('id')[['classes', 'session_1']].to_dict()

{'classes': {1: 6, 2: 6, 3: 6, 4: 5, 5: 5},
 'session_1': {1: 2, 2: 2, 3: 2, 4: 2, 5: 2}}

In [20]:
arrays = {col: fmipa[col].to_numpy() for col in fmipa.columns}

course_id = arrays["course_id"]
course_code = arrays["code"]
duration = arrays["duration"]
time_slot = arrays["time_slot"]
room_id = arrays["room_id"]

In [21]:
C = len(df)         # 51
T = len(time_slots) # 50
R = len(big_rooms)  # 54

# Generate solution
for i in range(10):
    export_to_txt(generate_valid_guess(df, T, R), "solutions", f"solution_{i+1}.txt")

AttributeError: 'tuple' object has no attribute 'size'

In [None]:
combinations = get_class_combinations([6, 6, 6, 5, 5])

# Order of courses matches combination order
course_order = ["MA1201", "FI1201", "KI1201", "KU1202", "KU1024"]

# Check if solution is valid
for i in range(10):
    print(f"Solution {i+1}")
    arr = import_from_txt("solutions", f"solution_{i+1}.txt")
    describe_solution(arr, fmipa, course_code, time_slots, rooms, course_order, combinations)

Solution 1
Is guess valid? True
Schedule has conflict

Solution 2
Is guess valid? True
Schedule has conflict

Solution 3
Is guess valid? True
Schedule has conflict

Solution 4
Is guess valid? True
Schedule has conflict

Solution 5
Is guess valid? True
Schedule has conflict

Solution 6
Is guess valid? True
Schedule has conflict

Solution 7
Is guess valid? True
Schedule has conflict

Solution 8
Is guess valid? True
Schedule has conflict

Solution 9
Is guess valid? True
Schedule has conflict

Solution 10
Is guess valid? True
Schedule has conflict

