Combine schedule data and preference data

In [None]:
import pandas as pd

# Build one labelled edge list from the schedule and preference tables:
#   label 1 -> rows present in the schedule (assigned)
#   label 0 -> preference rows with no matching schedule row (not assigned)

# Single source of truth for the data folder, so the input paths and the
# optional export path cannot drift apart. The export previously pointed at
# '../../data', a directory that does not exist, which made to_csv raise OSError.
DATA_DIR = '../data'
SAVE_EDGES = False  # set to True to write the edge list next to the inputs

# Load datasets
schedule_df = pd.read_csv(f'{DATA_DIR}/schedule.csv')
preference_df = pd.read_csv(f'{DATA_DIR}/preference.csv')

# Standardize column names for merging. DataFrame.rename ignores mapping keys
# that are not present (errors='ignore' is the default), so no explicit
# membership checks are needed.
preference_df = preference_df.rename(
    columns={'preferred_shift': 'shift', 'preferred_ward': 'ward'}
)

# Label assignments: 1 for assigned (in schedule), 0 for not assigned (only in preferences)
schedule_df['label'] = 1

# Anti-join: keep only preference rows NOT already assigned in the schedule.
# indicator=True adds a '_merge' column; 'left_only' marks preference rows
# with no schedule row sharing the same (nurse_id, date, shift, ward).
merge_keys = ['nurse_id', 'date', 'shift', 'ward']
pref_unassigned = pd.merge(
    preference_df,
    schedule_df[merge_keys],
    on=merge_keys,
    how='left',
    indicator=True,
)
is_unassigned = pref_unassigned['_merge'] == 'left_only'
# drop() returns a new frame, so adding 'label' below does not write into a view.
pref_unassigned = pref_unassigned[is_unassigned].drop(columns=['_merge'])
pref_unassigned['label'] = 0

# Concatenate into one edge list (columns missing on one side are filled with NaN)
edge_df = pd.concat([schedule_df, pref_unassigned], ignore_index=True)

# Optional: engineer additional features for GAT here!
# Example: edge_df['is_preference'] = (edge_df['label'] == 0).astype(int)

# Check result
print(edge_df.head())
print("Total edges:", len(edge_df))
print(edge_df['label'].value_counts())

# Optional export; off by default so running the cell has no side effects on disk.
if SAVE_EDGES:
    edge_df.to_csv(f'{DATA_DIR}/edges_for_gat.csv', index=False)


  nurse_id        date ward  shift  duration_hours  week start_time end_time  \
0     N018  2025-06-30    A  Flex1               4  27.0      14:00    18:00   
1     N035  2025-06-30    A  Flex2               5  27.0      12:00    17:00   
2     N003  2025-06-30    A  Flex4               7  27.0      12:00    19:00   
3     N007  2025-06-30    A  Flex4               7  27.0      12:00    19:00   
4     N031  2025-06-30    A  Flex4               7  27.0      12:00    19:00   

   label  
0      1  
1      1  
2      1  
3      1  
4      1  
Total edges: 3062
label
1    1942
0    1120
Name: count, dtype: int64


OSError: Cannot save file into a non-existent directory: '..\..\data'