**Adding the average estimated session duration to student profiles**


In [2]:
import pandas as pd

In [None]:
# Read the previously enriched student profiles from the processed-data folder
csv_path = "../data/processed"
profiles = pd.read_csv(csv_path + '/oulad_media_profiles_full.csv')

In [11]:
# Read the two raw OULAD tables used below:
#   vle.csv        -> `vle` (provides `id_site`)
#   studentVle.csv -> `student_vle` (provides `id_student`, `date`, `sum_click`)
path = "../data/raw/OULAD"
vle = pd.read_csv(path + "/vle.csv")
student_vle = pd.read_csv(path + "/studentVle.csv")

In [12]:
# Working copy of the click log.
# studentVle already carries `date`, so no join against `vle` is needed:
# joining on `id_site` while selecting only `id_site` would add no columns.
# `date` is deliberately left untouched instead of being run through
# pd.to_datetime, which reads plain integers as nanoseconds since the Unix
# epoch and would turn day offsets into meaningless 1970 timestamps.
# NOTE(review): assumes `date` is OULAD's integer "days relative to module
# start" field - confirm against the dataset documentation.
df = student_vle.copy()

In [13]:
# Treat every (student, date) pair as one session and total its clicks.
# NOTE(review): if the click log spans several module presentations, the same
# `date` value may refer to different calendar days for one student; consider
# adding the module/presentation keys to the grouping - verify against the schema.
session = (
    df
    .groupby(['id_student', 'date'], as_index=False)['sum_click']
    .sum()
    .rename(columns={'sum_click': 'clicks_per_session'})
)


In [15]:
# Estimate minutes per session under a flat 0.5-minutes-per-click assumption
session['duration_min'] = 0.5 * session['clicks_per_session']

# Mean estimated session length for each student, one row per `id_student`
user_sessions = (
    session
    .groupby('id_student')['duration_min']
    .mean()
    .rename('avg_session_duration_min')
    .reset_index()
)

In [16]:
# Attach each student's average session duration to the profile rows.
# A left join keeps every profile row; students without any recorded
# session end up with NaN in `avg_session_duration_min`.
# `profiles` may repeat `id_student`, so the join is many-to-one;
# `validate` makes pandas raise a MergeError if `user_sessions` ever
# contains a duplicated `id_student`, instead of silently multiplying rows.
merged = profiles.merge(
    user_sessions,
    on='id_student',
    how='left',
    validate='many_to_one',
)

In [17]:
# Persist the profiles-plus-session-duration table as CSV, without the index column
out_path = "../data/processed/oulad_media_profiles_full_with_sessions.csv"
merged.to_csv(path_or_buf=out_path, index=False)


In [18]:
# Print a heading followed by the first rows of the augmented table
print("Augmented profiles sample:", merged.head(), sep="\n")

Augmented profiles sample:
   id_student  course_count  other_count  reading_count  video_count  \
0        6516          2008          497            286            0   
1        8462           172          203            268           13   
2        8462           172          203            268           13   
3       11391           759          138             37            0   
4       23629           120           36              5            0   

   course_prop  other_prop  reading_prop  video_prop gender  \
0     0.719455    0.178072      0.102472    0.000000      M   
1     0.262195    0.309451      0.408537    0.019817      M   
2     0.262195    0.309451      0.408537    0.019817      M   
3     0.812634    0.147752      0.039615    0.000000      M   
4     0.745342    0.223602      0.031056    0.000000      F   

                region   highest_education imd_band age_band  \
0             Scotland    HE Qualification   80-90%     55<=   
1        London Region    HE Qual