In [3]:
import numpy as np 
import pandas as pd

def impute_other_fields(dataframes: dict, stored_value=None):
    """Drop unused columns and fill missing registration dates.

    Operates on the tables found in ``dataframes`` (missing tables and
    missing columns are skipped):

    * ``'vle'``: drops ``week_from`` and ``week_to``.
    * ``'assessments'``: drops ``date``.
    * ``'student_reg'``: fills ``date_registration`` with a median and
      ``date_unregistration`` with the constant ``999``. The
      ``student_reg`` DataFrame is modified in place.

    Args:
        dataframes: Mapping of table name to DataFrame. The mapping is
            updated in place and also returned.
        stored_value: Median to use for ``date_registration``. When
            ``None`` the median is computed from the column itself.

    Returns:
        A ``(dataframes, median_val)`` tuple. ``median_val`` is the median
        used for ``date_registration``: ``stored_value`` when one was
        supplied, otherwise the freshly computed median, or ``None`` when
        nothing was supplied and nothing could be computed.
    """
    # Seed with the caller's value so the returned median is always bound,
    # even when 'student_reg' or 'date_registration' is absent.
    median_val = stored_value

    if 'vle' in dataframes:
        dataframes['vle'] = dataframes['vle'].drop(
            columns=['week_from', 'week_to'], errors='ignore'
        )

    if 'assessments' in dataframes:
        dataframes['assessments'] = dataframes['assessments'].drop(
            columns=['date'], errors='ignore'
        )

    if 'student_reg' in dataframes:
        df_student_registration = dataframes['student_reg']
        if 'date_registration' in df_student_registration:
            if median_val is None:
                # No stored median: compute it from this data so the
                # caller can store and reuse it later.
                median_val = df_student_registration['date_registration'].median()
            df_student_registration['date_registration'] = (
                df_student_registration['date_registration'].fillna(median_val)
            )
        if 'date_unregistration' in df_student_registration:
            df_student_registration['date_unregistration'] = (
                df_student_registration['date_unregistration'].fillna(999)
            )

    return dataframes, median_val

def impute_with_constant(dataframes: dict):
    """Fill missing values with fixed placeholder constants.

    * ``'student_info'``: in ``imd_band``, missing values and the
      placeholder strings ``'nan'``, ``'None'``, ``''``, ``'NA'``, ``'na'``
      become ``"Missing"``; the column is then cast to ``str``.
    * ``'student_assessment'``: missing ``score`` values become ``-1``.

    Tables or columns that are not present are skipped. The affected
    DataFrames are modified in place. The remaining fields are then handled
    by ``impute_other_fields``, called without a stored median.

    Args:
        dataframes: Mapping of table name to DataFrame.

    Returns:
        The return value of ``impute_other_fields(dataframes)``, i.e. a
        ``(dataframes, date_registration_median)`` tuple.
    """
    if 'student_info' in dataframes:
        df_student_info = dataframes['student_info']
        # Guard on the column, as impute_with_median does
        if 'imd_band' in df_student_info:
            df_student_info['imd_band'] = (
                df_student_info['imd_band']
                .replace([np.nan, None, 'nan', 'None', '', 'NA', 'na'], "Missing")
                .astype(str)
            )

    if 'student_assessment' in dataframes:
        df_student_assessment = dataframes['student_assessment']
        if 'score' in df_student_assessment:
            df_student_assessment['score'] = df_student_assessment['score'].fillna(-1)

    return impute_other_fields(dataframes)

def impute_with_median(dataframes: dict, stored_values=None):
    """Fill missing values with mode/median statistics.

    * ``'student_info'``: missing ``imd_band`` values are filled with the
      column mode.
    * ``'student_assessment'``: missing ``score`` values are filled with
      the column median.
    * Remaining fields are handled by ``impute_other_fields``, which
      receives ``stored_values['date_reg_median']`` when available.

    The affected DataFrames are modified in place. Tables or columns that
    are not present are skipped.

    Args:
        dataframes: Mapping of table name to DataFrame.
        stored_values: Optional dict of previously computed statistics
            with keys ``'imd_band_mode'``, ``'score_median'`` and
            ``'date_reg_median'``. A key that is absent (or ``None``) means
            the statistic is computed from the data passed in.

    Returns:
        A ``(dataframes, stats)`` tuple, where ``stats`` is a dict with the
        keys ``'imd_band_mode'``, ``'score_median'`` and
        ``'date_reg_median'``, for storing and reusing on validation/test
        data. ``'imd_band_mode'`` and ``'score_median'`` are ``None`` when
        the corresponding column was not present or no value could be
        determined.
    """
    stored = stored_values or {}

    # Bind both statistics up front so the final return never hits an
    # unbound name when a table exists but the column does not.
    imd_mode = None
    score_median = None

    if 'student_info' in dataframes:
        df_student_info = dataframes['student_info']
        if 'imd_band' in df_student_info:
            imd_mode = stored.get('imd_band_mode')
            if imd_mode is None:
                # mode() is empty when every value is missing
                modes = df_student_info['imd_band'].mode()
                imd_mode = modes.iloc[0] if not modes.empty else None
            if imd_mode is not None:
                df_student_info['imd_band'] = df_student_info['imd_band'].fillna(imd_mode)

    if 'student_assessment' in dataframes:
        df_student_assessment = dataframes['student_assessment']
        if 'score' in df_student_assessment:
            score_median = stored.get('score_median')
            if score_median is None:
                score_median = df_student_assessment['score'].median()
            if score_median is not None:
                df_student_assessment['score'] = df_student_assessment['score'].fillna(score_median)

    dataframes, date_reg_median = impute_other_fields(
        dataframes, stored_value=stored.get('date_reg_median')
    )

    # Returning dataframes + dictionary of medians/modes for storing for val/test
    return dataframes, {
        'imd_band_mode': imd_mode,
        'score_median': score_median,
        'date_reg_median': date_reg_median,
    }

