In [None]:
import zipfile

import pandas as pd
from google.colab import drive

# Make Google Drive visible under /content/drive so the archive can be read.
drive.mount('/content/drive')

# Location of the zipped dataset on Drive and the local folder to unpack into.
zip_path = "/content/drive/MyDrive/student_data.zip"
extract_dir = "/content/student_data"

# Unpack every member of the archive; the context manager closes the file.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(extract_dir)

# Load the extracted CSV into the DataFrame used by the rest of the notebook.
df = pd.read_csv("/content/student_data/StudentsPerformance.csv")


def _fill_missing_features(features: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *features* with missing values filled column by column.

    Text-like columns (object, pandas string dtype, categorical) get the
    literal label "missing"; every other column gets its own mean.
    """
    filled = features.copy()
    for name in filled.columns:
        column = filled[name]
        if not column.isna().any():
            # Nothing to fill; also avoids registering an unused "missing"
            # category that would later become an all-zero dummy column.
            continue
        dtype = column.dtype
        if isinstance(dtype, pd.CategoricalDtype):
            # A categorical rejects fill values that are not already one of
            # its categories, so register the label before filling.
            if "missing" not in column.cat.categories:
                column = column.cat.add_categories("missing")
            filled[name] = column.fillna("missing")
        elif dtype == object or isinstance(dtype, pd.StringDtype):
            filled[name] = column.fillna("missing")
        else:
            filled[name] = column.fillna(column.mean())
    return filled


def _bools_to_ints(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with boolean values converted to 0/1 integers."""
    converted = frame.copy()
    for name in converted.columns:
        column = converted[name]
        if pd.api.types.is_bool_dtype(column.dtype):
            converted[name] = column.astype(int)
        elif column.dtype == object:
            # Object columns may hold Python bools mixed with other values;
            # convert only the bools and leave everything else untouched.
            converted[name] = column.map(
                lambda value: int(value) if isinstance(value, bool) else value
            )
    return converted


def _min_max_scale(column: pd.Series) -> pd.Series:
    """Scale *column* linearly to [0, 1]; a constant column maps to all zeros."""
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        # Avoid 0/0, which would turn the whole column into NaN.
        return pd.Series(0.0, index=column.index)
    return (column - lowest) / span


def preprocess_and_normalize_all(df, target_col, output_path="processed_dataset_normalized.csv"):
    """Clean, one-hot encode and min-max normalize a dataset, then save it as CSV.

    Steps, in order:
      1. Drop rows whose target value is missing.
      2. Fill missing feature values ("missing" for text-like columns,
         the column mean otherwise).
      3. One-hot encode categorical features with ``drop_first=True``.
      4. Re-attach the target column as the last column.
      5. Convert booleans to 0/1 and scale every column, the target
         included, to the [0, 1] range. Constant columns become 0.0.
      6. Cast to float32 and write the result to *output_path* without
         the index.

    Args:
        df: Input DataFrame containing the features and the target column.
            It is not modified.
        target_col: Name of the target column in *df*.
        output_path: Destination path of the CSV file that is written.

    Returns:
        The processed float32 DataFrame (the same data that was written).
    """
    # Rows without a target carry no supervision signal, so drop them first.
    df = df.dropna(subset=[target_col])

    X = _fill_missing_features(df.drop(columns=[target_col]))
    y = df[target_col]

    X_encoded = pd.get_dummies(X, drop_first=True)
    full_df = pd.concat([X_encoded, y.rename(target_col)], axis=1)

    # Boolean columns cannot be subtracted, so make them numeric before scaling.
    full_df = _bools_to_ints(full_df)
    full_df = full_df.apply(_min_max_scale)
    full_df = full_df.astype('float32')

    full_df.to_csv(output_path, index=False)

    return full_df


# Run the preprocessing pipeline with "math score" as the target and store
# the normalized dataset on Google Drive.
processed_df = preprocess_and_normalize_all(
    df,
    target_col="math score",
    output_path="/content/drive/MyDrive/students_normalized2.csv",
)

# Show the resulting frame in the cell output.
print(processed_df)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
     reading score  writing score  gender_male  race/ethnicity_group B  \
0         0.662651       0.711111          0.0                     1.0   
1         0.879518       0.866667          0.0                     0.0   
2         0.939759       0.922222          0.0                     1.0   
3         0.481928       0.377778          1.0                     0.0   
4         0.734940       0.722222          1.0                     0.0   
..             ...            ...          ...                     ...   
995       0.987952       0.944444          0.0                     0.0   
996       0.457831       0.500000          1.0                     0.0   
997       0.650602       0.611111          0.0                     0.0   
998       0.734940       0.744444          0.0                     0.0   
999       0.831325       0.844444          0.0           

  full_df = full_df.applymap(lambda x: int(x) if isinstance(x, bool) else x)
