In [28]:
import os
import pickle
import pandas as pd
from ucimlrepo import fetch_ucirepo


def fetch_students_dropout_data():
    local_file_path = "./data/students_dropout_data.pkl"

    if not os.path.exists(local_file_path):
        # Fetch dataset from UCIMLRepo
        dataset = fetch_ucirepo(id=697)

        if dataset:
            # Save the dataset to a local file using pickle
            with open(local_file_path, "wb") as file:
                pickle.dump(dataset, file)
            print("Data saved to:", local_file_path)
        else:
            print("Failed to fetch data from UCIMLRepo.")
            return None
    else:
        # Load the dataset from the local file using pickle
        with open(local_file_path, "rb") as file:
            dataset = pickle.load(file)

    return dataset


# Example usage
dataset = fetch_students_dropout_data()
if dataset is not None:
    print("Dataset loaded successfully.")
    print("Features DataFrame:")
    x, y = dataset["data"]["features"], dataset["data"]["targets"]
    display(x)

Dataset loaded successfully.
Features DataFrame:


Unnamed: 0,Marital Status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,...,Curricular units 1st sem (without evaluations),Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP
0,1,17,5,171,1,1,122.0,1,19,12,...,0,0,0,0,0,0.000000,0,10.8,1.4,1.74
1,1,15,1,9254,1,1,160.0,1,1,3,...,0,0,6,6,6,13.666667,0,13.9,-0.3,0.79
2,1,1,5,9070,1,1,122.0,1,37,37,...,0,0,6,0,0,0.000000,0,10.8,1.4,1.74
3,1,17,2,9773,1,1,122.0,1,38,37,...,0,0,6,10,5,12.400000,0,9.4,-0.8,-3.12
4,2,39,1,8014,0,1,100.0,1,37,38,...,0,0,6,6,6,13.000000,0,13.9,-0.3,0.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4419,1,1,6,9773,1,1,125.0,1,1,1,...,0,0,6,8,5,12.666667,0,15.5,2.8,-4.06
4420,1,1,2,9773,1,1,120.0,105,1,1,...,0,0,6,6,2,11.000000,0,11.1,0.6,2.02
4421,1,1,1,9500,1,1,154.0,1,37,37,...,0,0,8,9,1,13.500000,0,13.9,-0.3,0.79
4422,1,1,1,9147,1,1,180.0,1,37,37,...,0,0,5,6,5,12.000000,0,9.4,-0.8,-3.12


In [75]:
from tensorflow.keras import layers
from tensorflow import keras
import numpy as np

keras.backend.clear_session()


categorical = [
    "Marital Status",
    "Application mode",
    "Daytime/evening attendance",
    "Previous qualification",
    "Nacionality",
    "Mother's qualification",
    "Father's qualification",
    "Mother's occupation",
    "Father's occupation",
    "Displaced",
    "Educational special needs",
    "Debtor",
    "Tuition fees up to date",
    "Gender",
    "Scholarship holder",
    "International",
    "Course",
]

numeric = [
    "Age at enrollment",
    "Curricular units 1st sem (credited)",
    "Curricular units 1st sem (enrolled)",
    "Curricular units 1st sem (evaluations)",
    "Curricular units 1st sem (approved)",
    "Curricular units 1st sem (grade)",
    "Curricular units 1st sem (without evaluations)",
    "Curricular units 2nd sem (credited)",
    "Curricular units 2nd sem (enrolled)",
    "Curricular units 2nd sem (evaluations)",
    "Curricular units 2nd sem (approved)",
    "Curricular units 2nd sem (grade)",
    "Curricular units 2nd sem (without evaluations)",
    "Unemployment rate",
    "Inflation rate",
    "GDP",
    "Application order",
    "Admission grade",
    "Previous qualification (grade)",
]


x_cat = x[categorical].values
x_numeric = x[numeric].values
y_enc = np.where(y.values == "Dropout", 1, -1)

cats = keras.Input(shape=(x_cat.shape[1],),name="categorical_vals")
nums = keras.Input(shape=(x_numeric.shape[1],), name="numeric_vals")

norm_layer = layers.Normalization()
norm_layer.adapt(x_numeric)
nums_normalized = norm_layer(nums)

inputs = [cats, nums_normalized]

features = layers.Concatenate()(inputs)

features = layers.Dense(64, activation='relu')(features)

dropout = layers.Dense(1, activation='sigmoid')(features)
outputs = [dropout]
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(
    optimizer="rmsprop",
    loss=["mean_squared_error", "categorical_crossentropy"],
    metrics=[["mean_absolute_error"], ["accuracy"]],
)

model.fit([x_cat, x_numeric], [dropout], epochs=1)

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'