In [11]:
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer


In [8]:
# 1. Load data
data = pd.read_csv(r"C:\Users\yunpi\OneDrive\Desktop\Code practice\dlsp-portfolio-starter-code\dlsp-portfolio-starter-code\.ipynb_checkpoints\cover_data.csv")

data.info()
data.describe()
print(data.head(5))

<class 'pandas.DataFrame'>
RangeIndex: 581012 entries, 0 to 581011
Data columns (total 55 columns):
 #   Column                              Non-Null Count   Dtype
---  ------                              --------------   -----
 0   Elevation                           581012 non-null  int64
 1   Aspect                              581012 non-null  int64
 2   Slope                               581012 non-null  int64
 3   Horizontal_Distance_To_Hydrology    581012 non-null  int64
 4   Vertical_Distance_To_Hydrology      581012 non-null  int64
 5   Horizontal_Distance_To_Roadways     581012 non-null  int64
 6   Hillshade_9am                       581012 non-null  int64
 7   Hillshade_Noon                      581012 non-null  int64
 8   Hillshade_3pm                       581012 non-null  int64
 9   Horizontal_Distance_To_Fire_Points  581012 non-null  int64
 10  Wilderness_Area1                    581012 non-null  int64
 11  Wilderness_Area2                    581012 non-null  int64
 12 

In [None]:
# 2. Split train and test data
TARGET_COL = "class"

x = data.drop(columns=[TARGET_COL])
y = data[TARGET_COL]

#  Train / Val / Test split (80/10/10)
x_train, x_temp, y_train, y_temp = train_test_split(
    x, y, test_size=0.2, random_state=42
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)


In [12]:
# 3. Preprocessing
numeric_cols = x.select_dtypes(include=['int64', 'float64']).columns
cat_cols = x.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numeric_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
    ]
)

x_train_processed = preprocessor.fit_transform(x_train)
x_val_processed = preprocessor.transform(x_val)
x_test_processed = preprocessor.transform(x_test)

In [14]:
# 4. Model building
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(x_train_processed.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
]
)

# 5. Model compiling
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

# 6. Model training
history = model.fit(
    x_train_processed,
    y_train,
    validation_data=(x_val_processed, y_val),
    epochs=50,
    batch_size=32
)

# Model evaluating
test_metrics = model.evaluate(x_test_processed, y_test)
print("Test metrics: ", dict(zip(model.metrics_names, test_metrics)))

Epoch 1/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 949us/step - loss: 1.0144 - mae: 0.6193 - val_loss: 0.8286 - val_mae: 0.5448
Epoch 2/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 837us/step - loss: 0.7861 - mae: 0.5410 - val_loss: 0.6934 - val_mae: 0.5101
Epoch 3/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 681us/step - loss: 0.7140 - mae: 0.5151 - val_loss: 0.6371 - val_mae: 0.4749
Epoch 4/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 615us/step - loss: 0.6709 - mae: 0.4974 - val_loss: 0.6042 - val_mae: 0.4600
Epoch 5/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 606us/step - loss: 0.6395 - mae: 0.4842 - val_loss: 0.5631 - val_mae: 0.4466
Epoch 6/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 602us/step - loss: 0.6179 - mae: 0.4753 - val_loss: 0.5555 - val_mae: 0.4357
Epoch 7/50
[1m14526/14526[0m [32m━━━━━━━━━━━━━