In [2]:
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer


In [3]:
# load
df = pd.read_csv("house_prediction_cleaned.csv")
target = "SalePrice"
X = df.drop(columns=[target])
y = df[target].values

In [4]:

# numeric-only pipeline (adjust if you have categoricals)
num_cols = X.select_dtypes(include=[np.number]).columns.tolist()
num_pipe = Pipeline([("imp", SimpleImputer(strategy="median")), ("sc", StandardScaler())])
pre = ColumnTransformer([("num", num_pipe, num_cols)])




In [5]:
X_proc = pre.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_proc, y, test_size=0.2, random_state=42)

In [6]:
# reshape for Conv1D: (samples, timesteps, channels) -> timesteps = n_features
n_features = X_train.shape[1]
X_train = X_train.reshape((X_train.shape[0], n_features, 1))
X_test = X_test.reshape((X_test.shape[0], n_features, 1))

In [7]:
import tensorflow as tf
tf.random.set_seed(42)

In [8]:

model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=(n_features,1)),
    tf.keras.layers.MaxPooling1D(2),
    tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# ----- FIX SHAPE FOR CONV1D -----
n_features = X_train.shape[1]

X_train = X_train.reshape((X_train.shape[0], n_features, 1))
X_test  = X_test.reshape((X_test.shape[0], n_features, 1))


In [15]:

# ----- BUILD 1D-CNN -----
import tensorflow as tf


model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(32, kernel_size=2, activation='relu', input_shape=(n_features, 1)),
    tf.keras.layers.MaxPooling1D(1),
    tf.keras.layers.Conv1D(64, kernel_size=2, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])


In [16]:

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [17]:
# ----- TRAIN -----
history = model.fit(
    X_train, y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=16
)


Epoch 1/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 74ms/step - loss: 63764717568.0000 - mae: 245464.3750 - val_loss: 52044251136.0000 - val_mae: 220934.1250
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 63764295680.0000 - mae: 245463.5312 - val_loss: 52043735040.0000 - val_mae: 220932.9844
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 63763365888.0000 - mae: 245461.6719 - val_loss: 52042620928.0000 - val_mae: 220930.4844
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 63761379328.0000 - mae: 245457.7812 - val_loss: 52040347648.0000 - val_mae: 220925.3906
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 63757451264.0000 - mae: 245450.0625 - val_loss: 52036030464.0000 - val_mae: 220915.7812
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 63

In [19]:
y_pred = model.predict(X_test).flatten()


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step


In [21]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("RMSE:", rmse)
print("MAE:", mae)
print("R2:", r2)


RMSE: 66336.82259471001
MAE: 60582.901041666664
R2: -0.04975826475727785
