In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np


In [3]:
data = pd.read_csv('housing.csv')

# Map string variables to binary values
variable_list = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']

def binary_mapping(x):
  return x.map({'no' : 0, 'yes' : 1})

data[variable_list] = data[variable_list].apply(binary_mapping)
data = data.drop('furnishingstatus', axis=1)

# Assuming the target variable is 'housing_value', adjust accordingly
y = data['price'].values
data = data.drop('price', axis=1)
x = data.values

# Split the dataset into training and validation sets
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.8, random_state=42)

# Standardize the input features
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

# Standardize the output features
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_val_scaled = scaler_y.transform(y_val.reshape(-1, 1)).flatten()

# Convert data to PyTorch tensors
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
x_val_tensor = torch.tensor(x_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

# Create DataLoader for training and validation sets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

val_dataset = TensorDataset(x_val_tensor, y_val_tensor)
val_loader = DataLoader(val_dataset, batch_size=32)

# Define the model
model = nn.Sequential(
    nn.Linear(x_train.shape[1], 32),
    nn.Tanh(),
    nn.Linear(32, 1)
)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

# Training loop
epochs = 5000
for epoch in range(epochs+1):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(x_val_tensor)
        val_loss = criterion(val_outputs.squeeze(), y_val_tensor)
        if epoch % 500 == 0:
          print(f'Epoch {epoch}/{epochs}, Training Loss: {loss.item()}, Validation Loss: {val_loss.item()}')

Epoch 0/5000, Training Loss: 20573526687744.0, Validation Loss: 22797747224576.0
Epoch 500/5000, Training Loss: 2143041880064.0, Validation Loss: 2832338255872.0
Epoch 1000/5000, Training Loss: 1162080681984.0, Validation Loss: 2340031037440.0
Epoch 1500/5000, Training Loss: 1586125209600.0, Validation Loss: 2089591111680.0
Epoch 2000/5000, Training Loss: 1380664737792.0, Validation Loss: 1953160495104.0
Epoch 2500/5000, Training Loss: 2240179339264.0, Validation Loss: 1877811920896.0
Epoch 3000/5000, Training Loss: 902428622848.0, Validation Loss: 1838378647552.0
Epoch 3500/5000, Training Loss: 1059672621056.0, Validation Loss: 1826636431360.0
Epoch 4000/5000, Training Loss: 1015589765120.0, Validation Loss: 1816909971456.0
Epoch 4500/5000, Training Loss: 920949555200.0, Validation Loss: 1811508363264.0
Epoch 5000/5000, Training Loss: 1255986692096.0, Validation Loss: 1804847022080.0
