In [None]:
%pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
automobile = fetch_ucirepo(id=10)

# data (as pandas dataframes)
X = automobile.data.features
y = automobile.data.targets

In [None]:
data = pd.concat([X, y], axis=1)
data.head()

Unnamed: 0,price,highway-mpg,city-mpg,peak-rpm,horsepower,compression-ratio,stroke,bore,fuel-system,engine-size,...,wheel-base,engine-location,drive-wheels,body-style,num-of-doors,aspiration,fuel-type,make,normalized-losses,symboling
0,13495.0,27,21,5000.0,111.0,9.0,2.68,3.47,mpfi,130,...,88.6,front,rwd,convertible,2.0,std,gas,alfa-romero,,3
1,16500.0,27,21,5000.0,111.0,9.0,2.68,3.47,mpfi,130,...,88.6,front,rwd,convertible,2.0,std,gas,alfa-romero,,3
2,16500.0,26,19,5000.0,154.0,9.0,3.47,2.68,mpfi,152,...,94.5,front,rwd,hatchback,2.0,std,gas,alfa-romero,,1
3,13950.0,30,24,5500.0,102.0,10.0,3.4,3.19,mpfi,109,...,99.8,front,fwd,sedan,4.0,std,gas,audi,164.0,2
4,17450.0,22,18,5500.0,115.0,8.0,3.4,3.19,mpfi,136,...,99.4,front,4wd,sedan,4.0,std,gas,audi,164.0,2


# Data Preparation

In [None]:
# Data Preprocessing
#1. Hanle missing value
data = data.replace('?', np.nan)  # Replace '?' with NaN
data = data.dropna()  # Drop rows with NaN values (or use another method to fill)

In [None]:
# 2. Convert categorical features to numerical (if needed)
data['fuel-type'] = data['fuel-type'].map({'gas': 1, 'diesel': 0})

In [None]:
import pandas as pd
# 2.1 Encoding categorical variables using one-hot encoding
categorical_features = ['make','aspiration', 'num-of-doors', 'body-style', 'drive-wheels', 'engine-location', 'engine-type', 'num-of-cylinders', 'fuel-system']
data = pd.get_dummies(data, columns=categorical_features, dummy_na=False)


In [None]:
# 3. Select relevant features and target
# Misalnya, menggunakan kolom 'price' sebagai target
X = data.drop(['price'], axis=1)  # Feature set
y = data['price']  # Target variable

In [None]:
# 4. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# 5. Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)  # Reshape for PyTorch
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)  # Reshape for PyTorch

# Modeling

In [None]:
# Define a simple regression neural network
class SimpleRegression(nn.Module):
    def __init__(self, inputSize, outputSize):
        super(SimpleRegression, self).__init__()
        self.linear = torch.nn.Linear(inputSize, outputSize)

    def forward(self, x):
        output = self.linear(x)
        return output

In [None]:
# Initialize model, loss function, and optimizer
inputSize = X_train_tensor.shape[1]
outputSize = 1  # For regression
model = SimpleRegression(inputSize, outputSize)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training the model
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.3f}')

Epoch [1/100], Loss: 173552256.000
Epoch [2/100], Loss: 173552000.000
Epoch [3/100], Loss: 173551712.000
Epoch [4/100], Loss: 173551472.000
Epoch [5/100], Loss: 173551200.000
Epoch [6/100], Loss: 173550944.000
Epoch [7/100], Loss: 173550656.000
Epoch [8/100], Loss: 173550400.000
Epoch [9/100], Loss: 173550112.000
Epoch [10/100], Loss: 173549856.000
Epoch [11/100], Loss: 173549584.000
Epoch [12/100], Loss: 173549328.000
Epoch [13/100], Loss: 173549072.000
Epoch [14/100], Loss: 173548800.000
Epoch [15/100], Loss: 173548496.000
Epoch [16/100], Loss: 173548240.000
Epoch [17/100], Loss: 173547968.000
Epoch [18/100], Loss: 173547712.000
Epoch [19/100], Loss: 173547440.000
Epoch [20/100], Loss: 173547184.000
Epoch [21/100], Loss: 173546912.000
Epoch [22/100], Loss: 173546640.000
Epoch [23/100], Loss: 173546368.000
Epoch [24/100], Loss: 173546096.000
Epoch [25/100], Loss: 173545824.000
Epoch [26/100], Loss: 173545568.000
Epoch [27/100], Loss: 173545296.000
Epoch [28/100], Loss: 173545024.000
E

In [None]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor)
    print(f'Test Loss: {test_loss.item():.3f}')

Test Loss: 132710208.000
