In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import pathlib
import cv2
from PIL import Image
import torch
import torch.nn as nn
import torchvision.transforms as tf_transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import gdown
import os
import torch.optim as optim


In [17]:
# Load housing dataset
df = pd.read_csv('./AustinHousingData_Preprocessed.csv')

In [18]:
min_price = df['price'].min()
max_price = df['price'].max()

print(f" Minimum original price: {min_price}")
print(f" Maximum original price: {max_price}")

 Minimum original price: 79402.0705
 Maximum original price: 1954899.0775


In [4]:
# Normalize the prices (optional but recommended for regression)
df['price'] = (df['price'] - df['price'].mean()) / df['price'].std()

# Split the dataset
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

# For the images
transform = tf_transforms.Compose([
        tf_transforms.RandomHorizontalFlip(),
        tf_transforms.RandomVerticalFlip(),
        tf_transforms.RandomRotation(degrees=15),
        tf_transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        tf_transforms.Resize((224, 224)),
        tf_transforms.ToTensor(),
        tf_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])


In [5]:
# CONSTANTS
num_epochs = 20
batch_size = 128

In [6]:
def image_generator(df, directory, input_shape, batch_size):
    files = df['homeImage'].values

    x_batch = []
    y_batch = []
    for file_name in files:
        file_path = os.path.join(directory, file_name)
        image = cv2.imread(file_path)
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        image_id = file_name
        price = df[df['homeImage'] == file_name]['price'].values[0]

        x_batch.append(image)
        y_batch.append(price)

        if len(x_batch) == batch_size:
            yield np.array(x_batch), np.array(y_batch)
            x_batch = []
            y_batch = []

    if x_batch:
        yield np.array(x_batch), np.array(y_batch)


class CustomDataset(Dataset):
    def __init__(self, df, directory, input_shape):
        self.df = df
        self.directory = directory
        self.input_shape = input_shape
        self.files = df['homeImage'].values  # KoristiMO homeImage kolonu iz dataframe-a

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        file_name = self.files[idx]
        file_path = os.path.join(self.directory, file_name)
        image = cv2.imread(file_path)
        image = cv2.resize(image, (self.input_shape[1], self.input_shape[0]))
        label = float(self.df[self.df['homeImage'] == file_name]['price'].values[0])
        return image, label


# Direktorijum za trening slika na Google Drive-u
image_dir = '/content/drive/MyDrive/homeImages'

data_generator = image_generator(df, image_dir, input_shape=(224, 224, 3), batch_size=batch_size)

In [7]:
# Koristimo next funkciju da izvučemo prvi batch iz generatora
#batch_images, batch_labels = next(data_generator)

# Ovde možete dodati dodatni kod za rad sa prvim batch-om ako je potrebno

# Ispisujemo oblik slike
#print("Shape of the first image in the batch:", batch_images[0].shape)

In [8]:
class ResNetRegression(nn.Module):
    def __init__(self, input_channels=3):
        super(ResNetRegression, self).__init__()
        resnet = models.resnet50(pretrained=True)
        # Modify the first convolutional layer to match the number of input channels
        resnet.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        resnet.fc = nn.Sequential(
            nn.Linear(resnet.fc.in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )
        self.resnet = resnet

    def forward(self, x):
        return self.resnet(x)


In [9]:
# Instantiate the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetRegression(input_channels=3).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 162MB/s]


In [10]:
train_dataset = CustomDataset(train_df, image_dir, input_shape=(224, 224, 3))
test_dataset = CustomDataset(test_df, image_dir, input_shape=(224, 224, 3))

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [11]:
for epoch in range(num_epochs):
    model.train()
    for i, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
        inputs = inputs.to(device, dtype=torch.float32)
        targets = targets.to(device, dtype=torch.float32)



        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()

    # Print or log the training loss for each epoch if needed
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')


Epoch 1/20, Loss: 0.523365318775177
Epoch 2/20, Loss: 0.7665634751319885
Epoch 3/20, Loss: 0.7384809851646423
Epoch 4/20, Loss: 0.8153394460678101
Epoch 5/20, Loss: 0.8049691319465637
Epoch 6/20, Loss: 0.8987608551979065
Epoch 7/20, Loss: 0.4523463249206543
Epoch 8/20, Loss: 0.60481196641922
Epoch 9/20, Loss: 0.7951939105987549
Epoch 10/20, Loss: 0.5732152462005615
Epoch 11/20, Loss: 0.416111022233963
Epoch 12/20, Loss: 0.6483496427536011
Epoch 13/20, Loss: 0.4671533703804016
Epoch 14/20, Loss: 0.2679119110107422
Epoch 15/20, Loss: 0.2608918845653534
Epoch 16/20, Loss: 0.2516487240791321
Epoch 17/20, Loss: 0.22516795992851257
Epoch 18/20, Loss: 0.17678454518318176
Epoch 19/20, Loss: 0.14388421177864075
Epoch 20/20, Loss: 0.09093771874904633


In [12]:
from sklearn.metrics import mean_squared_error, median_absolute_error, explained_variance_score

def evaluate_model(model, criterion, data_loader, device):
    model.eval()
    with torch.no_grad():
        all_predictions = []
        all_targets = []
        total_loss = 0.0
        num_batches = len(data_loader)

        for inputs, targets in data_loader:
            inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
            inputs = inputs.to(device, dtype=torch.float32)
            targets = targets.to(device, dtype=torch.float32)

            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), targets)
            total_loss += loss.item()

            all_predictions.append(outputs.squeeze().cpu().numpy())
            all_targets.append(targets.cpu().numpy())

        # Concatenate predictions and targets
        all_predictions = np.concatenate(all_predictions)
        all_targets = np.concatenate(all_targets)

        # Calculate metrics
        mse = mean_squared_error(all_targets, all_predictions)
        medae = median_absolute_error(all_targets, all_predictions)
        explained_variance = explained_variance_score(all_targets, all_predictions)

        # Print or log the test loss and metrics if needed
        average_loss = total_loss / num_batches
        print(f'Test Loss: {average_loss}')
        print(f'Mean Squared Error (MSE): {mse}')
        print(f'Median Absolute Error (MedAE): {medae}')
        print(f'Explained Variance Score: {explained_variance}')

        return average_loss, mse, medae, explained_variance

def test_model_predictions(model, data_loader, device, num_examples):
    model.eval()
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
            inputs = inputs.to(device, dtype=torch.float32)
            targets = targets.to(device, dtype=torch.float32)

            outputs = model(inputs)
            predicted_prices = outputs.squeeze().cpu().numpy()

            # Print or log the predicted prices if needed
            for predicted_price, target_price in zip(predicted_prices, targets.cpu().numpy()):
                print(f'Predicted Price: {predicted_price}, Actual Price: {target_price}')

            # Break the loop after printing the specified number of examples
            if i + 1 >= num_examples:
                break


In [13]:
# Evaluacija modela na test setu
test_loss, test_mse, test_medae, test_explained_variance = evaluate_model(model, criterion, test_loader, device)

Test Loss: 0.9315732525240991
Mean Squared Error (MSE): 0.9259281158447266
Median Absolute Error (MedAE): 0.4868400990962982
Explained Variance Score: 0.03963559865951538


In [14]:
# Testiranje modela na prvih 25 primera
test_model_predictions(model, test_loader, device, num_examples=25)

Predicted Price: -0.5955552458763123, Actual Price: -0.1579548716545105
Predicted Price: -0.8563575744628906, Actual Price: -1.1532766819000244
Predicted Price: 0.7119724154472351, Actual Price: 3.1906216144561768
Predicted Price: -0.290322482585907, Actual Price: 0.7718085646629333
Predicted Price: -0.7416175007820129, Actual Price: -0.2700022757053375
Predicted Price: -0.836172342300415, Actual Price: -0.8860295414924622
Predicted Price: -0.9575719237327576, Actual Price: -0.6817747950553894
Predicted Price: -0.20214711129665375, Actual Price: -1.1442174911499023
Predicted Price: -0.14124353229999542, Actual Price: -0.6144955158233643
Predicted Price: -0.054037611931562424, Actual Price: -1.2241090536117554
Predicted Price: -0.2518518269062042, Actual Price: -1.3120273351669312
Predicted Price: -0.5025056004524231, Actual Price: -0.6596479415893555
Predicted Price: -0.29721733927726746, Actual Price: -1.1107652187347412
Predicted Price: 0.7079096436500549, Actual Price: 0.01118094939

In [19]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Price-Image-Predictor.pth')

In [16]:
import joblib

joblib.dump(model, 'Image_price_predictor.pkl')


['Image_price_predictor.pkl']