In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import pandas as pd
import pathlib
import cv2
from PIL import Image
import torch
import torch.nn as nn
import torchvision.transforms as tf_transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import gdown
import os
import torch.optim as optim


In [3]:
# Load housing dataset
df = pd.read_csv('./AustinHousingData_Preprocessed.csv')

In [4]:
# Normalize the prices (optional but recommended for regression)
df['price'] = (df['price'] - df['price'].mean()) / df['price'].std()

# Split the dataset
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

# Define transformations
transform = tf_transforms.Compose([
    tf_transforms.Resize((224, 224)),
    tf_transforms.ToTensor(),
    tf_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [5]:
# Load housing dataset
df = pd.read_csv('./AustinHousingData_Preprocessed.csv')

# Funkcija za učitavanje slika
def image_generator(df, directory, input_shape, batch_size):
    files = df['homeImage'].values

    x_batch = []
    y_batch = []
    for file_name in files:
        file_path = os.path.join(directory, file_name)
        image = cv2.imread(file_path)
        image = cv2.resize(image, (input_shape[1], input_shape[0]))
        print(image.shape)
        # Transpose the image to have channels as the last dimension
        #image = image.transpose((2, 0, 1))  # assuming channels last, change if needed
        #print(image.shape)
        image_id = file_name
        price = df[df['homeImage'] == file_name]['price'].values[0]

        x_batch.append(image)
        y_batch.append(price)

        if len(x_batch) == batch_size:
            yield np.array(x_batch), np.array(y_batch)
            x_batch = []
            y_batch = []

    if x_batch:
        yield np.array(x_batch), np.array(y_batch)


# Klasa za dataset
class CustomDataset(Dataset):
    def __init__(self, df, directory, input_shape):
        self.df = df
        self.directory = directory
        self.input_shape = input_shape
        self.files = df['homeImage'].values  # Koristite homeImage kolonu iz dataframe-a

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        file_name = self.files[idx]
        file_path = os.path.join(self.directory, file_name)
        image = cv2.imread(file_path)
        image = cv2.resize(image, (self.input_shape[1], self.input_shape[0]))
        label = float(self.df[self.df['homeImage'] == file_name]['price'].values[0])
        return image, label


# Direktorijum za trening slika na Google Drive-u
image_dir = '/content/drive/MyDrive/homeImages'

# Učitavanje podataka koristeći generator sa batch-ovima
num_epochs = 10  # Prilagodite broj epoha prema vašim potrebama
batch_size = 128
data_generator = image_generator(df, image_dir, input_shape=(224, 224, 3), batch_size=batch_size)


In [6]:
# Koristimo next funkciju da izvučemo prvi batch iz generatora
#batch_images, batch_labels = next(data_generator)

# Ovde možete dodati dodatni kod za rad sa prvim batch-om ako je potrebno

# Ispisujemo oblik slike
#print("Shape of the first image in the batch:", batch_images[0].shape)

In [7]:
class ResNetRegression(nn.Module):
    def __init__(self, input_channels=3):
        super(ResNetRegression, self).__init__()
        resnet = models.resnet50(pretrained=True)
        # Modify the first convolutional layer to match the number of input channels
        resnet.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        resnet.fc = nn.Sequential(
            nn.Linear(resnet.fc.in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )
        self.resnet = resnet

    def forward(self, x):
        return self.resnet(x)


In [8]:
# Instantiate the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetRegression(input_channels=3).to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)




In [9]:
train_dataset = CustomDataset(train_df, image_dir, input_shape=(224, 224, 3))
test_dataset = CustomDataset(test_df, image_dir, input_shape=(224, 224, 3))

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [10]:
for epoch in range(num_epochs):
    model.train()
    for i, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
        inputs = inputs.to(device, dtype=torch.float32)
        targets = targets.to(device, dtype=torch.float32)



        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()

    # Print or log the training loss for each epoch if needed
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')


Epoch 1/10, Loss: 0.6187809705734253
Epoch 2/10, Loss: 0.5716995000839233
Epoch 3/10, Loss: 1.0943646430969238
Epoch 4/10, Loss: 0.9788837432861328
Epoch 5/10, Loss: 0.7955726981163025
Epoch 6/10, Loss: 0.9533791542053223
Epoch 7/10, Loss: 0.9192478060722351
Epoch 8/10, Loss: 0.7651209235191345
Epoch 9/10, Loss: 0.6402580142021179
Epoch 10/10, Loss: 0.7377968430519104


In [12]:
model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
        inputs = inputs.to(device, dtype=torch.float32)
        targets = targets.to(device, dtype=torch.float32)

        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), targets)

# Print or log the test loss if needed
print(f'Test Loss: {loss.item()}')


Test Loss: 1.1552252769470215


In [14]:
average_test_loss = loss.item() / len(test_loader)
average_test_loss

0.03726533151442005

In [15]:
from sklearn.metrics import mean_squared_error, median_absolute_error, explained_variance_score

model.eval()
with torch.no_grad():
    all_predictions = []
    all_targets = []

    for inputs, targets in test_loader:
        inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
        inputs = inputs.to(device, dtype=torch.float32)
        targets = targets.to(device, dtype=torch.float32)

        outputs = model(inputs)

        all_predictions.append(outputs.squeeze().cpu().numpy())
        all_targets.append(targets.cpu().numpy())

# Concatenate predictions and targets
all_predictions = np.concatenate(all_predictions)
all_targets = np.concatenate(all_targets)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(all_targets, all_predictions)

# Calculate Median Absolute Error (MedAE)
medae = median_absolute_error(all_targets, all_predictions)

# Calculate Explained Variance Score
explained_variance = explained_variance_score(all_targets, all_predictions)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Median Absolute Error (MedAE): {medae}')
print(f'Explained Variance Score: {explained_variance}')


Mean Squared Error (MSE): 0.9381877183914185
Median Absolute Error (MedAE): 0.43593984842300415
Explained Variance Score: 0.10327601432800293


In [13]:
# Testiranje modela na pojedinačnim primerima
model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.permute(0, 3, 1, 2)  # Change the order of dimensions
        inputs = inputs.to(device, dtype=torch.float32)
        targets = targets.to(device, dtype=torch.float32)

        outputs = model(inputs)
        predicted_prices = outputs.squeeze().cpu().numpy()

        # Print or log the predicted prices if needed
        for predicted_price, target_price in zip(predicted_prices, targets.cpu().numpy()):
            print(f'Predicted Price: {predicted_price}, Actual Price: {target_price}')

Predicted Price: -0.23572608828544617, Actual Price: -0.1579548716545105
Predicted Price: -0.39641112089157104, Actual Price: -1.1532766819000244
Predicted Price: 0.09579752385616302, Actual Price: 3.1906216144561768
Predicted Price: -0.2856556475162506, Actual Price: 0.7718085646629333
Predicted Price: -0.622986376285553, Actual Price: -0.2700022757053375
Predicted Price: -0.7025690078735352, Actual Price: -0.8860295414924622
Predicted Price: -0.737705647945404, Actual Price: -0.6817747950553894
Predicted Price: -0.4131995737552643, Actual Price: -1.1442174911499023
Predicted Price: -0.26610270142555237, Actual Price: -0.6144955158233643
Predicted Price: -0.27204641699790955, Actual Price: -1.2241090536117554
Predicted Price: -0.36860039830207825, Actual Price: -1.3120273351669312
Predicted Price: -1.0199971199035645, Actual Price: -0.6596479415893555
Predicted Price: -0.27056804299354553, Actual Price: -1.1107652187347412
Predicted Price: -0.14895708858966827, Actual Price: 0.0111809

KeyboardInterrupt: ignored

In [16]:
torch.save(model.state_dict(), '/content/drive/MyDrive/Price-Image-Predictor-V1.pth')

Pa sad... Meni se cini da predvidja lose cene, a chatgpt kaze da je onako mid... Ovo je samo prva verzija! Moralo je preko mog google drive-a. Moze i preko vaseg, ali mora da se uploada dataset