In [1]:
import pandas as pd
import numpy as np
from os.path import join

In [2]:
DATA_PATH = '../data'
socal_df = pd.read_csv(join(DATA_PATH, 'socal2.csv'))
socal_df.head()

Unnamed: 0,image_id,street,citi,n_citi,bed,bath,sqft,price
0,0,1317 Van Buren Avenue,"Salton City, CA",317,3,2.0,1560,201900
1,1,124 C Street W,"Brawley, CA",48,3,2.0,713,228500
2,2,2304 Clark Road,"Imperial, CA",152,3,1.0,800,273950
3,3,755 Brawley Avenue,"Brawley, CA",48,3,1.0,1082,350000
4,4,2207 R Carrillo Court,"Calexico, CA",55,4,3.0,2547,385100


In [3]:
import torch
from PIL import Image

In [4]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, df, split='train', transform=None):
        self.df = df
        self.X = df.drop(columns=['price', 'image_id'])
        self.img_dir = join(DATA_PATH, 'socal_pics')
        self.y = df['price']
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = join(self.img_dir, str(self.df.iloc[idx]['image_id'])+ '.jpg')
        img = Image.open(img_path)
        img = self.transform(img)

        return {
            'image': img,
            'sqft': torch.tensor(self.X.iloc[idx]['sqft'], dtype=torch.float),
            'price': torch.tensor(self.y.iloc[idx], dtype=torch.float)
        }

In [5]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(socal_df, test_size=0.2, random_state=42)
train_df.shape, test_df.shape

((12379, 8), (3095, 8))

In [6]:
# for mac
train_df = train_df.head(1024)
test_df = test_df.head(32)

In [7]:
from torchvision.transforms import Resize, Compose, ToPILImage, ToTensor

transform = Compose([
    Resize((224, 224)),
    ToTensor()
])

In [8]:
train_dataset = Dataset(train_df, transform=transform)
test_dataset = Dataset(test_df, transform=transform)

In [9]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [10]:
from timm import create_model
model = create_model('resnet18', pretrained=True, num_classes=1)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
model.fc = torch.nn.Linear(model.fc.in_features, 1)

In [12]:
from decimal import Decimal

In [13]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

model = model.to(device)

EPOCHS = 10

criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
for i in range(EPOCHS):
    model.train()
    train_mse = 0
    for batch in train_loader:
        optimizer.zero_grad()
        X, y = batch['image'].to(device), batch['price'].to(device)
        y_pred = model(X).squeeze()
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        train_mse += loss.item()
    print(f'Epoch {i+1}\nTrain Loss: {Decimal(loss.item()):.2E}')
    print(f'Train MSE: {Decimal(train_mse/len(train_loader)):.2E}')

    model.eval()
    test_mse = 0
    for batch in test_loader:
        X, y = batch['image'].to(device), batch['price'].to(device)
        y_pred = model(X).squeeze()
        loss = criterion(y_pred, y)
        test_mse += loss.item()
    print(f'Test Loss: {Decimal(loss.item()):.2E}')
    print(f'Test MSE: {Decimal(test_mse/len(test_loader)):.2E}')


cpu
Epoch 1
Train Loss: 6.92E+11
Train MSE: 6.24E+11
Test Loss: 3.10E+11
Test MSE: 5.84E+11
Epoch 2
Train Loss: 5.07E+11
Train MSE: 6.21E+11
Test Loss: 3.08E+11
Test MSE: 5.81E+11
Epoch 3
Train Loss: 7.58E+11
Train MSE: 6.12E+11
Test Loss: 3.02E+11
Test MSE: 5.73E+11


KeyboardInterrupt: 