<a href="https://colab.research.google.com/github/dernameistegal/airbnb_price/blob/main/pictures_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. Preparation

In [1]:
#@title remove repos from disc
%cd /content
!rm -r airbnb_price

/content


In [2]:
#@title Clone repo
# Fetch the project repository into the current working directory; the helper
# modules imported later in this notebook are loaded from this checkout.
!git clone https://github.com/dernameistegal/airbnb_price.git

Cloning into 'airbnb_price'...
remote: Enumerating objects: 127, done.[K
remote: Counting objects: 100% (127/127), done.[K
remote: Compressing objects: 100% (115/115), done.[K
remote: Total 127 (delta 58), reused 54 (delta 8), pack-reused 0[K
Receiving objects: 100% (127/127), 1.90 MiB | 6.88 MiB/s, done.
Resolving deltas: 100% (58/58), done.


In [3]:
#@title add paths to library search path
import sys

# Register the repository sub-folders so their modules can be imported by bare name.
sys.path.extend([
    "/content/airbnb_price/custom_functions",
    "/content/airbnb_price/feature_extraction",
])

In [4]:
#@title Imports and drive
import os
import torch
import torchvision
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
from tqdm import tqdm

# own modules
import general_utils as gu
import feature_extraction_utils as fu



from google.colab import drive

#@title Mount drive
# Mount Google Drive under /content/drive/; the data paths used in this notebook
# all point below /content/drive/MyDrive.
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [5]:
#@title define device

# device
# gu.get_device() is a project helper from general_utils; presumably it returns the
# torch device to use (its printed output reports the CUDA device) — TODO confirm.
device = gu.get_device()
# Number of CPUs reported by the OS (os.cpu_count() may return None).
num_cpus = os.cpu_count()
print(num_cpus, 'CPUs available')

cuda available: True ; cudnn available: True ; num devices: 1
Using device Tesla T4
2 CPUs available


# 1. Data Cleaning Hostpics (does not have to be run again)

In [None]:
# Directory on the mounted Google Drive holding the raw host pictures ("hostpics_raw").
hostpics_dir = "/content/drive/MyDrive/Colab/airbnb/data/hostpics/hostpics_raw"

In [None]:
# make dataset and dataloader with hostpics

# load moments
# The .npy file is read as a NumPy array and converted to a tensor before it is
# handed to the dataset (presumably per-channel image statistics — TODO confirm).
hostpics_moments = np.load("/content/drive/MyDrive/Colab/airbnb/data/hostpics/hostpics_moments.npy")
hostpics_moments = torch.from_numpy(hostpics_moments)

# initialize dataset and dataloader
# Fixed: the original passed the undefined name `hostpic_moments`, which raised a NameError.
# NOTE(review): the thumbnails cell calls fu.Dataset with `picture_dir=` / `response_dir=`
# rather than `filepath=`; confirm which signature the current fu.Dataset accepts.
dataset = fu.Dataset(filepath=hostpics_dir, channel_moments=hostpics_moments, ndata=10)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)


In [None]:
# extract features from pretrained model
# Keep only modules 0-30 of the pretrained VGG19 `features` stack as a fixed extractor.
vgg = torchvision.models.vgg19(pretrained=True)
feature_extractor = vgg.features[0:31]

# compute features for later training
train_features = fu.compute_train_features(device=device, dataloader=dataloader, feature_extractor=feature_extractor)
# Fixed: the original read `train.features`, a typo for `train_features` that fails
# at runtime. Move the result to host memory and convert it to a NumPy array.
train_features = train_features.cpu().numpy()

# 4. Feature Extraction thumbnails

In [10]:
# Locations on the mounted Google Drive that are passed to fu.Dataset below as
# `picture_dir` (raw thumbnails) and `response_dir` (thumbnail responses).
thumbnails_dir = "/content/drive/MyDrive/Colab/airbnb/data/thumbnails/thumbnails_raw"
response_dir = "/content/drive/MyDrive/Colab/airbnb/data/thumbnails/thumbnails_response"

In [11]:
# make train_dataset and val_dataset and respective dataloader with thumbnails

# load moments
thumbnails_moments = np.load("/content/drive/MyDrive/Colab/airbnb/data/thumbnails/thumbnails_moments.npy")
thumbnails_moments = torch.from_numpy(thumbnails_moments)

# initialize dataset and dataloader
dataset = fu.Dataset(picture_dir=thumbnails_dir, response_dir=response_dir, channel_moments=thumbnails_moments, ndata=1000)

# 70/30 train/validation split derived from the actual dataset length.
# random_split raises if the sizes do not sum to len(dataset), so the former
# hard-coded [700, 300] broke as soon as the dataset held anything but 1000 items.
# For 1000 items this yields exactly 700/300, and the fixed seed keeps the split reproducible.
n_train = len(dataset) * 7 // 10
n_val = len(dataset) - n_train
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [n_train, n_val], generator=torch.Generator().manual_seed(42))

# NOTE(review): the training loader does not reshuffle between epochs (shuffle=False);
# kept as in the original — consider shuffle=True for training.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=False)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=False)

In [12]:
#@title define models classes
class Model(torch.nn.Module):
    """Two-stage network: a feature extractor followed by a finalizer head.

    Args:
        feature_extractor: module applied to the input first.
        finalizer: module applied to the feature extractor's output.
    """

    def __init__(self, feature_extractor, finalizer):
        super().__init__()
        self.feature_extractor, self.finalizer = feature_extractor, finalizer

    def forward(self, x):
        """Return ``finalizer(feature_extractor(x))``."""
        return self.finalizer(self.feature_extractor(x))

class Finalizer(torch.nn.Module):
    """Trainable regression head for 512-channel feature maps.

    Architecture: three 3x3 convolutions (512 -> 512 channels, padding 1, so the
    spatial size is preserved), each followed by a ReLU, then a 2x2 max-pool and a
    four-layer MLP with dropout that ends in a single output value per sample.

    ``linear1`` expects 25088 = 512 * 7 * 7 flattened features, i.e. a 7x7 map after
    the pooling step, which corresponds to a 512 x 14 x 14 input.

    Layer names and shapes are unchanged, so existing state dicts still load.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv2 = torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.conv3 = torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1)
        self.pool1 = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.linear1 = torch.nn.Linear(in_features=25088, out_features=4096)
        self.drop1 = torch.nn.Dropout()
        self.linear2 = torch.nn.Linear(in_features=4096, out_features=2048)
        self.drop2 = torch.nn.Dropout()
        self.linear3 = torch.nn.Linear(in_features=2048, out_features=1024)
        self.drop3 = torch.nn.Dropout()
        self.linear4 = torch.nn.Linear(in_features=1024, out_features=1)

    def forward(self, x):
        """Map a batch of feature maps to one prediction per sample, shape ``(batch, 1)``."""
        # A ReLU follows every convolution: without a non-linearity in between, the
        # three stacked convolutions compose into a single affine map, so two of the
        # layers would add parameters but no representational power.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.pool1(x)

        # Flatten everything except the batch dimension for the fully connected part.
        x = torch.flatten(x, start_dim=1)
        x = self.drop1(F.relu(self.linear1(x)))
        x = self.drop2(F.relu(self.linear2(x)))
        x = self.drop3(F.relu(self.linear3(x)))

        # No activation on the output layer: unbounded regression output.
        return self.linear4(x)
        

In [13]:
# define feature extractor
# Load VGG19 with pretrained weights and keep only modules 0-30 of its `features`
# stack; this truncated stack is what the Model uses as feature extractor.
vgg = torchvision.models.vgg19(pretrained=True)
feature_extractor = vgg.features[0:31]

In [14]:
# define finalizer
# Freshly initialised head; Finalizer takes no constructor arguments.
finalizer = Finalizer()

In [15]:
# define model
# Chain the truncated VGG19 feature extractor and the finalizer head into one module.
model = Model(feature_extractor=feature_extractor, finalizer=finalizer)

In [21]:
# Move all model parameters and buffers to the selected device.
model = model.to(device)

In [16]:
# freeze parameters in feature extractor
# Only parameters whose qualified name mentions the extractor are excluded from
# gradient computation; everything else stays trainable.
for param_name, param in model.named_parameters():
    if "feature_extractor" not in param_name:
        continue
    param.requires_grad_(False)

In [17]:
# define loss function and optimizer
# Mean squared error between prediction and target.
loss_fn = torch.nn.MSELoss()
# Adam with its default hyper-parameters; it is handed every model parameter,
# including any that were frozen via requires_grad = False.
optimizer = torch.optim.Adam(model.parameters())

In [23]:
#@title define train functions
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR, StepLR
import torchvision
from torchvision import datasets, transforms, models
import fastprogress
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import time
    
def train(dataloader, optimizer, model, loss_fn, device, master_bar, scaler):
    """Train ``model`` for one epoch and return the mean batch loss.

    Args:
        dataloader: iterable yielding ``(image, target)`` batches of tensors.
        optimizer: optimizer that is stepped once per batch (through ``scaler``).
        model: network to train; it is switched to training mode.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: device the batches are moved to.
        master_bar: parent progress bar handed to ``fastprogress.progress_bar``.
        scaler: gradient scaler exposing ``scale``, ``step`` and ``update``
            (e.g. ``torch.cuda.amp.GradScaler``).

    Returns:
        Mean of the per-batch losses (``nan`` if the dataloader yields no batch).
    """
    model.train()
    batch_losses = []

    for image, target in fastprogress.progress_bar(dataloader, parent=master_bar):

        image, target = image.to(device), target.to(device)

        # zero gradient
        optimizer.zero_grad()

        # Forward pass: call the module itself (not .forward) so registered hooks run.
        prediction = model(image)

        # Align the prediction with the target's shape. A blanket squeeze() turns a
        # batch of size one into a 0-d tensor and leaves (B,) vs (B, 1) mismatches,
        # both of which make the loss broadcast silently and can yield a wrong value.
        if prediction.numel() == target.numel():
            prediction = prediction.reshape(target.shape)
        else:
            prediction = torch.squeeze(prediction)

        # loss calculation
        loss = loss_fn(prediction, target)

        # Backward pass, routed through the gradient scaler.
        # NOTE(review): no autocast context is opened in this function, so the scaler
        # only scales/unscales the loss here.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # For plotting the train loss, save it for each batch
        batch_losses.append(loss.item())

    return np.mean(batch_losses)


def validate(dataloader, model, loss_fn, device, master_bar):
    """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

    The model is switched to evaluation mode and gradients are disabled.

    Args:
        dataloader: iterable yielding ``(image, target)`` batches of tensors.
        model: network to evaluate.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: device the batches are moved to.
        master_bar: parent progress bar handed to ``fastprogress.progress_bar``.

    Returns:
        Mean of the per-batch losses (``nan`` if the dataloader yields no batch).
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for image, target in fastprogress.progress_bar(dataloader, parent=master_bar):

            image, target = image.to(device), target.to(device)

            # Forward pass: call the module itself (not .forward) so registered hooks run.
            prediction = model(image)

            # Align the prediction with the target's shape. A blanket squeeze() turns a
            # batch of size one into a 0-d tensor and leaves (B,) vs (B, 1) mismatches,
            # both of which make the loss broadcast silently and can yield a wrong value.
            if prediction.numel() == target.numel():
                prediction = prediction.reshape(target.shape)
            else:
                prediction = torch.squeeze(prediction)

            # loss calculation
            loss = loss_fn(prediction, target)

            # For plotting the validation loss, save it for each batch
            batch_losses.append(loss.item())

    return np.mean(batch_losses)
    

def run_training(model, optimizer, loss_fn, device, num_epochs,
                 train_dataloader, val_dataloader, verbose=False):
    """Run the complete train/validate loop and collect the per-epoch losses.

    Each epoch calls ``train`` on ``train_dataloader`` and then ``validate`` on
    ``val_dataloader``. The total wall-clock time is printed once at the end.

    Args:
        model: network to optimise.
        optimizer: optimizer passed on to ``train``.
        loss_fn: loss callable passed on to ``train`` and ``validate``.
        device: device passed on to ``train`` and ``validate``.
        num_epochs (int): number of epochs to run.
        train_dataloader: batches used for training.
        val_dataloader: batches used for validation.
        verbose (bool, optional): if True, write both losses to the progress bar
            after every epoch. Defaults to False.

    Returns:
        tuple: ``(train_loss, val_loss)``, two lists with one entry per epoch.
    """
    # bookkeeping: timer, gradient scaler and the outer (per-epoch) progress bar
    started_at = time.time()
    grad_scaler = torch.cuda.amp.GradScaler()
    epoch_bar = fastprogress.master_bar(range(num_epochs))

    history = {"train": [], "val": []}

    for _ in epoch_bar:
        # one optimisation pass followed by one evaluation pass
        epoch_train_loss = train(train_dataloader, optimizer, model, loss_fn, device, epoch_bar, grad_scaler)
        epoch_val_loss = validate(val_dataloader, model, loss_fn, device, epoch_bar)

        # keep both curves for later plotting
        history["train"].append(epoch_train_loss)
        history["val"].append(epoch_val_loss)

        if not verbose:
            continue
        epoch_bar.write(
            f'Train loss: {epoch_train_loss:.2f}, val loss: {epoch_val_loss:.2f}')

    time_elapsed = np.round(time.time() - started_at, 0).astype(int)
    print(f'Finished training after {time_elapsed} seconds.')
    return history["train"], history["val"]


def plot(title, label, train_results, val_results, yscale='linear', save_path=None,
         extra_pt=None, extra_pt_label=None):
    """Plot learning curves.

    Training and validation results are drawn against the epoch number
    (starting at 1) as dashed lines with circular markers.

    Args:
        title (str): Title of plot
        label (str): y-axis label, i.e. the name of the plotted quantity
            (for example loss or accuracy).
        train_results (list): Results vector of training of length of number
            of epochs trained. Could be loss or accuracy.
        val_results (list): Results vector of validation of length of number
            of epochs. Could be loss or accuracy.
        yscale (str, optional): Matplotlib.pyplot.yscale parameter.
            Defaults to 'linear'.
        save_path (str, optional): If passed, figure will be saved at this path.
            Defaults to None.
        extra_pt (tuple, optional): Tuple of length 2, defining x and y coordinate
            of where an additional black dot will be plotted. Defaults to None.
        extra_pt_label (str, optional): Legend label of extra point. Defaults to None.
    """
    # Epochs are numbered from 1 on the x-axis.
    epoch_array = np.arange(len(train_results)) + 1

    sns.set(style='ticks')

    plt.plot(epoch_array, train_results, epoch_array, val_results, linestyle='dashed', marker='o', zorder=-1)
    legend = ['Train results', 'Validation results']

    if extra_pt:
        # Highlight a single point (drawn in black) and give it its own legend entry.
        plt.scatter(extra_pt[0], extra_pt[1], c="k")
        legend.append(extra_pt_label)

    plt.legend(legend)
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.yscale(yscale)
    # Set the title once, with the final font size.
    plt.title(title, fontsize=15)

    if save_path:
        plt.savefig(str(save_path), bbox_inches='tight')
    plt.show()

def empty_loss(*irgendwas):
    """Placeholder loss: ignore every positional argument and return a zero tensor.

    ``irgendwas`` (German for "anything") swallows whatever the caller passes.
    The result is a 0-dimensional integer tensor holding 0.
    """
    return torch.zeros((), dtype=torch.int64)

In [None]:
# Train for 200 epochs on the thumbnail dataloaders; verbose=True writes the
# train/validation loss of every epoch to the progress bar. The returned lists
# hold one loss value per epoch.
train_loss, val_loss = run_training(model=model, optimizer=optimizer, loss_fn=loss_fn, device=device, num_epochs=200,
                                    train_dataloader=train_dataloader, val_dataloader=val_dataloader, verbose=True)