In [None]:
import torch
import torch.utils.data
import torch.nn as nn
from torch.nn import functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os
# True when PyTorch reports a usable CUDA device. Later cells consult this
# flag when deciding whether to move the model to the GPU.
GPU = torch.cuda.is_available()

In [None]:
# Read the Airbnb listings table from the CSV file in the input directory
# and preview its first rows.

csv_path = '../input/aps360_airbnb1.csv'
df = pd.read_csv(csv_path, index_col=False)
df.head()

In [None]:
# Reorder the columns so that every model input comes first and the 'price'
# target is the last column. The two 'Unnamed' columns are dropped (presumably
# index artefacts of the CSV export -- verify against the file).
# The selection is built by filtering rather than list.remove(), so the cell
# can be re-run after `df` has already been trimmed, and it does not raise if
# one of the junk columns is absent.
excluded_cols = {'price', 'Unnamed: 56', 'Unnamed: 0'}
df_cols = [col for col in df.columns if col not in excluded_cols]
df_cols.append('price')
df = df[df_cols]

# Number of model inputs: every remaining column except the trailing target.
input_features = len(df_cols) - 1
print(input_features)
print(len(df))

In [None]:
# Turn the table into a float32 array and split its rows into train/valid/test.
datanp = df.values.astype(np.float32)
np.random.seed(50) # set the numpy seed for consistent split

# Data split count (train/valid/test): 2276/758/759 for the 3793-row dataset.
# The row count is read from the data instead of being hard-coded, so every
# row lands in exactly one split; rows beyond the fixed train/valid sizes go
# to the test split.
n_rows = len(datanp)
n_train, n_valid = 2276, 758
if n_rows < n_train + n_valid:
    raise ValueError(
        "need at least {} rows for the train/valid split, got {}".format(n_train + n_valid, n_rows))

all_idx = np.arange(n_rows)
train_idx = np.sort(np.random.choice(all_idx, n_train, replace=False))
# setdiff1d returns the left-over indices in ascending order, i.e. the same
# sampling pool a linear scan over range(n_rows) would produce, without the
# quadratic membership tests.
remaining_idx = np.setdiff1d(all_idx, train_idx)
valid_idx = np.sort(np.random.choice(remaining_idx, n_valid, replace=False))
test_idx = np.setdiff1d(remaining_idx, valid_idx)

train_set = datanp[train_idx]
valid_set = datanp[valid_idx]
test_set = datanp[test_idx]

# Column indices of the model inputs (every column before the trailing price),
# as an int64 tensor usable with torch.index_select.
separator = torch.arange(input_features)

In [None]:
class AirbnbPredictor(nn.Module):
    """Small fully connected regressor for Airbnb listing price prediction.

    The network is Linear(N, 10) -> ReLU -> Linear(10, 1) -> Sigmoid.

    input : feature tensor of shape (batch, N)
    output : tensor of shape (batch, 1) holding the price estimate; the
             sigmoid keeps it within 0..1 (normalized price)
    """

    def __init__(self, N):
        """Build the network for inputs with ``N`` features per sample."""
        super().__init__()
        self.name = 'AirbnbPredictor'
        hidden_units = 10
        layers = [
            nn.Linear(N, hidden_units),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),
        ]
        self.features = nn.Sequential(*layers)

    def forward(self, x):
        """Return the price estimate for the batch ``x``."""
        return self.features(x)

In [None]:
def evaluate(model, loader, criterion):
    """Compute the average loss of ``model`` over every batch in ``loader``.

    Each batch must be a 2-D tensor whose last column is the target price and
    whose remaining columns are the input features.

    Args:
        model: PyTorch neural network object
        loader: PyTorch data loader (or any iterable of batches) for the dataset
        criterion: The loss function, called as ``criterion(outputs, price)``

    Returns:
        loss: A float, the mean of the per-batch losses over the loader

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    # Run on whichever device the model lives on instead of relying on a
    # notebook-level flag; a parameter-free model is evaluated on the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    total_loss = 0.0
    num_batches = 0

    was_training = model.training
    model.eval()
    try:
        # Gradients are not needed here; skipping them saves memory and time.
        with torch.no_grad():
            for data in loader:
                inputs = data[:, :-1].to(device)
                price = data[:, -1:].to(device)

                outputs = model(inputs)
                total_loss += criterion(outputs, price).item()
                num_batches += 1
    finally:
        # Restore the caller's mode so a surrounding training loop is unaffected.
        model.train(was_training)

    if num_batches == 0:
        raise ValueError("cannot evaluate on an empty loader")

    return total_loss / num_batches

In [None]:
# Training code

def train_net(model, train_set, valid_set, batch_size, learning_rate, num_epochs, name='default'):
    """Train ``model`` with SGD on an MSE loss, checkpointing and plotting as it goes.

    Each row of ``train_set`` / ``valid_set`` must hold the input features
    followed by the target price in the last column.

    Args:
        model: PyTorch neural network object; it is updated in place
        train_set: Training rows (e.g. a 2-D numpy array) accepted by DataLoader
        valid_set: Validation rows in the same layout as ``train_set``
        batch_size: Number of rows per mini-batch
        learning_rate: Step size for the SGD optimizer
        num_epochs: Number of passes over the training set
        name: Tag embedded in the checkpoint file names

    Side effects:
        Prints the train/validation loss after every epoch, writes one
        checkpoint file per epoch to the working directory and shows a plot
        of both loss curves at the end.

    Raises:
        ValueError: If ``train_set`` is empty.
    """
    ########################################################################
    # Fixed PyTorch random seed for reproducible result
    torch.manual_seed(1000)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(1000)

    if len(train_set) == 0:
        raise ValueError("train_set is empty; nothing to train on")

    ########################################################################
    # Setup data loaders for np arrays
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
    # Validation rows are kept in a fixed order: the batch composition then
    # stays the same every epoch, so the validation losses are comparable.
    valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0)

    print("Data loaded. Starting training:")

    ########################################################################
    # Loss function and optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Batches are moved to whichever device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    train_loss = np.zeros(num_epochs)
    val_loss = np.zeros(num_epochs)

    ########################################################################
    # Train the network
    for epoch in range(num_epochs):
        running_loss = 0.
        num_batches = 0

        for data in train_loader:
            # Last column is the target price, everything before it is input.
            inputs = data[:, :-1].to(device)
            price = data[:, -1:].to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass, backward pass, and optimize
            outputs = model(inputs)
            loss = criterion(outputs, price)
            loss.backward()
            optimizer.step()

            # Calculate the statistics
            running_loss += loss.item()
            num_batches += 1

        # Per-epoch loss is the mean of the per-batch losses.
        train_loss[epoch] = running_loss / num_batches
        val_loss[epoch] = evaluate(model, valid_loader, criterion)

        print("Epoch {}: Train loss: {} | Validation loss: {}".format(
            epoch + 1, train_loss[epoch], val_loss[epoch]))

        # Save the current model (checkpoint) to a file
        model_path = "MODEL{}_NAME{}_EPOCH{}".format(model.name, name, epoch)
        torch.save(model.state_dict(), model_path)

    print('Finished Training')

    # Plot both loss curves against the 1-based epoch number.
    epochs = range(1, num_epochs + 1)
    plt.title("Training & Validation Loss")
    plt.plot(epochs, train_loss, label="Train")
    plt.plot(epochs, val_loss, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend(loc='best')
    plt.show()

In [None]:
# Build the model, move it to the GPU when one is available, then train it.
test_model = AirbnbPredictor(input_features)
if GPU:
    test_model = test_model.cuda()
train_net(
    test_model,
    train_set,
    valid_set,
    batch_size=32,
    learning_rate=3e-4,
    num_epochs=1000,
    name='default',
)

In [None]:
# Report the loss on the held-out test split. The loader keeps the rows in a
# fixed order so the batch composition -- and with it the reported value --
# does not depend on the random number generator state.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=False, num_workers=0)
evaluate(test_model, test_loader, torch.nn.MSELoss())