# Airbnb

Airbnb rental price forecasting.

## Dependencies

In [None]:
import os
import re
import sys

import catboost
import matplotlib.pyplot as pyplot
import pandas
import sklearn.model_selection as model_selection

## Data

Data exploration, visualization, transformation and selection.

In [None]:
def _parse_price(value):
    """Convert a raw price string to a float.

    Every character other than digits, '-' and '.' is stripped before the
    conversion (so e.g. currency symbols and thousands separators go away).
    If nothing is left after stripping -- for instance an empty cell -- NaN
    is returned instead of letting ``float('')`` raise ValueError.
    """
    cleaned = re.sub(r'[^\d\-.]', '', value)

    return float(cleaned) if cleaned else float('nan')


# Load only the review-related feature columns and the target (`price`),
# limited to the first 100 rows of the file.
data = pandas.read_csv(
    './data/airbnb.csv',
    usecols=[
        'number_of_reviews',
        'review_scores_rating',
        'review_scores_location',
        'review_scores_value',
        'price'
    ],
    converters={
        'price': _parse_price
    },
    nrows=100
)

In [None]:
# Remove, in place, every row that has a missing value in any column.
data.dropna(axis=0, how='any', inplace=True)

In [None]:
# Remove, in place, the rows whose price is zero or negative.
non_positive_rows = data.index[data['price'] <= 0]

data.drop(index=non_positive_rows, inplace=True)

In [None]:
# Preview the first five rows of the cleaned data.
data.head(n=5)

In [None]:
# Show summary statistics for the columns of the cleaned data.
data.describe()

In [None]:
# One bar chart per column: the 20 most frequent values of that column
# (value_counts sorts by frequency, so the first 20 entries are the top 20).
for column in data.columns:
    top_counts = data[column].value_counts().head(20)

    top_counts.plot(
        kind='bar',
        title=f'{column} (20 samples)',
        xlabel='Value',
        ylabel='Frequency',
        figsize=(20, 6),
    )

    # Render each figure on its own before the next one is drawn.
    pyplot.show()

In [None]:
# Target: the `price` column. Features: every other column.
labels = data['price']

train = data.drop(columns=['price'])

In [None]:
# Hold out 20% of the rows for testing and keep 80% for training.
# A fixed random_state makes the shuffle -- and therefore which rows land in
# each subset -- reproducible from one run of the notebook to the next.
x, x_test, y, y_test = model_selection.train_test_split(
    train,
    labels,
    test_size=0.2,
    train_size=0.8,
    random_state=42,
)

## Model

Regression model and random hyperparameter optimization.

In [None]:
# CatBoost regressor created with the library's default parameters.
model = catboost.CatBoostRegressor()

In [None]:
# Silence everything printed from here on by pointing sys.stdout at the
# platform's null device (os.devnull works on Windows as well as POSIX,
# unlike a hard-coded '/dev/null').
# The previous stream is kept in `original_stdout` so that output can be
# re-enabled later with `sys.stdout = original_stdout`.
original_stdout = sys.stdout

f = open(os.devnull, 'w')

sys.stdout = f

In [None]:
%%capture

# Candidate hyperparameter values from which the randomized search samples.
grid = dict(
    learning_rate=[0.03, 0.1],
    depth=[4, 6, 10],
    l2_leaf_reg=[1, 3, 5, 7, 9],
)

# Run the search on the training split with plotting and logging turned off.
model.randomized_search(grid, X=x, y=y, plot=False, verbose=False)

In [None]:
# Predict prices for the held-out test features.
predictions = model.predict(x_test)