In [77]:
import pandas
import numpy
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot

# Read in the data and display the first 5 columns
dataframe = pandas.read_csv('./calendar.csv')

# Set the date column to be a pandas datetime column
dataframe['date'] = pandas.to_datetime(dataframe['date'])
dataframe.head()

Unnamed: 0,listing_id,date,available,price
0,241032,2016-01-04,t,$85.00
1,241032,2016-01-05,t,$85.00
2,241032,2016-01-06,f,
3,241032,2016-01-07,f,
4,241032,2016-01-08,f,


The calendar data set includes the listing ID, calendar date, and whether the listing is available. Counting the number of unavailable dates in a give month should give an idea of the busiest months of year to visit Seattle.

In [78]:
# Check if there are any null values in the available or date columns

nullAvailability = dataframe['available'].isnull().sum()
nullDates = dataframe['date'].isnull().sum()

nullAvailability, nullDates

(0, 0)

There are no null dates, or availability values which makes data cleaning simple. Display a bar chart with availability by date

In [92]:
# Group by date, and availability
availabilityByDate = dataframe.groupby(['date', 'available'], as_index=False).size()

# Locate only the listings that not available on a given date
availabilityByDate = availabilityByDate.loc[availabilityByDate['available'] == 'f']

# Group the listings by Month
availabilityByDate = availabilityByDate.groupby(pandas.Grouper(key='date', axis=0, freq='M'), as_index=False).size()

# Add the month name
availabilityByDate['month'] = availabilityByDate['date'].dt.strftime('%B')

availabilityByDate = availabilityByDate.sort_values(by=['size', 'date'])

availabilityByDate

Unnamed: 0,date,size,month
12,2017-01-31,2,January
0,2016-01-31,28,January
1,2016-02-29,29,February
3,2016-04-30,30,April
5,2016-06-30,30,June
8,2016-09-30,30,September
10,2016-11-30,30,November
2,2016-03-31,31,March
4,2016-05-31,31,May
6,2016-07-31,31,July
