In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import matplotlib.pyplot as plt

In [3]:
def convert_price_to_float(data, data_dictionary):
    """
    Function to convert string price columns into float price columns.

    For every object-dtype column whose name contains 'price', a new column
    named '<column>_float' is added to the same dataframe. It holds the
    original value with '$' and ',' removed, cast to float. The original
    string column is kept and the dataframe is modified in place.

    :param data: key of the data set to process inside data_dictionary
    :param data_dictionary: dictionary with airbnb dataframes
    :return: dictionary containing updated dataframes with float price columns
    """
    frame = data_dictionary[data]
    string_columns = frame.select_dtypes(include=['object']).columns
    price_columns = [col for col in string_columns if 'price' in col]
    for price in price_columns:
        # regex=False makes both replacements literal. '$' is a regex
        # end-of-string anchor, so relying on the library's default for
        # `regex` (which differs between pandas versions) is fragile.
        frame[price + '_float'] = (
            frame[price]
            .str.replace('$', '', regex=False)
            .str.replace(',', '', regex=False)
            .astype(float)
        )
    return data_dictionary

In [4]:
def split_date(data, data_dictionary):
    """
    Function to split date column into separate year, month, day columns.

    If the selected dataframe has an object-dtype column named 'date', its
    values are split on '-' and the three parts are stored as new string
    columns 'year', 'month' and 'day' on the same dataframe (modified in
    place). Dataframes without such a column are left untouched.

    :param data: key of the data set to process inside data_dictionary
    :param data_dictionary: dictionary with airbnb dataframes
    :return: dictionary containing updated dataframes with new date columns
    """
    frame = data_dictionary[data]
    if 'date' in frame.select_dtypes(include=['object']).columns:
        # `n` is passed by keyword because recent pandas releases reject it
        # as a positional argument. n=2 means at most two splits, i.e. at
        # most three parts, matching the three target columns.
        date_parts = frame['date'].str.split('-', n=2, expand=True)
        frame[['year', 'month', 'day']] = date_parts
    return data_dictionary

In [6]:
def load_csv(cities, data_sets, source_directory):
    """
    Function to load every requested csv data set for every requested city.

    Each file is read from '<source_directory>/<data set>-<city>.csv'. After
    reading, the dataframe is passed through convert_price_to_float and
    split_date.

    :param cities: list of city names
    :param data_sets: list of data set names
    :param source_directory: location of the csv data sets
    :return: dictionary keyed by city, each value being a dictionary keyed by
        data set name that holds the processed dataframe
    """
    frames_by_city = {}
    for city in cities:
        frames = {}
        for data_set in data_sets:
            # 1. build the file location and read the csv into a dataframe
            file_name = data_set + '-' + city + '.csv'
            frames[data_set] = pd.read_csv(source_directory + '/' + file_name)
            # 2. add float versions of the string price columns
            frames = convert_price_to_float(data_set, frames)
            # 3. add separate year, month, day columns for the date column
            frames = split_date(data_set, frames)
        frames_by_city[city] = frames
    return frames_by_city

In [7]:
# 1. load files
# One csv file is read per (data set, city) pair from the 'data' directory.
data_type = ['calendar', 'listings', 'reviews']
city_type = ['seattle', 'boston']

city_dictionary = load_csv(
    cities=city_type,
    data_sets=data_type,
    source_directory='data',
)

In [None]:
# 2. sanity-check the load
# Printing the whole dictionary would dump every dataframe as plain text, so
# show one row per loaded data set with its dimensions instead. The summary
# is the last expression of the cell, so the notebook renders it as a table.
pd.DataFrame(
    [
        (city, data_set) + frame.shape
        for city, frames in city_dictionary.items()
        for data_set, frame in frames.items()
    ],
    columns=['city', 'data_set', 'rows', 'columns'],
)