In this notebook I am going to extract AirBnB data: http://insideairbnb.com/get-the-data.html. Transform it into a pandas DataFrame and store it in an SQLite Database. 

In [45]:
# Imports
import sqlite3
import numpy as np
import pandas as pd

In [49]:
def transform_listings_data(df, city):
    
    # Drop variables with all there values missing
    df.dropna(axis=1, how='all', inplace=True)
    
    # Drop columns 
    df.drop(columns=['id', 'host_id', 'host_name', 'listing_url', 'last_scraped'], inplace=True)
    
    # Drop variables that are the same for every listing
    for col in df:
        if df[col].value_counts().shape[0] == 1:
            df.drop(columns=col, inplace=True)
            
    # Transform money columns from string:'$1,500.00'to float 1500.0
    money_cols = ['price', 'cleaning_fee', 'security_deposit']
    for col in money_cols:
        df[col] = df[col].dropna().apply(lambda p: float(p[1:].replace(",", "")))
    
    # Add city column
    df['from_city'] = city
    
    # Which ammenities to check for
    amenities = ["Doorman", "Gym", "Pool", "Wheelchair Accessible", "Fireplace", "Air Conditioning"
                 "Washer/Dryer", "TV", "Breakfast", "Dog(s)", "Cat(s)", "'24-Hour Check-in'"]
    
    # Check to see if the listing offers these ammenities
    amenity_arr = np.array([listings_df['amenities'].map(lambda amns: amn in amns) for amn in amenities])
    
    df = pd.concat([listings_df, pd.DataFrame(data=amenity_arr.T, columns=amenities)], axis=1)

    return df

In [50]:
def load_listings_data(df):
    
    conn = sqlite3.connect('airbnb.db')
    
    df.to_sql('listings', con = conn, if_exists='replace', index=False)

    conn.commit()
    conn.close()
    
    return None

In [51]:
# Execute this code cell to run the ETL pipeline

path_to_data = 'listings.csv'
city = 'San Francisco'

# Extract the Data
data = pd.read_csv(path_to_data)

# Transform the Data
results = transform_listings_data(data, city)

# Load the Data
load_listings_data(results)

  method=method,


In [52]:
pd.read_sql('SELECT * FROM listings', con = conn).head()

Unnamed: 0,scrape_id,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,...,Gym,Pool,Wheelchair Accessible,Fireplace,Air ConditioningWasher/Dryer,TV,Breakfast,Dog(s),Cat(s),'24-Hour Check-in'
0,20200313233841,"Bright, Modern Garden Unit - 1BR/1B",New update: the house next door is under const...,"Newly remodeled, modern, and bright garden uni...",New update: the house next door is under const...,none,*Quiet cul de sac in friendly neighborhood *St...,Due to the fact that we have children and a do...,*Public Transportation is 1/2 block away. *Ce...,*Full access to patio and backyard (shared wit...,...,0,0,0,0,0,1,0,1,0,0
1,20200313233841,Creative Sanctuary,,We live in a large Victorian house on a quiet ...,We live in a large Victorian house on a quiet ...,none,I love how our neighborhood feels quiet but is...,All the furniture in the house was handmade so...,The train is two blocks away and you can stop ...,"Our deck, garden, gourmet kitchen and extensiv...",...,0,0,0,0,0,0,0,0,0,0
2,20200313233841,A Friendly Room - UCSF/USF - San Francisco,Nice and good public transportation. 7 minute...,"Settle down, S.F. resident, student, hospital,...",Nice and good public transportation. 7 minute...,none,"Shopping old town, restaurants, McDonald, Whol...",Wi-Fi signal in common areas. Large eat in k...,N Juda Muni and bus stop. Street parking.,,...,0,0,0,0,0,1,0,0,0,0
3,20200313233841,Friendly Room Apt. Style -UCSF/USF - San Franc...,Nice and good public transportation. 7 minute...,"Settle down, S.F. resident, student, hospital,...",Nice and good public transportation. 7 minute...,none,,Wi-Fi signal in common areas. Large eat in k...,"N Juda Muni, Bus and UCSF Shuttle. small shopp...",,...,0,0,0,0,0,1,0,0,0,0
4,20200313233841,Historic Alamo Square Victorian,Pls email before booking. Interior featured i...,Please send us a quick message before booking ...,Pls email before booking. Interior featured i...,none,,tax ID on file tax ID on file,,Guests have access to everything listed and sh...,...,0,0,0,0,0,1,0,0,0,0
