## Basic setting up

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import sqlalchemy
import datetime

In [2]:
barcelona_listings = pd.read_csv("barcelona_listings.csv")
berlin_listings = pd.read_csv("berlin_listings_filtered.csv")
madrid_listings = pd.read_csv("madrid_listings_filtered.csv")
listings = pd.concat([barcelona_listings, berlin_listings, madrid_listings], ignore_index=True)

In [3]:
barcelona_reviews = pd.read_csv("barcelona_reviews.csv")
berlin_reviews = pd.read_csv("berlin_reviews.csv")
madrid_reviews = pd.read_csv("madrid_reviews.csv")
reviews = pd.concat([barcelona_reviews, berlin_reviews, madrid_reviews], ignore_index=True)

In [4]:
barcelona_calendar = pd.read_csv("barcelona_calendar.csv")
berlin_calendar = pd.read_csv("berlin_calendar.csv")
madrid_calendar = pd.read_csv("madrid_calendar.csv")
calendar = pd.concat([barcelona_calendar, berlin_calendar, madrid_calendar], ignore_index=True)

In [5]:
engine = create_engine("sqlite:////Users/gfotiadis/Temp useful/merged_project.db")

In [6]:
def null_count(x):
    return listings[x].isna().sum()# / len(listings[x])

def unique_count(x):
    return listings.nunique()# / listings.shape[0] * 100

#for i,c in enumerate(listings.columns):
 #   print(c, unique_count(c)[i])

In [7]:
#db_name = "project.db"

In [8]:
#from_engine = sqlalchemy.create_engine("sqlite:///%s" % db_name, execution_options={"sqlite_raw_colnames": True})

## Import accommodates

In [9]:
df = pd.DataFrame(listings.accommodates.unique(), columns=["accommodates"])

In [10]:
#db_ac = pd.read_sql_table("Accommodation", from_engine)

In [11]:
#set_diff_df = pd.concat([db_ac, df, df]).drop_duplicates(keep=False)

In [12]:
df.to_sql("Accommodation", engine, if_exists="append", index=False, dtype={"accommodates" : sqlalchemy.INT})

In [13]:
engine.execute("select count(*) from Accommodation ").fetchall()

[(16,)]

## Import amenities

In [14]:
amenities = pd.DataFrame(listings.amenities.unique(), columns=["amenities"])

In [15]:
temp = []
for index, row in amenities.iterrows():
    temp.append(row["amenities"].split(','))

In [16]:
flat_list = []
for sublist in temp:
    for item in sublist:
        flat_list.append(item)

In [17]:
def remove_chars(item):
    for c in item:
        if c == "{" or c == "}" or c == "'" or c == '"' or c == "[" or c == "]":
            item = item.replace(c, "")
        item = item.strip().lower()
    return item

In [18]:
new_list = []
for item in flat_list:
    new_list.append('"' + remove_chars(item) + '"')
    

In [19]:
new_list = list(set(new_list))

In [20]:
new_list.remove('""')

In [21]:
new_list

['"indoor fireplace"',
 '"firm mattress"',
 '"lock on bedroom door"',
 '"accessible-height toilet"',
 '"other pet(s)"',
 '"ethernet connection"',
 '"fireplace guards"',
 '"outdoor seating"',
 '"carbon monoxide detector"',
 '"heated towel rack"',
 '"ev charger"',
 '"convection oven"',
 '"essentials"',
 '"amazon echo"',
 '"stove"',
 '"bed linens"',
 '"wide doorway"',
 '"mountain view"',
 '"lockbox"',
 '"ceiling fan"',
 '"hot water"',
 '"heated floors"',
 '"warming drawer"',
 '"ground floor access"',
 '"printer"',
 '"gas oven"',
 '"translation missing: en.hosting_amenity_50"',
 '"mudroom"',
 '"microwave"',
 '"pack ’n play/travel crib"',
 '"hbo go"',
 '"pets allowed"',
 '"full kitchen"',
 '"air purifier"',
 '"children’s books and toys"',
 '"hammock"',
 '"handheld shower head"',
 '"crib"',
 '"private entrance"',
 '"bath towel"',
 '"luggage dropoff allowed"',
 '"mobile hoist"',
 '"sound system"',
 '"waterfront"',
 '"gym"',
 '"memory foam mattress"',
 '"roll-in shower"',
 '"flat path to front

In [22]:
amenities = pd.DataFrame(new_list, columns=["amenities"])

In [23]:
#db_amenities = pd.read_sql_table("Amenities", from_engine)

In [24]:
#set_diff_amenities = pd.concat([db_amenities, amenities, amenities], ignore_index=True).drop_duplicates(keep=False)

In [25]:
#set_diff_amenities.amenities.isin(amenities.amenities).any()

In [26]:
#print(len(amenities.amenities), len(set_diff_amenities.amenities), len(amenities.amenities) + len(set_diff_amenities.amenities), )

In [27]:
#print(len(pd.concat([amenities, set_diff_amenities], ignore_index=True)))

In [28]:
amenities.to_sql("Amenities", engine, if_exists="append", index=False, dtype={"amenities" : sqlalchemy.CHAR(32)})

In [29]:
listings.id.count()

42094

In [30]:
len(listings["amenities"].unique() == listings.amenities.unique())

37765

## Import Bathrooms

In [31]:
bathrooms = pd.DataFrame(listings.bathrooms.unique(), columns=["bathrooms"])

In [32]:
bathrooms = bathrooms.dropna()

In [33]:
bathrooms.to_sql("Bathrooms", engine, if_exists="append", index=False, dtype={"bathrooms" : sqlalchemy.FLOAT})

## Import Description

In [38]:
description = pd.DataFrame({"description_id": np.arange(len(listings.description)), "summary": listings.summary, "space": listings.space, "description": listings.description, "neighbourhood_overview": listings.neighborhood_overview,"notes":listings.notes,"transit":listings.transit,"access":listings.access,"picture_url":listings.picture_url,"square_feet": listings.square_feet, "id": listings.id})

In [39]:
description["space"].fillna("",inplace=True)
description["neighbourhood_overview"].fillna("",inplace=True)
description["description"].fillna("",inplace=True)
description["summary"].fillna("",inplace=True)
description["notes"].fillna("",inplace=True)
description["transit"].fillna("",inplace=True)
description["access"].fillna("",inplace=True)
description["picture_url"].fillna("",inplace=True)
description["square_feet"].fillna(-1,inplace=True)


In [40]:
description.to_sql("Described_Description", engine, if_exists="append", index=False, dtype={"description_id": sqlalchemy.INT, "summary": sqlalchemy.VARCHAR(1024), "space": sqlalchemy.VARCHAR(1024), "description": sqlalchemy.VARCHAR(1024), "neighbourhood_overview": sqlalchemy.VARCHAR(1024),"notes":sqlalchemy.VARCHAR(1024),"transit": sqlalchemy.VARCHAR(1024),"access": sqlalchemy.VARCHAR(1024),"picture_url":sqlalchemy.CHAR(128),"square_feet": sqlalchemy.FLOAT, "id": sqlalchemy.INT})

## Import bedding

In [41]:
bedding = pd.DataFrame({"beds": listings.beds, "bed_type": listings.bed_type})

In [42]:
bedding = bedding.fillna(0).drop_duplicates()

In [43]:
bedding.to_sql("Bedding", engine, if_exists="append", index=False, dtype={"beds" : sqlalchemy.FLOAT, "bed_type" : sqlalchemy.CHAR(32)})

## Import Bedrooms

In [44]:
bedrooms = pd.DataFrame({"bedrooms": listings.bedrooms})

In [45]:
bedrooms = bedrooms.dropna().drop_duplicates()

In [48]:
bedrooms.to_sql("Bedrooms", engine, if_exists="append", index=False, dtype={"bedrooms" : sqlalchemy.INT})

## Import Room

In [49]:
room = pd.DataFrame({"room_type": listings.room_type})

In [50]:
room = room.drop_duplicates()

In [51]:
room.to_sql("Room", engine, if_exists="append", index=False, dtype={"room_type" : sqlalchemy.CHAR(32)})

## Import Property

In [52]:
prop = pd.DataFrame({"property_type": listings.property_type})

In [53]:
prop = prop.drop_duplicates()

In [54]:
prop.to_sql("Property", engine, if_exists="append", index=False, dtype={"propery_type" : sqlalchemy.CHAR(32)})

## Import Reviewer

In [55]:
reviewer = pd.DataFrame({"reviewer_id": reviews.reviewer_id, "reviewer_name" : reviews.reviewer_name})

In [56]:
reviewer = reviewer.drop_duplicates().drop(174201).drop(651249)

In [57]:
reviewer[reviewer.duplicated(['reviewer_id'], keep=False)]

Unnamed: 0,reviewer_id,reviewer_name


In [58]:
reviewer.to_sql("Reviewer", engine, if_exists="append", index=False, dtype={"reviewer_id" : sqlalchemy.INT, "reviewer_name" : sqlalchemy.CHAR(32)})

## Import Review

In [59]:
review = pd.DataFrame({"review_id" : np.arange(len(reviews.listing_id)), "review_date" : reviews.date, "comments" : reviews.comments})

In [60]:
for i in review.index:
    review.at[i, "review_date"] = datetime.datetime.strptime(str(review.at[i, "review_date"]), "%Y-%m-%d").date()

In [61]:
review.to_sql("Review", engine, if_exists="append", index=False, dtype={"review_id" : sqlalchemy.INT, "review_date" : sqlalchemy.DATE, "comments" : sqlalchemy.VARCHAR(1024)})

## Import Reviewed

In [62]:
reviewed = pd.DataFrame({"id": reviews.listing_id, "reviewer_id" : reviews.reviewer_id, "review_id" : review.review_id})

In [63]:
reviewed.to_sql("Reviewed", engine, if_exists="append", index=False, dtype={"id" : sqlalchemy.INT, "reviewer_id" : sqlalchemy.INT, "review_id" : sqlalchemy.INT})

## Import Pricing

In [64]:
pricing = pd.DataFrame({"price": listings.price, "weekly_price" : listings.weekly_price, "monthly_price": listings.monthly_price, "security_deposit" : listings.security_deposit, "cleaning_fee" : listings.cleaning_fee, "guests_included" : listings.guests_included, "extra_people" : listings.extra_people, "minimum_nights" : listings.minimum_nights, "maximum_nights" : listings.maximum_nights, "id" : listings.id})

In [65]:
for i in pricing.index:
    pricing.at[i, "price"] = float(str(pricing.at[i, "price"])[1:].replace(",", ""))
    if not pd.isnull(pricing.at[i, "weekly_price"]):
        pricing.at[i, "weekly_price"] = float(str(pricing.at[i, "weekly_price"])[1:].replace(",", ""))
    if not pd.isnull(pricing.at[i, "monthly_price"]):
        pricing.at[i, "monthly_price"] = float(str(pricing.at[i, "monthly_price"])[1:].replace(",", ""))
    if not pd.isnull(pricing.at[i, "security_deposit"]):
        pricing.at[i, "security_deposit"] = float(str(pricing.at[i, "security_deposit"])[1:].replace(",", ""))
    if not pd.isnull(pricing.at[i, "cleaning_fee"]):
        pricing.at[i, "cleaning_fee"] = float(str(pricing.at[i, "cleaning_fee"])[1:].replace(",", ""))
    if not pd.isnull(pricing.at[i, "extra_people"]):
        pricing.at[i, "extra_people"] = float(str(pricing.at[i, "extra_people"])[1:].replace(",", ""))

In [66]:
for i in pricing.index:
    if pd.isnull(pricing.at[i, "weekly_price"]):
        pricing.at[i, "weekly_price"] = pricing.at[i, "price"] * 7
    if pd.isnull(pricing.at[i, "monthly_price"]):
        pricing.at[i, "monthly_price"] = pricing.at[i, "price"] * 30
    if pd.isnull(pricing.at[i, "security_deposit"]):
        pricing.at[i, "security_deposit"] = 0
    if pd.isnull(pricing.at[i, "cleaning_fee"]):
        pricing.at[i, "cleaning_fee"] = 0

In [68]:
pricing.to_sql("Pricing", engine, if_exists="append", index=False, dtype={"price": sqlalchemy.FLOAT, "weekly_price" : sqlalchemy.FLOAT, "monthly_price": sqlalchemy.FLOAT, "security_deposit" : sqlalchemy.FLOAT, "cleaning_fee" : sqlalchemy.FLOAT, "guests_included" : sqlalchemy.INT, "extra_people" : sqlalchemy.FLOAT, "minimum_nights" : sqlalchemy.INT, "maximum_nights" : sqlalchemy.INT, "id" : sqlalchemy.INT})

## Import Verification

In [69]:
verification = pd.DataFrame(listings.host_verifications.unique(), columns=["host_verifications"])

In [70]:
temp = []
for index, row in verification.iterrows():
    temp.append(row["host_verifications"].split(','))

In [71]:
flat_list = []
for sublist in temp:
    for item in sublist:
        flat_list.append(item)

In [72]:
new_list = []
for item in flat_list:
    new_list.append('"' + remove_chars(item) + '"')

In [73]:
new_list = list(set(new_list))

In [74]:
new_list.remove('""')

In [75]:
verification = pd.DataFrame({"host_verifications" : new_list})

In [76]:
verification.to_sql("Verification", engine, if_exists="append", index=False, dtype={"host_verifications" : sqlalchemy.CHAR(32)})

## Import Host

In [138]:
host = pd.DataFrame({"host_id" : listings.host_id, "host_name" : listings.host_name, "host_since" : listings.host_since, "host_about" : listings.host_about, "host_response_time" : listings.host_response_time, "host_response_rate" : listings.host_response_rate, "host_neighbourhood" : listings.host_neighbourhood, "host_country_code" : "ES", "host_city" : "Barcelona", "host_name" : listings.host_name, "host_thumbnail_url" : listings.host_thumbnail_url, "host_picture_url" : listings.host_picture_url})

In [139]:
host = host.fillna("Unknown")

In [140]:
for i in host.index:
    host.at[i, "host_since"] = datetime.datetime.strptime(str(host.at[i, "host_since"]), "%Y-%m-%d").date()

In [141]:
host = host.drop_duplicates()

In [119]:
dups = host[host.duplicated(['host_id'], keep=False)]

In [121]:
dups_index = dups.index[np.where(dups["host_response_time"] == "Unknown")]

In [142]:
host = host.drop(dups_index)

In [144]:
host.to_sql("Host", engine, if_exists="append", index=False, dtype={"host_id" : sqlalchemy.CHAR(32), "host_since" : sqlalchemy.DATE, "host_about" : sqlalchemy.VARCHAR(1024), "host_response_time" : sqlalchemy.CHAR(32), "host_response_rate" : sqlalchemy.CHAR(32), "host_neighbourhood" : sqlalchemy.CHAR(32), "host_country_code" : sqlalchemy.CHAR(2), "host_city" : sqlalchemy.CHAR(32), "host_url" : sqlalchemy.CHAR(32), "host_name" : sqlalchemy.CHAR(32), "host_thumbnail_url" : sqlalchemy.CHAR(32), "host_picture_url" : sqlalchemy.CHAR(32)})

## Import verified_by

In [145]:
verified_by = pd.DataFrame({"host_id" : listings.host_id, "host_verifications" : listings.host_verifications})

In [146]:
verified_by = verified_by.drop_duplicates()

In [147]:
verified_by.to_sql("Verified_by", engine, if_exists="append", index=False, dtype={"host_id" : sqlalchemy.INT, "host_verifications" : sqlalchemy.CHAR(32)})

## Import Calendar

In [148]:
cal = pd.DataFrame({"date" : calendar.date.unique()})

In [149]:
for i in cal.index:
    cal.at[i, "date"] = datetime.datetime.strptime(str(cal.at[i, "date"]), "%Y-%m-%d").date()

In [150]:
cal.to_sql("Calendar", engine, if_exists="append", index=False, dtype={"date" : sqlalchemy.DATE})

## Import Available_at

In [151]:
avail = pd.DataFrame({"id" : calendar.listing_id, "date" : calendar.date, "price" : calendar.price, "available" : calendar.available})

In [152]:
for i in calendar.index:
    avail.at[i, "date"] = datetime.datetime.strptime(str(calendar.at[i, "date"]), "%Y-%m-%d").date()

In [153]:
for i in avail.index:
    if not pd.isna(avail.at[i, "price"]):
        avail.at[i, "price"] = float(str(avail.at[i, "price"]).replace(",","").replace("$", ""))

In [154]:
avail = avail.fillna(-1)

In [155]:
avail.to_sql("Available_at", engine, if_exists="append", index=False, dtype={"id" : sqlalchemy.INT, "date" : sqlalchemy.DATE, "price" : sqlalchemy.INT, "available" : sqlalchemy.CHAR(1)})



## Import Policy

In [157]:
reduced = pd.DataFrame({"cancellation_policy" : listings["cancellation_policy"]})

In [158]:
reduced = reduced.drop_duplicates()

In [159]:
reduced.to_sql("Policy", engine, if_exists="append", index=False, dtype={"cancellation_policy" : sqlalchemy.CHAR(32)})

## Import Score

In [160]:
score = pd.DataFrame({"review_scores_rating" : listings.review_scores_rating, "review_scores_accuracy" : listings.review_scores_accuracy, "review_scores_cleanliness" : listings.review_scores_cleanliness, "review_scores_checkin" : listings.review_scores_checkin, "review_scores_communication" : listings.review_scores_communication, "review_scores_location" : listings.review_scores_location, "review_scores_value" : listings.review_scores_value, "id" : listings.id})

In [161]:
score = score.fillna(-1)

In [163]:
score.to_sql("Score", engine, if_exists="append", index=False, dtype={"review_scores_rating" : sqlalchemy.INT, "review_scores_accuracy" : sqlalchemy.INT, "review_scores_cleanliness" : sqlalchemy.INT, "review_scores_checkin" : sqlalchemy.INT, "review_scores_communication" : sqlalchemy.INT, "review_scores_location" : sqlalchemy.INT, "review_scores_value" : sqlalchemy.INT, "id" : sqlalchemy.INT})

## Import Provide

In [164]:
provide = listings[["id", "amenities"]]

In [165]:
temp2 = []
for index, row in provide.iterrows():
    temp2.append(row["amenities"].split(','))

In [166]:
removed_chars_list = []
for l in temp2:
    tmp = []
    for a in l:
        tmp.append(remove_chars(a))
    removed_chars_list.append(tmp)

In [167]:
ids = []
amens = []
for i, elem in enumerate(provide["id"]):
    for j in np.arange(len(removed_chars_list[i])):
        ids.append(elem)
        amens.append(removed_chars_list[i][j])
        


In [168]:
provides = pd.DataFrame({"id" : ids, "amenities" : amens})

In [169]:
provides = provides.drop_duplicates()

In [170]:
provides.to_sql("Provides", engine, if_exists="append", index=False, dtype={"id" : sqlalchemy.INT, "amenities" : sqlalchemy.CHAR(32)})

## Country

In [171]:
country = ["Spain", "Germany"]
c_code = ["ES", "DE"]

In [172]:
da_country = pd.DataFrame({"country" : country, "country_code" : c_code})

In [173]:
da_country.to_sql("Country", engine, if_exists="append", index=False, dtype={"country_code" : sqlalchemy.CHAR(2), "country" : sqlalchemy.CHAR(32)})

## City

In [174]:
city= ["Barcelona","Madrid","Berlin"]

In [175]:
country_code = ["ES", "ES", "DE"]

In [176]:
da_city = pd.DataFrame({"city" : city, "country_code" : country_code})

In [177]:
da_city.to_sql("City", engine, if_exists="append", index=False, dtype={"city" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2)})

### Neighbourhood

In [187]:
nei = barcelona_listings[["neighbourhood", "country_code", "city"]]
for i in nei.index:
    nei.at[i, "city"] = "Barcelona"
nei = nei.drop_duplicates()
nei.to_sql("Neighbourhood", engine, if_exists="append", index=False, dtype={"Neighbourhood" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2), "city" : sqlalchemy.CHAR(32)})

In [188]:
nei = berlin_listings[["neighbourhood", "country_code", "city"]]
for i in nei.index:
    nei.at[i, "city"] = "Berlin"
nei = nei.drop_duplicates()
nei.to_sql("Neighbourhood", engine, if_exists="append", index=False, dtype={"Neighbourhood" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2), "city" : sqlalchemy.CHAR(32)})

In [189]:
nei = madrid_listings[["neighbourhood", "country_code", "city"]]
for i in nei.index:
    nei.at[i, "city"] = "Madrid"
nei = nei.drop_duplicates()
nei.to_sql("Neighbourhood", engine, if_exists="append", index=False, dtype={"Neighbourhood" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2), "city" : sqlalchemy.CHAR(32)})

## Import Listing

In [190]:
da_listing = barcelona_listings[["id", "listing_url", "name", "accommodates", "cancellation_policy", "host_id", "neighbourhood", "city", "country_code", "latitude", "longitude", "property_type", "room_type", "bathrooms", "bedrooms", "beds", "bed_type", "interaction", "house_rules", "is_business_travel_ready", "require_guest_profile_picture", "require_guest_phone_verification"]]
da_listing.name = da_listing.name.fillna("")
da_listing.interaction = da_listing.interaction.fillna("")
da_listing.house_rules = da_listing.house_rules.fillna("")

da_listing.city = da_listing.city.fillna("Barcelona")
da_listing.bathrooms = da_listing.bathrooms.fillna(0)
da_listing.bedrooms = da_listing.bedrooms.fillna(0)
da_listing.beds = da_listing.beds.fillna(0)

da_listing.to_sql("Listing", engine, if_exists="append", index=False, dtype={"id" : sqlalchemy.INT, "listing_url" : sqlalchemy.CHAR(32), "name" : sqlalchemy.CHAR(32), "accommodates" : sqlalchemy.CHAR(32), "cancellation_policy" : sqlalchemy.CHAR(32), "host_id" : sqlalchemy.INT, "neighbourhood": sqlalchemy.CHAR(32), "city" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2), "latitude" : sqlalchemy.FLOAT, "longitude" : sqlalchemy.FLOAT, "property_type": sqlalchemy.CHAR(32), "room_type" : sqlalchemy.CHAR(32), "bathrooms" : sqlalchemy.FLOAT, "bedrooms" : sqlalchemy.INT, "beds" : sqlalchemy.INT, "bed_type" : sqlalchemy.CHAR(32), "interaction" : sqlalchemy.VARCHAR(1024), "house_rules" : sqlalchemy.VARCHAR(1024), "is_business_travel_ready" : sqlalchemy.CHAR(1), "require_guest_profile_picture" : sqlalchemy.CHAR(1), "require_guest_phone_verification" : sqlalchemy.CHAR(1)})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [191]:
da_listing = berlin_listings[["id", "listing_url", "name", "accommodates", "cancellation_policy", "host_id", "neighbourhood", "city", "country_code", "latitude", "longitude", "property_type", "room_type", "bathrooms", "bedrooms", "beds", "bed_type", "interaction", "house_rules", "is_business_travel_ready", "require_guest_profile_picture", "require_guest_phone_verification"]]
da_listing.name = da_listing.name.fillna("")
da_listing.interaction = da_listing.interaction.fillna("")
da_listing.house_rules = da_listing.house_rules.fillna("")

da_listing.city = da_listing.city.fillna("Berlin")
da_listing.bathrooms = da_listing.bathrooms.fillna(0)
da_listing.bedrooms = da_listing.bedrooms.fillna(0)
da_listing.beds = da_listing.beds.fillna(0)

da_listing.to_sql("Listing", engine, if_exists="append", index=False, dtype={"id" : sqlalchemy.INT, "listing_url" : sqlalchemy.CHAR(32), "name" : sqlalchemy.CHAR(32), "accommodates" : sqlalchemy.CHAR(32), "cancellation_policy" : sqlalchemy.CHAR(32), "host_id" : sqlalchemy.INT, "neighbourhood": sqlalchemy.CHAR(32), "city" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2), "latitude" : sqlalchemy.FLOAT, "longitude" : sqlalchemy.FLOAT, "property_type": sqlalchemy.CHAR(32), "room_type" : sqlalchemy.CHAR(32), "bathrooms" : sqlalchemy.FLOAT, "bedrooms" : sqlalchemy.INT, "beds" : sqlalchemy.INT, "bed_type" : sqlalchemy.CHAR(32), "interaction" : sqlalchemy.VARCHAR(1024), "house_rules" : sqlalchemy.VARCHAR(1024), "is_business_travel_ready" : sqlalchemy.CHAR(1), "require_guest_profile_picture" : sqlalchemy.CHAR(1), "require_guest_phone_verification" : sqlalchemy.CHAR(1)})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [192]:
da_listing = madrid_listings[["id", "listing_url", "name", "accommodates", "cancellation_policy", "host_id", "neighbourhood", "city", "country_code", "latitude", "longitude", "property_type", "room_type", "bathrooms", "bedrooms", "beds", "bed_type", "interaction", "house_rules", "is_business_travel_ready", "require_guest_profile_picture", "require_guest_phone_verification"]]
da_listing.name = da_listing.name.fillna("")
da_listing.interaction = da_listing.interaction.fillna("")
da_listing.house_rules = da_listing.house_rules.fillna("")

da_listing.city = da_listing.city.fillna("Madrid")
da_listing.bathrooms = da_listing.bathrooms.fillna(0)
da_listing.bedrooms = da_listing.bedrooms.fillna(0)
da_listing.beds = da_listing.beds.fillna(0)

da_listing.to_sql("Listing", engine, if_exists="append", index=False, dtype={"id" : sqlalchemy.INT, "listing_url" : sqlalchemy.CHAR(32), "name" : sqlalchemy.CHAR(32), "accommodates" : sqlalchemy.CHAR(32), "cancellation_policy" : sqlalchemy.CHAR(32), "host_id" : sqlalchemy.INT, "neighbourhood": sqlalchemy.CHAR(32), "city" : sqlalchemy.CHAR(32), "country_code" : sqlalchemy.CHAR(2), "latitude" : sqlalchemy.FLOAT, "longitude" : sqlalchemy.FLOAT, "property_type": sqlalchemy.CHAR(32), "room_type" : sqlalchemy.CHAR(32), "bathrooms" : sqlalchemy.FLOAT, "bedrooms" : sqlalchemy.INT, "beds" : sqlalchemy.INT, "bed_type" : sqlalchemy.CHAR(32), "interaction" : sqlalchemy.VARCHAR(1024), "house_rules" : sqlalchemy.VARCHAR(1024), "is_business_travel_ready" : sqlalchemy.CHAR(1), "require_guest_profile_picture" : sqlalchemy.CHAR(1), "require_guest_phone_verification" : sqlalchemy.CHAR(1)})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
