In [1]:
# Load libraries
import os
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy as db

In [2]:
# Create database connection
# The URL template used to be passed to create_engine() verbatim, so the
# "{user}"-style placeholders were never substituted and no real server could
# be reached. The values are now read from the environment (using the standard
# libpq variable names) so that no credentials live in the notebook itself.
# A missing required variable raises KeyError here, before any connection
# attempt is made.
engine = db.create_engine(
    'postgresql://{user}:{user_pass}@{host}/{dataname1}'.format(
        # quote_plus escapes characters such as '@', ':' and '/' that would
        # otherwise break URL parsing when they appear in the user/password.
        user = quote_plus(os.environ['PGUSER']),
        user_pass = quote_plus(os.environ['PGPASSWORD']),
        host = os.environ.get('PGHOST', 'localhost'),
        dataname1 = os.environ['PGDATABASE'],
    )
)
con = engine.connect()

In [3]:
# Load each of the source restaurants tables into its own DataFrame
def _read_table(name):
    """Read the SQL table `name` into a DataFrame through the open `con`."""
    return pd.read_sql_table(name, con = con)

yelp_restaurants = _read_table('yelp_restaurants')
more_yelp_restaurants = _read_table('more_yelp_restaurants')
address_yelp_restaurants = _read_table('address_yelp_restaurants')

  "Did not recognize type '%s' of column '%s'" % (attype, name)


In [29]:
# Create the list of columns shared by all three tables
columns1 = set(yelp_restaurants.columns)
columns2 = set(more_yelp_restaurants.columns)
columns3 = set(address_yelp_restaurants.columns)
# Converting a set of strings to a list gives an arbitrary order that can
# change between kernel restarts, which made the column layout of the
# combined (and exported) table non-reproducible. Walk the first table's
# columns instead so the shared columns keep that table's order.
common_columns = [column for column in yelp_restaurants.columns
                  if column in columns2 and column in columns3]

In [34]:
# Concatenate the three tables, keeping only the columns they all share
all_restaurants = pd.concat(
    [
        table[common_columns].copy()
        for table in (yelp_restaurants,
                      more_yelp_restaurants,
                      address_yelp_restaurants)
    ]
)

In [35]:
# Remove duplicate restaurants: order the rows by review_count (ascending),
# keep the first row encountered for each 'id', then renumber the index from 0.
# NOTE(review): ascending order combined with keep='first' retains the row
# with the LOWEST review_count for each id -- confirm this is the intended one.
all_restaurants = (
    all_restaurants
    .sort_values('review_count', ascending = True)
    .drop_duplicates('id', keep = 'first')
    .reset_index(drop = True)
)

In [36]:
# Display the combined, de-duplicated restaurants table (the bare last
# expression of the cell is what gets rendered as the cell output).
all_restaurants

Unnamed: 0,state,price,address1,name,categories,rating,zip_code,id,review_count,city,country,latitude,longitude,phone
0,TN,1,2062 US Hwy 45,Pizza Hut,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",0.0,38382,sz6SkHCNYWP48r1HbEgoMA,0.0,Trenton,US,35.964255,-88.932119,+17318554652
1,CA,-1,1970nCollege Blvd,Del Taco,[],0.0,92056,xDhigrxHEUMY87MhV0viTA,0.0,Oceanside,US,33.209000,-117.284320,+17609453786
2,KY,-1,361 Rudd Ln,Mother Mills Southern Comfort Food Mixes,"[{'alias': 'comfortfood', 'title': 'Comfort Fo...",0.0,40069,Enfk9E2tr6iKmSDCsiKtyA,0.0,Springfield,US,37.770490,-85.212480,+18596059606
3,NJ,-1,1 Borgata Way,Angeline by Michael Symon,[],0.0,08401,5DjiTczBAsORP3F2BP6HDA,0.0,Atlantic City,US,39.377390,-74.435680,
4,WA,-1,278 106th Ave NE,Just Poke,[],0.0,98004,drqFKsTZBoapMB4vTdlEYg,0.0,Bellevue,US,47.612767,-122.198804,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
760279,TN,-1,3304 W Andrew Johnson Hwy,Portofino Italian Restaurant,,,37814,SvA2Vi8LnI6YeWGtrFbQzA,,Morristown,US,36.201040,-83.339480,+14236160321
760280,TX,-1,2220 Plainview Rd,Sonic,,,76065,HzN3iiWaVu75zdZbt2WN-Q,,Midlothian,US,32.459170,-96.942160,+19727754550
760281,KY,-1,9039 US-42,chipotle Mexican Grill,,,41091,eJ5o_jKpFN6ic7Z9es4S8A,,Union,US,38.959828,-84.678032,
760282,TA,-1,,The Cosy Kitchen,"[{'alias': 'irish', 'title': 'Irish'}]",,,sVg4oe5C_fXrrilaXS1ryw,,Townparks,IE,52.371342,-7.922970,


In [37]:
# Export back to PostgreSQL
# - index = False: the DataFrame index is not written as a column. The
#   previous index_label = 'id' argument only names a written index, so it
#   was ignored and misleading; the restaurant 'id' is already a regular
#   column and is exported as such.
# - if_exists = 'fail' (the pandas default, now explicit): re-running this
#   cell raises instead of silently touching an existing table.
# - chunksize: write the rows in batches rather than preparing every row of
#   this large table for a single insert call.
# - dtype: store the 'categories' column using the JSON column type.
all_restaurants.to_sql('all_yelp_restaurants', 
                       con = engine, 
                       if_exists = 'fail', 
                       index = False, 
                       chunksize = 10000, 
                       dtype = {'categories' : db.types.JSON}
                      )

In [38]:
# Close database connection
# engine.dispose() only closes connections that are idle in the pool; `con`
# is still checked out, so it must be closed explicitly or it stays open on
# the server until it is garbage collected.
con.close()
engine.dispose()