In [1]:
# Data Science
import pandas as pd
import numpy as np

# Data Vizualization
import seaborn as sns
import matplotlib.pyplot as plt

# database
import sqlite3


import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, text, inspect, func

In [2]:
# Read in CSV
Santa_Barbara = pd.read_csv("Resources/Santa_Barbara.csv")
Santa_Barbara.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories,user_id,review_stars,text,date
0,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",8H-17uPX904DiVyNFIixPg,5,"Been going here for years! Always great food,...",2019-02-10 22:53:31
1,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",ukgieEhXwin4YRqsdguLrA,2,We found the food and coffee disappointing but...,2013-12-27 22:49:20
2,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",H6frBOvhhQbs7HWKcH1lNA,5,Just moved into the neighborhood and decided t...,2018-10-07 15:19:13
3,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",tQCc9h1zjbMbrPYGfrMKJg,5,I loved this place. My boyfriend and I came fo...,2017-10-30 02:38:31
4,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",94cMFXuUjTdaKWnxRJjTkg,5,Santa Barbara's very best breakfast burrito! T...,2015-09-17 17:17:03


In [3]:
# Change Date to ####-##-##
Santa_Barbara['date'] = pd.to_datetime(Santa_Barbara['date'])
Santa_Barbara['date'] = pd.to_datetime(Santa_Barbara['date'], unit='s').dt.strftime('%Y-%m-%d')

In [4]:
Santa_Barbara.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories,user_id,review_stars,text,date
0,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",8H-17uPX904DiVyNFIixPg,5,"Been going here for years! Always great food,...",2019-02-10
1,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",ukgieEhXwin4YRqsdguLrA,2,We found the food and coffee disappointing but...,2013-12-27
2,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",H6frBOvhhQbs7HWKcH1lNA,5,Just moved into the neighborhood and decided t...,2018-10-07
3,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",tQCc9h1zjbMbrPYGfrMKJg,5,I loved this place. My boyfriend and I came fo...,2017-10-30
4,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",94cMFXuUjTdaKWnxRJjTkg,5,Santa Barbara's very best breakfast burrito! T...,2015-09-17


In [5]:
# Look for city spelling errors
Santa_Barbara.city.unique()

array(['Santa Barbara', 'Santa  Barbara'], dtype=object)

In [6]:
# Replace city spelling errors
Santa_Barbara['city'] = Santa_Barbara['city'].str.replace('Santa  Barbara', 'Santa Barbara')
Santa_Barbara.city.unique()

array(['Santa Barbara'], dtype=object)

In [7]:
# Find min/max of date range
print(Santa_Barbara['date'].min())
print(Santa_Barbara['date'].max())

2005-03-01
2022-01-19


In [8]:
# Return only 10 years of data
Santa_Barbara = Santa_Barbara[Santa_Barbara['date'] >= '2021-01-01']
print(Santa_Barbara['date'].min())
print(Santa_Barbara['date'].max())

2021-01-01
2022-01-19


In [9]:
# Drop text columns - makes db too large
Santa_Barbara_Clean = Santa_Barbara.drop('text', axis=1)
Santa_Barbara_Clean.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories,user_id,review_stars,date
32,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",TGoDkDFuYxdln_zRVfNGKg,5,2021-02-17
46,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",SyM-HPhak5kkB4XW2lhbTg,5,2021-07-24
51,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",NGnXX1V8cI8yE-wrtfhIiw,5,2021-04-25
52,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",Gq4jobrXqYuLQnOnSh1Gvw,4,2022-01-03
53,6VVbLYay8czrjKJ4xRoTxQ,Judge For Yourself Cafe,1218 Santa Barbara St,Santa Barbara,CA,93101,34.42599,-119.702657,4.5,110,"Restaurants, Breakfast & Brunch",3iC8JyrmWukpa7U9XXW9FA,5,2021-05-02


In [10]:
Santa_Barbara_Clean.describe()

Unnamed: 0,postal_code,latitude,longitude,stars,review_count,review_stars
count,13537.0,13537.0,13537.0,13537.0,13537.0,13537.0
mean,93103.354731,34.420933,-119.705205,4.11668,695.442713,4.052818
std,6.440045,0.009945,0.031912,0.499442,763.952409,1.38607
min,93101.0,34.400241,-119.920673,1.5,5.0,1.0
25%,93101.0,34.414316,-119.706257,4.0,192.0,3.0
50%,93101.0,34.419682,-119.698191,4.0,434.0,5.0
75%,93105.0,34.424641,-119.690523,4.5,983.0,5.0
max,93190.0,34.463083,-119.623092,5.0,3834.0,5.0


In [11]:
categories_of_interest = ['Vietnamese', 'Chinese', 'Ethiopian', 'American (New)', 
                          'American (Traditional)', 'Mexican', 'Italian', 
                          'Japanese', 'Middle Eastern', 'Mediterranean', 'Korean', 
                          'Thai', 'Cuban', 'Irish']

In [12]:
def filter_categories(row):
    categories = row.split(', ')
    filtered = [cat for cat in categories if cat in categories_of_interest]
    return ', '.join(filtered) if filtered else None

In [13]:
Santa_Barbara_Clean['categories'] = Santa_Barbara_Clean['categories'].apply(filter_categories)

# Drop rows where 'categories' became None
Santa_Barbara_Categories = Santa_Barbara_Clean.dropna(subset=['categories'])

Santa_Barbara_Categories.info()

# Save or display the filtered DataFrame
# Santa_Barbara_Categories.to_csv('path_to_save_filtered_df/Filtered_Santa_Barbara_Clean.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
Index: 9235 entries, 110 to 114648
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   business_id   9235 non-null   object 
 1   name          9235 non-null   object 
 2   address       9230 non-null   object 
 3   city          9235 non-null   object 
 4   state         9235 non-null   object 
 5   postal_code   9235 non-null   int64  
 6   latitude      9235 non-null   float64
 7   longitude     9235 non-null   float64
 8   stars         9235 non-null   float64
 9   review_count  9235 non-null   int64  
 10  categories    9235 non-null   object 
 11  user_id       9235 non-null   object 
 12  review_stars  9235 non-null   int64  
 13  date          9235 non-null   object 
dtypes: float64(3), int64(3), object(8)
memory usage: 1.1+ MB


In [14]:
Santa_Barbara_Categories.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories,user_id,review_stars,date
110,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,125,American (Traditional),HA4rae4Oj41vctbk5VFq8w,5,2021-03-11
124,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,125,American (Traditional),27hyqoJVv66JN_Z8pduSBA,1,2021-07-11
127,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,125,American (Traditional),E-UATD6Qg2VikGZ2Mtc7sg,1,2021-11-09
129,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,125,American (Traditional),MvfjsTrQgnCtD3xm3Yg9cw,4,2021-06-12
130,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,125,American (Traditional),Aofw4GTSxQeYtbhUwD2ZJg,2,2021-04-25


In [15]:
Santa_Barbara_Categories['review_count'] = Santa_Barbara_Categories.groupby('business_id')['user_id'].transform('count')

Santa_Barbara_Categories.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Santa_Barbara_Categories['review_count'] = Santa_Barbara_Categories.groupby('business_id')['user_id'].transform('count')


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories,user_id,review_stars,date
110,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,7,American (Traditional),HA4rae4Oj41vctbk5VFq8w,5,2021-03-11
124,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,7,American (Traditional),27hyqoJVv66JN_Z8pduSBA,1,2021-07-11
127,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,7,American (Traditional),E-UATD6Qg2VikGZ2Mtc7sg,1,2021-11-09
129,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,7,American (Traditional),MvfjsTrQgnCtD3xm3Yg9cw,4,2021-06-12
130,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,4.0,7,American (Traditional),Aofw4GTSxQeYtbhUwD2ZJg,2,2021-04-25


In [19]:
Santa_Barbara_Categories['stars'] = Santa_Barbara_Categories.groupby('business_id')['review_stars'].transform('mean').round(1)

Santa_Barbara_Categories.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Santa_Barbara_Categories['stars'] = Santa_Barbara_Categories.groupby('business_id')['review_stars'].transform('mean').round(1)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,categories,user_id,review_stars,date
110,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,3.3,7,American (Traditional),HA4rae4Oj41vctbk5VFq8w,5,2021-03-11
124,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,3.3,7,American (Traditional),27hyqoJVv66JN_Z8pduSBA,1,2021-07-11
127,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,3.3,7,American (Traditional),E-UATD6Qg2VikGZ2Mtc7sg,1,2021-11-09
129,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,3.3,7,American (Traditional),MvfjsTrQgnCtD3xm3Yg9cw,4,2021-06-12
130,8-ZExybRuyyXZf6aESgc3g,Paseo Nuevo Shopping Center,651 Paseo Nuevo,Santa Barbara,CA,93101,34.418991,-119.699996,3.3,7,American (Traditional),Aofw4GTSxQeYtbhUwD2ZJg,2,2021-04-25


In [20]:
# Create engine using the `santa_barbara_food.sqlite` database file
engine = create_engine("sqlite:///Resources/santa_barbara_food.sqlite")
# write to the database
Santa_Barbara_Categories.to_sql("santa_barbara_food", con=engine, index=False, if_exists="replace", chunksize=500)



9235

In [21]:
# Close the database connection
engine.dispose()