In [323]:
import pandas as pd
import numpy as np
import csv
import os, glob
import re
from collections import Counter

# Calculate authenticity score

### In this file, we combine all scraping data (one for restaurant information and the other for reviews), calculate some values for necessary features, and add them into the combined restaurant dataset 

In [324]:
# This table comes from "Authenticity and Consumer Value Ratings- Empirical Tests from the Restaurant Domain" (Kovács et al. (2014))
auth_table = {"authentic":95, "genuine":92,"real":88, "skilled":83,"faithful":81,
              "legitimate":81,"original":80,"traditional":79, "pure":78, "historical":77,
              "sincere":77,"master chef":75,"craftsmanship":74,"honest":74,"integrity":74,
              "quintessential":74,"expert":73,"iconic":73,"inspiring":73,"unique":72,
              "wholesome":72,"professional":70,"skillful":70,"truthful":68,
              "unmistakable":68,"artisan":67,"unpretentious":67,"heartful":66,
              "delicious":65,"virtuous":64,"normal":63,"creative":62,"interesting":62,
              "orthodox":62,"artful":60,"special":60,"righteous":58,"substantial":57,
              "authoritative":56,"typical":56,"awesome":55,"moral":55,"eccentric":54,
              "ethical":54,"fresh":53,"old-fashioned":53, "usual":53, "decent":51,
              "unusual":51,"caring":49, "ambitious":48, "replica":46,"offbeat":43,
             "atypical":41,"unassuming":37,"invented":36,"new":36,"unconventional":36,
             "peculiar":35,"outlandish":32,"assumed":30,"idiosyncratic":30,"quirkly":29,
             "extroverted":28,"modern":27,"unorthodox":27,"pretentious":17,"artificial":14,
             "bogus":13,"forgery":13,"fake":12,"hoax":11,"cheat":10,"dishonest":10,
             "feigned":10,"ersatz":9,"faked":9,"limitation":9,"quack":9,"unreal":8,
             "humbug":7,"impostor":7, "sham":7, "unauthentic":7,"deceptive":6,"inauthentic":6,
             "false":6,"phony":5,"scam":4}

In [325]:
def check_authentic_score(csv):
    """
    calculate authentic_score for each restaruant
    
    input: csv - csv file with reviews in each neghborhood
    return: a dictionary with restaurant name as keys and 
        authenticity score as values.
    """
    reviews_df = pd.read_csv(csv)
    res = []
    [res.append(x) for x in reviews_df['restaurant_name'].tolist() if x not in res]
    dic = {}
    for re in res:
        wc = 0
        total = 0
        data= reviews_df.loc[reviews_df["restaurant_name"] == re]
        for ind in data.index:
            txt =data.at[ind, "review"].split()
            txt = (map(lambda x: x.lower(), txt))
            for wrd in txt:
                if wrd in auth_table.keys():
                    wc += 1
                    total += auth_table[wrd]
        if wc == 0:
            dic[re] = 0
        else:
            score = total/wc
            dic[re] = score
    return dic

In [326]:
#test
dic_test = check_authentic_score('reviews_HumboldtPark.csv')
dic_test

{"Jimmy's Red Hots": 75.36363636363636,
 'Cemitas Puebla': 67.22222222222223,
 'Feed': 62.333333333333336,
 'La Palma Puerto Rican Restaurant': 68.0,
 'La Scarola': 68.63076923076923,
 'Piece': 55.6953642384106}

In [327]:
#list of words that represent nationalities and regions
cultural_dict = {
    'American' : ['American', 'America'],
    'Argentinean' : ['Argentinean', 'Argentina'], 
    'Asian' : ['Asian', 'Asia'], 
    'Austrian' : ['Austrian', 'Austria'],
    'Belgian' : ['Belgian', 'Belgium'], 
    'British' : ['British', 'Britan'],
    'Cajun' : ['Cajun'], 
    'Cantonese' : ['Cantonese'], 
    'Caribbean' : ['Caribbean'], 
    'Central American' : ['America'], 
    'Chinese' : ['Chinese', 'China'], 
    'Creole' : ['Creole'], 
    'Croatian' : ['Croatian', 'Croatia'],
    'Dutch' : ['Dutch', 'Netherlands'], 
    'Eastern European' : ['Europe'], 
    'Egyptian' : ['Egyptian', 'Egypt'], 
    'European' : ['European', 'Europe'], 
    'Filipino' : ['Filipino', 'Philippines'], 
    'French' : ['French'],
    'German' : ['German', 'Germany'], 
    'Greek' : ['Greek', 'Greece'], 
    'Halal' : ['Halal'], 
    'Hawaiian' : ['Hawaiian', 'Hawaii'], 
    'Hong Kong' : ['HongKong', 'Hong'], 
    'Hunan' : ['Hunan'], 
    'Indian' : ['Indian', 'India'], 
    'Irish' : ['Irish', 'Ireland'], 
    'Israeli' : ['Israeli', 'Isarel'], 
    'Italian' : ['Italian', 'Italy'], 
    'Jamaican' : ['Jamaican', 'Jamaica'], 
    'Japanese' : ['Japanese', 'Japan'], 
    'Korean' : ['Korean', 'Korea'], 
    'Kosher' : ['Kosher'],
    'Latin' : ['Latin', 'Latino'], 
    'Lazio' : ['Lazio'], 
    'Lebanese' : ['Lebanese', 'Lebanon'], 
    'Mediterranean' : ['Mediterranean'], 
    'Mexican' : ['Mexican', 'Mexico'], 
    'Middle Eastern' : [], 
    'Nepali' : ['Nepali', 'Nepal'], 
    'Persian' : ['Persian'], 
    'Peruvian' : ['Peruvian', 'Peru'], 
    'Polish' : ['Polish', 'Poland'], 
    'Polynesian' : ['Polynesian', 'Polynesia'], 
    'Portuguese' : ['Portuguese', 'Portugal'], 
    'Romana' : ['Romana', 'Roman'], 
    'Russian' : ['Russian', 'Russia'], 
    'Scandinavian' : ['Scandinavian', 'Scandinavia'], 
    'Shanghai' : [], 
    'South American' : ['American', 'America'], 
    'Southwestern' : ['Southwestern', 'Southwest'], 
    'Spanish' : ['Spanish', 'Spain', 'Hispanic'], 
    'Swedish' : ['Swedish', 'Sweden'], 
    'Szechuan' : ['Szechuan'],
    'Thai' : ['Thai', 'Thailand'], 
    'Turkish' : ['Turkish', 'Turkey'], 
    'Tuscan' : ['Tuscan', 'Tuscany'], 
    'Ukrainian' : ['Ukrainian', 'Ukraine'], 
    'Vietnamese' : ['Vietnamese', 'Vietnam']}

In [328]:
cultural_dict = dict((k, [w.lower() for w in v]) for k,v in cultural_dict.items())

In [329]:
review_files

['reviews_Bridgeview.csv',
 'reviews_HumboldtPark.csv',
 'reviews_Lincoln_Park.csv',
 'reviews_South_Loop.csv',
 'reviews_andersonville.csv',
 'reviews_chinatown.csv',
 'reviews_depaul.csv',
 'reviews_lincolnsquare.csv',
 'reviews_littleitaly.csv',
 'reviews_northpark.csv',
 'reviews_pilsen.csv',
 'reviews_westloop.csv',
 'reviews_westtown.csv']

In [330]:
def review_words(reviews_files, restaurants_all):
    """
    create a dictionaly of each word and calculate word counts for reviews in
    each restaurant
    
    input: 
        reviews_files- list of csvs with review datasets for each neighborhood
        restaurants_all- a combined dataset for all restaurant information
    return: 
        restaurants_all- a combined dataset for all restaurant information with
            a new column of word counts.
    """
    review_cats = pd.DataFrame()
    for file in reviews_files:
        reviews = pd.read_csv(file)
        review_cat = reviews.groupby(['link'])['review'].apply(','.join).reset_index()
        review_cat['review'] = review_cat['review']\
                        .apply(lambda x: re.findall(r"[a-z]\w+", x.lower()))
        review_cat['word_count'] = review_cat['review'].apply(lambda x: len(x))
        review_cat['review'] = review_cat['review'].apply(lambda x: dict(Counter(x)))
        review_cats = review_cats.append(review_cat, ignore_index=False)
    return restaurants_all.merge(review_cats)

In [354]:
def culture_score(restaurants_all):
    """
    count words connecting with nationalities of region and culculate culutral score with frequency of
    nationalities-related word given all word counts of each restaurant review.
    
    input: restaurants_all- a combined dataset for all restaurant information(with word count column)
    return: restaurants_all- a combined dataset for all restaurant information with new columns of
        count of cultural words(nationalities-related, words in cultural_dict) and cultural score.
    """
    culture_word_col = []
    restaurants_all["category"] = restaurants_all["category"].apply(lambda x: re.findall(r'\w+\s?\w+', x))
    for i in range(restaurants_all.shape[0]):
        cultural_words = []
        culture_score = 0
        for category in restaurants_all['category'][i]:
            if category in cultural_dict.keys():
                cultural_words.extend(cultural_dict[category])
        for word in cultural_words:
            if word in restaurants_all['review'][i].keys():
                culture_score += restaurants_all['review'][i][word]
        culture_word_col.append(culture_score)
    restaurants_all['culture_word_count'] = culture_word_col
    restaurants_all['culture_score'] = culture_word_col/restaurants_all['word_count']

In [332]:
#list ot cultural neighborhood we use for this project
cultural_neighborhood = {'restaurants_Bridgeview.csv':"arabic",'restaurants_HumboldtPark.csv':"puerto rican",
                         'restaurants_chinatown.csv':"chinese",'restaurants_lincolnsquare.csv': "german",
                         "restaurants_littleitaly.csv":"italian",'restaurants_pilsen.csv':"mexican",
                         'restaurants_westtown.csv':"polish"}

In [333]:
def add_neiborhood(rest_files):
    """
    add a column at restaurant datasets, label representive cultur 
    in the clumn for cultural_neighborhood and lable None otherwise.
    
    input: rest_files- a list of files with restaurant datasets in each neghborhood
    return: rest_files- the same list of files with a new column of cultural_neghborhood.
    """
    for file in rest_files:
        df = pd.read_csv(file)
        if file in cultural_neighborhood.keys():
            df['cultural_neighborhood'] = cultural_neighborhood[file]
            df.to_csv(file, index=False)
        else:
            df['cultural_neighborhood'] = "None"
            df.to_csv(file,index=False)
    return rest_files

In [210]:
add_neiborhood(rest_files)

['restaurants_Bridgeview.csv',
 'restaurants_HumboldtPark.csv',
 'restaurants_Lincoln_Park.csv',
 'restaurants_South_Loop.csv',
 'restaurants_andersonville.csv',
 'restaurants_chinatown.csv',
 'restaurants_depaul.csv',
 'restaurants_lincolnsquare.csv',
 'restaurants_littleitaly.csv',
 'restaurants_northpark.csv',
 'restaurants_pilsen.csv',
 'restaurants_westloop.csv',
 'restaurants_westtown.csv']

In [290]:
def combine_csv2():
    """
    combine all restaurant and review datasets into two combined datasets
    """
    
    all_filenames = [i for i in glob.glob('*.{}'.format('csv'))]
    
    #combine restaurants 
    rest_files = [i for i in all_filenames if "restaurant" in i]
    rest_csv = pd.concat([pd.read_csv(f) for f in rest_files])
    rest_csv.to_csv("combined_restaurants.csv", index=False, encoding='utf-8-sig')
    
    #combine reviews
    review_files = [i for i in all_filenames if "reviews" in i]
    reviews_csv = pd.concat([pd.read_csv(f) for f in review_files])
    reviews_csv.to_csv("combined_reviews.csv", index=False, encoding='utf-8-sig')

In [355]:
combine_csv2()

In [356]:
restaurants_df = pd.read_csv("combined_restaurants.csv")
restaurants_all.to_csv("combined_restaurants.csv",index=False)
restaurants_df.head()

Unnamed: 0,restaurant_name,link,price_class,price_range,category,michelin,ranking,avg_rating,food_rating,service_rating,value_rating,atmosphere_rating,review_count,rate5_count,rate4_count,rate3_count,rate2_count,rate1_count,cultural_neighborhood
0,Bryn Mawr Breakfast Club,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['American', 'Cafe', 'Vegetarian Friendly']",False,363,4,,,,,67,46,15,6,0,0,
1,Tre Kronor,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['European', 'Swedish', 'Scandinavian']",False,19,4,4.5,4.5,4.5,4.5,142,83,37,9,2,5,
2,Via Veneto Ristorante,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[30, 30]","['Italian', 'Vegetarian Friendly']",False,195,4,4.5,5.0,4.5,4.5,47,22,17,5,0,2,
3,Hoanh Long Vietnamese & Chinese Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"['Chinese', 'Asian', 'Vietnamese']",False,53,4,4.5,4.0,4.5,4.5,22,9,11,2,0,0,
4,Martino's,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['Italian', 'Pizza', 'Vegetarian Friendly']",False,1,4,,,,,20,10,6,4,0,0,


In [357]:
#check length of restaurant dataset (= number of observed restaurants)
len(restaurants_df)

843

In [358]:
#double-check the length
n = 0
for i in rest_files:
    reviews_df = pd.read_csv(i)
    n += len(reviews_df)
print(n)

843


In [359]:
#calculate authentic scores and create a dictionary from the combined review dataset
scoredict = check_authentic_score("combined_reviews.csv")
len(scoredict)

692

In [360]:
# add authenticity score to the combined restaurant dataset
restaurants_df['authentity_score']= restaurants_df['restaurant_name'].map(scoredict)
restaurants_df.drop_duplicates()

Unnamed: 0,restaurant_name,link,price_class,price_range,category,michelin,ranking,avg_rating,food_rating,service_rating,value_rating,atmosphere_rating,review_count,rate5_count,rate4_count,rate3_count,rate2_count,rate1_count,cultural_neighborhood,authentity_score
0,Bryn Mawr Breakfast Club,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['American', 'Cafe', 'Vegetarian Friendly']",False,363,4,,,,,67,46,15,6,0,0,,59.025974
1,Tre Kronor,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['European', 'Swedish', 'Scandinavian']",False,19,4,4.5,4.5,4.5,4.5,142,83,37,9,2,5,,65.488372
2,Via Veneto Ristorante,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[30, 30]","['Italian', 'Vegetarian Friendly']",False,195,4,4.5,5.0,4.5,4.5,47,22,17,5,0,2,,66.220000
3,Hoanh Long Vietnamese & Chinese Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"['Chinese', 'Asian', 'Vietnamese']",False,53,4,4.5,4.0,4.5,4.5,22,9,11,2,0,0,,63.533333
4,Martino's,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['Italian', 'Pizza', 'Vegetarian Friendly']",False,1,4,,,,,20,10,6,4,0,0,,68.312500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838,Taqueria Casa Del Pueblo,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"['Mexican', 'Latin', 'Spanish']",False,131,4,,,,,19,9,6,1,0,2,mexican,81.000000
839,May St. Cafe,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[20, 40]","['Latin', 'Spanish']",False,119,4,4.5,4.0,4.0,4.0,19,11,5,0,2,1,mexican,61.000000
840,DeColores,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[29, 31]","['Mexican', 'Southwestern', 'Spanish']",True,1,4,3.5,3.5,3.5,3.5,15,6,5,2,1,1,mexican,
841,Hony Tonk BBQ,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],['Barbecue'],False,57,4,,,,,10,5,3,1,1,0,mexican,


In [361]:
#add cultural scores to the combined restaurant dataset
restaurants_df = review_words(review_files, restaurants_df)
culture_score(restaurants_df)

In [362]:
restaurants_df

Unnamed: 0,restaurant_name,link,price_class,price_range,category,michelin,ranking,avg_rating,food_rating,service_rating,...,rate4_count,rate3_count,rate2_count,rate1_count,cultural_neighborhood,authentity_score,review,word_count,culture_word_count,culture_score
0,Bryn Mawr Breakfast Club,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"[American, Cafe, Vegetarian Friendly]",False,363,4,,,...,15,6,0,0,,59.025974,"{'for': 93, 'pair': 2, 'of': 160, 'year': 6, '...",9631,2,0.000208
1,Tre Kronor,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"[European, Swedish, Scandinavian]",False,19,4,4.5,4.5,...,37,9,2,5,,65.488372,"{'being': 11, 'swedish': 115, 'came': 12, 'to'...",11486,146,0.012711
2,Via Veneto Ristorante,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[30, 30]","[Italian, Vegetarian Friendly]",False,195,4,4.5,5.0,...,17,5,0,2,,66.220000,"{'via': 31, 'veneto': 31, 'ristorante': 6, 'ha...",3934,38,0.009659
3,Hoanh Long Vietnamese & Chinese Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"[Chinese, Asian, Vietnamese]",False,53,4,4.5,4.0,...,11,2,0,0,,63.533333,"{'choose': 2, 'the': 112, 'hot': 5, 'pot': 3, ...",1818,19,0.010451
4,Martino's,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"[Italian, Pizza, Vegetarian Friendly]",False,1,4,,,...,6,4,0,0,,68.312500,"{'we': 51, 'like': 15, 'to': 50, 'come': 3, 'm...",2378,16,0.006728
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
819,Simone's,https://www.tripadvisor.com/Restaurant_Review-...,1.0,"[5, 15]","[Bar, International]",False,1,4,4.0,4.5,...,5,1,1,0,mexican,55.181818,"{'went': 4, 'for': 17, 'lunch': 2, 'on': 17, '...",1283,0,0.000000
820,Los Comales of 18th Street,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"[Mexican, Spanish]",False,114,4,,,...,9,4,1,0,mexican,83.000000,"{'grew': 1, 'up': 6, 'coming': 1, 'here': 5, '...",1119,12,0.010724
821,Taqueria Casa Del Pueblo,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"[Mexican, Latin, Spanish]",False,131,4,,,...,6,1,0,2,mexican,81.000000,"{'have': 20, 'been': 19, 'to': 116, 'this': 36...",3767,39,0.010353
822,May St. Cafe,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[20, 40]","[Latin, Spanish]",False,119,4,4.5,4.0,...,5,0,2,1,mexican,61.000000,"{'friend': 4, 'suggested': 2, 'we': 34, 'go': ...",2176,7,0.003217


In [363]:
#save the changes to the combined restaurant dataset
restaurants_df.to_csv("combined_restaurants.csv",index=False)

In [364]:
restaurants_df = pd.read_csv("combined_restaurants.csv")
restaurants_df.head()

Unnamed: 0,restaurant_name,link,price_class,price_range,category,michelin,ranking,avg_rating,food_rating,service_rating,...,rate4_count,rate3_count,rate2_count,rate1_count,cultural_neighborhood,authentity_score,review,word_count,culture_word_count,culture_score
0,Bryn Mawr Breakfast Club,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['American', 'Cafe', 'Vegetarian Friendly']",False,363,4,,,...,15,6,0,0,,59.025974,"{'for': 93, 'pair': 2, 'of': 160, 'year': 6, '...",9631,2,0.000208
1,Tre Kronor,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['European', 'Swedish', 'Scandinavian']",False,19,4,4.5,4.5,...,37,9,2,5,,65.488372,"{'being': 11, 'swedish': 115, 'came': 12, 'to'...",11486,146,0.012711
2,Via Veneto Ristorante,https://www.tripadvisor.com/Restaurant_Review-...,2.5,"[30, 30]","['Italian', 'Vegetarian Friendly']",False,195,4,4.5,5.0,...,17,5,0,2,,66.22,"{'via': 31, 'veneto': 31, 'ristorante': 6, 'ha...",3934,38,0.009659
3,Hoanh Long Vietnamese & Chinese Restaurant,https://www.tripadvisor.com/Restaurant_Review-...,1.0,[],"['Chinese', 'Asian', 'Vietnamese']",False,53,4,4.5,4.0,...,11,2,0,0,,63.533333,"{'choose': 2, 'the': 112, 'hot': 5, 'pot': 3, ...",1818,19,0.010451
4,Martino's,https://www.tripadvisor.com/Restaurant_Review-...,2.5,[],"['Italian', 'Pizza', 'Vegetarian Friendly']",False,1,4,,,...,6,4,0,0,,68.3125,"{'we': 51, 'like': 15, 'to': 50, 'come': 3, 'm...",2378,16,0.006728


In [252]:
rest_files

['restaurants_Bridgeview.csv',
 'restaurants_HumboldtPark.csv',
 'restaurants_Lincoln_Park.csv',
 'restaurants_South_Loop.csv',
 'restaurants_andersonville.csv',
 'restaurants_chinatown.csv',
 'restaurants_depaul.csv',
 'restaurants_lincolnsquare.csv',
 'restaurants_littleitaly.csv',
 'restaurants_northpark.csv',
 'restaurants_pilsen.csv',
 'restaurants_westloop.csv',
 'restaurants_westtown.csv']

# Supplimentary function to create cultural dict

In [None]:
def collect_cultural_categories(restaurants_folder):
    """
    collect cultural categories from restaurant datasets in each neghborhood
    
    input: restaurants_folder- url for a folder with restaurants datasets in each neghborhood
    return: combined restaurants dataset and list of category names
    """
    os.chdir(restaurants_folder)
    restaurants_files = glob.glob('*.{}'.format('csv'))
    category_names = set()
    restaurants_all = pd.DataFrame()
    for file in restaurants_files: 
        restaurants = pd.read_csv(file)
        restaurants['category'] = restaurants['category'].apply(lambda x: re.findall(r'\w+\s?\w+', x))
        restaurants_all = restaurants_all.append(restaurants, ignore_index=False)
        for category in restaurants['category']:
            category_names.update(category)
    return restaurants_all, category_names

In [None]:
restaurants_folder = '/Users/emilyyeh/Documents/MACS 30122/final-project-team/Trip Advisor Data/restaurants'
restaurants_all, category_names = collect_cultural_categories(restaurants_folder)
category_names

In [162]:
#Manually delete unwanted categories
cultural_categories = set([
    'American', 'Argentinean', 'Asian', 'Austrian', 'Belgian', 'British',
    'Cajun', 'Cantonese', 'Caribbean', 'Central American', 'Chinese', 'Creole', 'Croatian',
    'Dutch', 'Eastern European', 'Egyptian', 'European', 'Filipino', 'French',
    'German', 'Greek', 'Halal', 'Hawaiian', 'HongKong', 'Hunan', 
    'Indian', 'Irish', 'Israeli', 'Italian', 'Jamaican', 'Japanese', 'Korean', 'Kosher',
    'Latin', 'Lazio', 'Lebanese', 'Mediterranean', 'Mexican', 'Middle Eastern', 'Nepali', 
    'Persian', 'Peruvian', 'Polish', 'Polynesian', 'Portuguese', 'Romana', 'Russian', 
    'Scandinavian', 'Shanghai', 'South American', 'Southwestern', 'Spanish', 'Swedish', 'Szechuan',
    'Thai', 'Turkish', 'Tuscan', 'Ukrainian', 'Vietnamese'])
#Manually add desireded categories accordingly
cultural_categories.update([
    'America', 'Argentina', 'Asia', 'Austria', 'Belgium', 'Britan',
    'China', 'Croatia',
    'Netherlands', 'Europe', 'Egypt', 'Philippines', 'France',
    'Germany', 'Greece', 'Hawaii',
    'India', 'Ireland', 'Isareal', 'Italy', 'Jamaica', 'Japane', 'Korea', 
    'Latino', 'Lebanon', 'Mexico', 'Nepal', 
    'Peru', 'Poland', 'Polynesia', 'Portugal', 'Roman', 'Russia', 
    'Scandinavia', 'Spain', 'Sweden', 'Thailand', 'Tuscany', 'Ukrain', 'Vietnam'])