In [1]:
'''
Importing packages required
'''

import csv
import json
from population_extraction import extract_Population, get_mean_population_for_state, get_population_for_postcode
from income_extraction import get_income_for_postcode, income_dictionary, extract_income
from covid_feature_extraction import extract_covid_feature_value
from business_categories import attribute
from business_attributes import (get_business_names, identify_region, check_Business_Cards, check_Bike_Parking,
check_wifi, check_alcohol, check_TV, check_outdoor_seating, check_noise_level, check_Price_Range, check_Business_Parking)

In [2]:
# Column order of the parsed dataset: the label first, then every feature.
fields = [
    # label
    'Is_Open',

    # location and postcode-level lookups
    'Latitude',
    'Longitude',
    'Income_Category',
    'Population_Postcode',

    # business profile
    'is_Chain',
    'Stars',
    'Review_Count',

    # business attributes
    'Business_Accepts_Credit_Cards',
    'Bike_Parking',
    'WiFi',
    'Business_Parking',
    'Alcohol',
    'Has_TV',
    'Noise_Level',
    'Price_Range',
    'Outdoor_Seating',

    # COVID features
    'Highlights',
    'delivery_or_takeout',
    'Grubhub_enabled',
    'Call_To_Action_enabled',
    'Request_a_Quote_Enabled',
    'Covid_Banner',
    'Temporarily_Closed',
    'Virtual_Services_Offered',

    # business category flags
    'Active_Life',
    'Arts_&_Entertainment',
    'Automotive',
    'Beauty_&_Spas',
    'Education',
    'Event_Planning_&_Services',
    'Financial_Services',
    'Food',
    'Health_&_Medical',
    'Home_Services',
    'Hotels_&_Travel',
    'Local_Flavor',
    'Local_Services',
    'Mass_Media',
    'Nightlife',
    'Pets',
    'Professional_Services',
    'Public_Services_&_Government',
    'Religious_Organizations',
    'Restaurants',
    'Shopping',
]

In [3]:
# Index the COVID feature records by business id.
# The file is read one JSON object per line; the `with` block guarantees the
# handle is closed once every record has been loaded (the original left it open).
covid_list = {}

with open('yelp_academic_dataset_covid_features.json', encoding = 'utf-8') as covid:
    for line in covid:
        # skip blank lines (e.g. a trailing newline) instead of crashing in json.loads
        if not line.strip():
            continue
        busn = json.loads(line)
        bus_id = busn['business_id']
        # a repeated business id overwrites the earlier record
        covid_list[bus_id] = busn

# Income lookup, passed to get_income_for_postcode() together with postcode and state.
# Per the original notes: one dictionary per state, mapping postcodes to income
# categories -- NOTE(review): structure not visible here, confirm in income_extraction.
income_list = income_dictionary()

# Population lookup, passed to get_population_for_postcode() together with postcode and state.
# Per the original notes: one dictionary per state, mapping postcodes to population
# -- NOTE(review): structure not visible here, confirm in population_extraction.
population_list = extract_Population()

# Business names across the dataset. It is indexed by name and compared with 2
# to decide whether a business is a chain, so it presumably maps each name to
# its number of occurrences -- TODO confirm in business_attributes.
business_names = get_business_names()

# Build Dataset.csv: one row per US business, with the columns listed in `fields`.
#
# Inputs read from the surrounding notebook: fields, covid_list, income_list,
# population_list, business_names.

# Category label (as looked up in the result of attribute()) -> CSV column that flags it.
# NOTE(review): the original also declared a Real_Estate flag, but it was never
# set or written and `fields` has no column for it, so it is not reproduced here.
CATEGORY_COLUMNS = {
    'Active Life': 'Active_Life',
    'Arts & Entertainment': 'Arts_&_Entertainment',
    'Automotive': 'Automotive',
    'Beauty & Spas': 'Beauty_&_Spas',
    'Education': 'Education',
    'Event Planning & Services': 'Event_Planning_&_Services',
    'Financial Services': 'Financial_Services',
    'Food': 'Food',
    'Health & Medical': 'Health_&_Medical',
    'Home Services': 'Home_Services',
    'Hotels & Travel': 'Hotels_&_Travel',
    'Local Flavor': 'Local_Flavor',
    'Local Services': 'Local_Services',
    'Mass Media': 'Mass_Media',
    'Nightlife': 'Nightlife',
    'Pets': 'Pets',
    'Professional Services': 'Professional_Services',
    'Public Services & Government': 'Public_Services_&_Government',
    'Religious Organizations': 'Religious_Organizations',
    'Restaurants': 'Restaurants',
    'Shopping': 'Shopping',
}

# CSV column -> key passed to extract_covid_feature_value() for that column.
COVID_COLUMNS = {
    'Highlights': 'highlights',
    'delivery_or_takeout': 'delivery or takeout',
    'Grubhub_enabled': 'Grubhub enabled',
    'Call_To_Action_enabled': 'Call To Action enabled',
    'Request_a_Quote_Enabled': 'Request a Quote Enabled',
    'Covid_Banner': 'Covid Banner',
    'Temporarily_Closed': 'Temporary Closed Until',
    'Virtual_Services_Offered': 'Virtual Services Offered',
}

# Both files are managed by the same `with`, so the business file is closed too
# (the original opened it with a bare open() and never closed it).
with open('yelp_academic_dataset_business.json', encoding = 'utf-8') as business, \
        open('Dataset.csv', encoding = 'utf-8', mode = 'w', newline = '') as csv_file:

    writer = csv.DictWriter(csv_file, fieldnames = fields)
    writer.writeheader()

    # iterate through business records (one JSON object per line)
    for line in business:

        # skip blank lines (e.g. a trailing newline) instead of crashing in json.loads
        if not line.strip():
            continue

        record = json.loads(line)

        # Region flags for the state; a record whose flags are all 0 is treated
        # as a non-American business and skipped before any further work.
        state = record['state'].strip()
        southwest, west, midwest, southeast, northeast = identify_region(state)
        if all(flag == 0 for flag in (southwest, west, midwest, southeast, northeast)):
            continue

        name = record['name'].strip()
        post = record['postal_code'].strip()
        business_id = record['business_id'].strip()

        # A business counts as a chain when its name is recorded at least twice.
        # business_names is indexed directly, so an unknown name raises KeyError.
        is_Chain = 1 if business_names[name] >= 2 else 0

        # postcode-level lookups
        postcode_income = get_income_for_postcode(post, state, income_list)
        postcode_population = get_population_for_postcode(post, state, population_list)

        # Attribute defaults, used when the record has no attributes at all.
        business_accepts_credit_cards = 0
        bike_Parking = 0
        wifi = 0
        business_parking = 0
        outdoor_seating = 0
        alcohol = 0
        hasTV = 0
        noise_level = 2
        price_range = 2

        if record['attributes'] is not None:
            # NOTE(review): every iteration reassigns all nine values, so the result
            # is whatever the check_* helpers return for the LAST key unless they
            # ignore `key` or otherwise preserve earlier matches. The helpers are
            # not visible here; behaviour is kept exactly as in the original.
            for key in record['attributes']:
                business_accepts_credit_cards = check_Business_Cards(key, record)
                bike_Parking = check_Bike_Parking(key, record)
                wifi = check_wifi(key, record)
                alcohol = check_alcohol(key, record)
                hasTV = check_TV(key, record)
                outdoor_seating = check_outdoor_seating(key, record)
                noise_level = check_noise_level(key, record)
                price_range = check_Price_Range(key, record)
                business_parking = check_Business_Parking(key, record)

        # COVID features. covid_list is indexed directly, so a business id that
        # is missing from it raises KeyError.
        elem = covid_list[business_id]
        covid_values = {
            column: extract_covid_feature_value(elem, feature_key)
            for column, feature_key in COVID_COLUMNS.items()
        }

        # Category flags: every column starts at 0 and is set to 1 when its
        # label is present in the result of attribute().
        category_flags = dict.fromkeys(CATEGORY_COLUMNS.values(), 0)
        if record['categories'] is not None:
            labels = [label.strip() for label in record['categories'].split(',')]
            items = attribute(labels)
            for label, column in CATEGORY_COLUMNS.items():
                if label in items:
                    category_flags[column] = 1

        # DictWriter orders the values by `fields`, so the key order here is free.
        row = {
            'Is_Open': record['is_open'],
            'Latitude': record['latitude'],
            'Longitude': record['longitude'],
            'Income_Category': postcode_income,
            'Population_Postcode': postcode_population,
            'is_Chain': is_Chain,
            'Stars': record['stars'],
            'Review_Count': record['review_count'],
            'Business_Accepts_Credit_Cards': business_accepts_credit_cards,
            'Bike_Parking': bike_Parking,
            'WiFi': wifi,
            'Business_Parking': business_parking,
            'Outdoor_Seating': outdoor_seating,
            'Alcohol': alcohol,
            'Has_TV': hasTV,
            'Noise_Level': noise_level,
            'Price_Range': price_range,
        }
        row.update(covid_values)
        row.update(category_flags)

        writer.writerow(row)