# Question 2

## Part 1
#### Read all json files
#### find all "terms"
#### Create folder hierarchy
#### Store json files into respective folders

In [24]:
import json, glob, os, csv, re

In [25]:
#Function to create the parent folder "Data Processed"
#The path is relative and the folder is created only if it does not exist
def create_parent_directory():
    """Create the 'Data Processed' output folder and return its path.

    The returned path is relative, so the folder is resolved against the
    current working directory of the running process.

    Returns:
        The relative path 'Data Processed'.
    """
    home_folder = 'Data Processed'
    #exist_ok avoids the check-then-create race and makes re-runs harmless
    os.makedirs(home_folder, exist_ok=True)
    return home_folder

#Function to create folder structure by joining the input parameters and return the folder path
#If folder is already created, return its path
def make_directory_with_country(home_folder, country_name, city_name, term, category):
    """Ensure home_folder/country_name/city_name/term/category exists.

    Args:
        home_folder: Root folder under which the hierarchy is created.
        country_name: First level below the root.
        city_name: Second level below the root.
        term: Third level below the root.
        category: Innermost folder name.

    Returns:
        The joined folder path, whether it was just created or already existed.
    """
    directory = os.path.join(home_folder, country_name, city_name, term, category)
    #exist_ok creates any missing levels and is a no-op when the folder is already there
    os.makedirs(directory, exist_ok=True)
    return directory

#Function to serialise data as json into the file at the given location
def write_to_json_file(file_path, json_data):
    """Write json_data to file_path as JSON, replacing any existing content."""
    with open(file_path, mode='w') as destination:
        json.dump(json_data, fp=destination)
        
#Helper to remove numbers
def remove_numbers_lam(value):
    """Return value with every run of digits deleted and outer whitespace stripped.

    Whitespace inside the string is left untouched, so a number that sat
    between two words leaves both of its surrounding spaces behind.
    """
    digits_removed = re.sub(r'\d+', '', value)
    return digits_removed.strip()

In [26]:
home_folder = create_parent_directory()

#Location of the raw json files (machine-specific path; adjust when running elsewhere)
data_glob = r'C:\Users\infer\Desktop\Spring17\Python\lectures\DataAnalysis4Python_Spring17\Assignment 2\Data\*.json'

#Read all the json files at location using glob
for filename in glob.glob(data_glob):

    #Get the file name currently reading
    file_name = os.path.basename(filename)

    #Load data from json file. JSON text is UTF-8, so do not rely on the platform default encoding.
    #The input is closed again before any output file is written.
    with open(filename, encoding='utf-8') as f:
        data_from_file = json.load(f)

    #Get all categories; a file without categories is not stored anywhere
    categories = [category["title"] for category in data_from_file["categories"]]
    if not categories:
        continue

    #Country, city and term are the same for every category of a file, so compute them once
    country_name = data_from_file["location"]["country"]
    term = data_from_file["term"]

    #Remove numbers first and collapse whitespace afterwards, so a number between two
    #words does not leave a double space behind. For ex: "San 94 Jose" -> "San Jose"
    city_name = ' '.join(remove_numbers_lam(data_from_file["location"]["city"]).split())

    for category in categories:

        #Replace multiple spaces with single space. For ex: "New York" and "New  York"
        category = ' '.join(category.split())

        #Call function to get folder path
        data_dir = make_directory_with_country(home_folder, country_name, city_name, term, category)

        #Create file path by joining folder structure and file name
        file_path = os.path.join(data_dir, file_name)

        #Write to json file
        write_to_json_file(file_path, data_from_file)

### Part 1 End Result:

##### The categorization is done as follows:

##### Data Processed > Country > City > Term > Category > File.json


## Part 2
#### Read all json files
#### find all "restaurants"
#### Get timing for each restaurant for each day
#### Store results in a csv file

In [29]:
#Function to build one csv row per opening slot of a single restaurant
def add_rows(file, details):
    """Build the csv rows for one place, one row per entry in details.

    Args:
        file: Parsed json of the place; "name" and "location" ("city",
            "country") are read from it.
        details: Iterable of dicts with the keys 'day', 'start' and 'end'.
            'start' and 'end' are split after their first two characters.

    Returns:
        A list of rows, each a list of
        [name, city, country, day, start[:2], start[2:], end[:2], end[2:]].
        The city has runs of whitespace collapsed to single spaces.
    """
    return [
        [
            file["name"],
            ' '.join(file["location"]["city"].split()),
            file["location"]["country"],
            slot['day'],
            slot['start'][:2],
            slot['start'][2:],
            slot['end'][:2],
            slot['end'][2:],
        ]
        for slot in details
    ]

#Function to write data to given csv file
def write_to_csv(file_name, restaurant_timings):
    """Write the title row followed by restaurant_timings to file_name.

    Args:
        file_name: Path of the csv file; an existing file is overwritten.
        restaurant_timings: Iterable of rows, each with the eight values
            named in the title row.
    """
    #Use utf-8 encoding because some restaurant names are not in english letters.
    #newline='' stops the text layer from translating the '\n' line terminator
    #(otherwise it becomes '\r\n' on Windows).
    with open(file_name, 'w', encoding='utf-8', newline='') as csv_output:

        #Nothing is quoted; escapechar is put in front of any delimiter inside a value
        writer = csv.writer(csv_output, delimiter=',', quoting=csv.QUOTE_NONE, lineterminator='\n', escapechar='\\')

        #Title row
        writer.writerow(("Name of Restaurant", 'City', 'Country Code', 'Day of Week', 'Start Time Hour', 'Start Time Minutes', 'End Time Hour', 'End Time Minutes'))
        writer.writerows(restaurant_timings)
    #The with block closes the file, so no explicit close() is needed

In [30]:
#Read all the json files at location using glob
restaurant_timings = []

#Dummy opening slot used when a restaurant has no usable hours; it yields "NA" in every time column
missing_hours = [{"day": "NA", "start": "NANA", "end": "NANA"}]

for filename in glob.glob(r'C:\Users\infer\Desktop\Spring17\Python\lectures\DataAnalysis4Python_Spring17\Assignment 2\Data\*.json'):

    #Load data from json file. JSON text is UTF-8, so do not rely on the platform default encoding.
    with open(filename, encoding='utf-8') as f:
        data_from_file = json.load(f)

    #Skip every place that is not a restaurant
    if data_from_file["term"] != 'restaurants':
        continue

    #Only the first "hours" entry is used. Fall back to the dummy slot when "hours" is
    #missing, null or empty, or when its first entry has no "open" list, instead of
    #hiding every error behind a bare except or crashing on the lookup.
    hours = data_from_file.get("hours")
    if hours and "open" in hours[0]:
        open_slots = hours[0]["open"]
    else:
        open_slots = missing_hours

    #Populate the data in a list
    restaurant_timings.extend(add_rows(data_from_file, open_slots))

write_to_csv('restaurant_timings.csv', restaurant_timings)