In [1]:
import csv
import json

In [2]:
def parse_csv(file_path):
    '''
    Load a CSV file into a dict of rows keyed by the first column's value.

    The header row supplies the column names. For every data row, the value
    in the first column is removed from the row and used as the key; the
    remaining columns form the value (a dict of column name -> cell text).

    Make sure the first column is unique: when two rows share the same
    first-column value, the later row silently replaces the earlier one.

    Args:
        file_path: path of a UTF-8 encoded CSV file with a header row.

    Returns:
        dict mapping first-column value -> dict of the remaining columns.
        A completely empty file (no header row) yields an empty dict.
    '''
    my_dict = {}

    with open(file_path, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        # fieldnames is None when the file has no header row at all;
        # indexing it would raise TypeError, so treat that as "no rows".
        if reader.fieldnames is None:
            return my_dict
        first = reader.fieldnames[0]
        for row in reader:
            # pop() both extracts the key and drops it from the stored row.
            my_dict[row.pop(first)] = row
    return my_dict
                

In [3]:
def parse_json(file_path):
    '''Read the UTF-8 JSON document at file_path and return the decoded object.'''
    with open(file_path, mode='r', encoding='utf-8') as source:
        decoded = json.load(source)
    return decoded

In [4]:
def write_json(data, file_path):
    '''Serialize data as 4-space-indented JSON into file_path (UTF-8, overwriting).'''
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(data, fp=sink, indent=4)

In [5]:
def merge_json(file_path1, file_path2, output_file_path):
    '''
    Concatenate the contents of two JSON files and write the result as JSON.

    Both inputs are loaded with parse_json and combined with `+`, so their
    top-level values must support that operator together (e.g. two JSON
    arrays); otherwise the `+` raises TypeError.

    Args:
        file_path1: path of the first JSON file; its content comes first.
        file_path2: path of the second JSON file; its content is appended.
        output_file_path: path the merged JSON is written to via write_json.
    '''
    # parse_json is the loader defined in this notebook.
    data1 = parse_json(file_path1)
    data2 = parse_json(file_path2)

    merged_data = data1 + data2

    write_json(merged_data, output_file_path)

In [6]:
def join_csv_json(csv_data, json_data, key):
    '''
    Return the keys of csv_data whose `key` column appears in json_data.

    Args:
        csv_data: dict of row dicts, as produced by parse_csv
            (outer key -> {column name: cell text}).
        json_data: iterable of strings, each a comma-separated group of
            values, e.g. '92128, 92129'. Whitespace around each value is
            ignored and empty values are skipped.
        key: name of the column in each row dict to look up.

    Returns:
        list of the outer keys of csv_data (not the row dicts) whose
        row[key] equals one of the values in json_data, in csv_data's
        iteration order.

    Raises:
        KeyError: if a row dict has no `key` column.
    '''
    # Flatten all groups into one set for constant-time membership tests.
    lookup = set()
    for group in json_data:
        # Split on the bare comma and strip afterwards so both 'a, b' and
        # 'a,b' are understood.
        for token in group.split(','):
            token = token.strip()
            # An empty group or a stray comma must not add '' to the set,
            # otherwise rows with a blank value would be matched.
            if token:
                lookup.add(token)

    # Keep the outer key of every row whose column value is in the set.
    return [row_id for row_id in csv_data if csv_data[row_id][key] in lookup]


In [None]:
# Record a wall-clock start timestamp; a later cell subtracts it from the
# end timestamp to report how long the cells in between took.
import time
start = time.time()

In [7]:
# Load result.csv as a dict of rows keyed by the value of its first column,
# then display it.
csv_data = parse_csv('result.csv')
csv_data

{'50': {'name': 'Shogun of La Jolla',
  'address': 'Shogun of La Jolla, 9500 Gilman Dr, La Jolla, CA 92093',
  'avg_rating': '3.4',
  'zip': '92093',
  'categories': '{"Japanese restaurant"}',
  'city': 'La Jolla'},
 '157': {'name': "James' Place Prime Seafood Sushi",
  'address': "James' Place Prime Seafood Sushi, 2910 La Jolla Village Dr, La Jolla, CA 92093",
  'avg_rating': '4.5',
  'zip': '92093',
  'categories': '{"Sushi restaurant",Restaurant}',
  'city': 'La Jolla'},
 '244': {'name': 'The Bistro at the Strand',
  'address': 'The Bistro at the Strand, 9500 Gilman Dr, La Jolla, CA 92093',
  'avg_rating': '3.8',
  'zip': '92093',
  'categories': '{"Asian fusion restaurant","Asian restaurant","Sushi restaurant"}',
  'city': 'La Jolla'}}

In [8]:
# Load result.json and display it; join_csv_json expects each entry to be a
# comma-separated string of values.
json_data = parse_json('result.json') 
json_data

['92128, 92129',
 '92025, 92029, 92064, 92067, 92127, 92128',
 '92064, 92131, 92145',
 '92128, 92129, 92131']

In [9]:
# Collect the keys of the CSV rows whose 'zip' value appears in json_data.
joined_data = join_csv_json(csv_data, json_data, 'zip')

In [10]:
# Display the join result (a list of matching CSV row keys).
joined_data

[]

In [None]:
# Print the wall-clock seconds elapsed since `start` was recorded.
end = time.time()
print(end - start)

No Japanese restaurants were found near Poway: the join above returned an empty list.

In [11]:
# Repeat the join against nourish_public_ca_business.csv using the same zip
# groups. Note: this rebinds csv_data, json_data and joined_data, replacing
# the values loaded from result.csv in the earlier cells.
csv_data = parse_csv('nourish_public_ca_business.csv')
json_data = parse_json('result.json') 
joined_data = join_csv_json(csv_data, json_data, 'zip')

In [12]:
# All joined entries: those near Poway, but not including Poway itself
# (per the author's note; the zip groups come from result.json).
joined_data

['Basic Riders Course (BRC)',
 'Jamba',
 'GNC',
 'GameStop Military',
 "Domino's Pizza",
 'California Commercial Asphalt',
 'MCAS Miramar Veterinary Treatment Facility',
 'Coast Citrus Distributors',
 'Miramar Fuel Farm',
 'The Barn',
 'Consolidated Bachelor Qtrs',
 "Dunkin'",
 'Miramar CBQ',
 'Chipotle Mexican Grill',
 'L&L Hawaiian Barbecue',
 'Auto Skills Center',
 'Miramar International Airport',
 'Inns of The Marine Corps',
 'Navy Marine Corps Relief Society Thrift Shop',
 'Firestone Complete Auto Care',
 'Miramar Nail Spa',
 'MCX Autoport Gas Station',
 'Navy Federal Credit Union - ATM',
 'MC Miramar Veterinary Clinic',
 'A1 Soils',
 'Miramar Gas Station',
 "Denny's",
 'MCAS Miramar',
 'The HUB',
 "McDonald's",
 'Hanson',
 'Gonzales Mess Hall',
 'Flying Leatherneck Aviation Museum',
 '3rd MAW Band',
 'VMFAT-101']