# Read in data

In [3]:
import json

def read_json(filename: str, num_objects: int = 3) -> None:
    """Print the first few entries of a JSON array stored in a file.

    Intended as a quick visual sanity check of a data file; nothing is
    returned, so calling it as the last line of a cell shows only the
    printed records.

    Args:
        filename: Path of the JSON file to preview. The top-level JSON value
            is assumed to be a list (it is indexed by position).
        num_objects: Maximum number of entries to print. Fewer are printed
            when the list is shorter; zero or a negative value prints nothing.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # the platform's locale encoding, which differs between machines.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The file is already closed here; only the parsed data is needed.
    # min() caps the preview at the list length, and a non-positive count
    # produces an empty range.
    for index in range(min(num_objects, len(data))):
        print(data[index])

# Preview the first 5 entries of Activities.json (printed one per line)
read_json('Activities.json', 5)


{'input': 'Can you recommend some activities to do in Paris?', 'output': {'action': ['activities'], 'destination': 'PAR'}}
{'input': 'What are the best activities to try in Paris?', 'output': {'action': ['activities'], 'destination': 'PAR'}}
{'input': 'What are some fun activities to do in Paris?', 'output': {'action': ['activities'], 'destination': 'PAR'}}
{'input': 'What are the best activities to do in Tokyo?', 'output': {'action': ['activities'], 'destination': 'TYO'}}
{'input': 'What activities can I do in Paris?', 'output': {'action': ['activities'], 'destination': 'PAR'}}


# Adding diversity

The two cells below were run iteratively, substituting a different city each time, to preserve the integrity of our data set while spreading the examples across a diverse set of locations.

In [13]:
import json
from collections import Counter

def count_destinations(filename: str) -> Counter:
    """Count how often each destination code appears in a JSON data file.

    The file is expected to hold a list of records shaped like
    ``{'input': ..., 'output': {'destination': <code>, ...}}``. Records that
    are not dictionaries, whose ``'output'`` is not a dictionary, or that
    have no ``'destination'`` key are ignored rather than raising.

    Args:
        filename: Path of the JSON file to analyse.

    Returns:
        A ``collections.Counter`` mapping each destination value to the
        number of records carrying it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # the platform's locale encoding, which differs between machines.
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Feed the Counter directly from a generator; the isinstance checks keep
    # a malformed record (e.g. "output": null) from aborting the whole count.
    return Counter(
        item['output']['destination']
        for item in data
        if isinstance(item, dict)
        and isinstance(item.get('output'), dict)
        and 'destination' in item['output']
    )

# Tally the destination codes in Modified_Modified_Activities.json and print
# the resulting Counter to see how evenly the cities are represented.
destination_counts = count_destinations('Modified_Modified_Activities.json')
print(destination_counts)


Counter({'BCN': 9, 'SEL': 8, 'NYC': 8, 'CAI': 7, 'SHA': 6, 'LON': 6, 'SYD': 6, 'MEX': 5, 'BER': 5, 'BKK': 5, 'LAX': 4, 'SIN': 4, 'LIS': 3, 'PRG': 3, 'AMS': 3, 'RIO': 2, 'GRU': 2, 'HKG': 2, 'CPT': 2, 'BUD': 2, 'IST': 2, 'DXB': 1, 'CDG': 1, 'FLR': 1, 'BUE': 1, 'ROM': 1, 'MAD': 1})


In [12]:
import json
import random

def replace_destination(filename: str, source_code: str = 'PAR',
                        source_name: 'str | None' = None,
                        seed: 'int | None' = None) -> None:
    """Spread one city's records across randomly chosen other cities.

    Loads a JSON list of records shaped like
    ``{'input': <text>, 'output': {'destination': <code>, ...}}``. Every
    record whose destination equals ``source_code`` gets a new, randomly
    drawn city code, and mentions of the old city in its ``'input'`` text are
    rewritten to the new city's name. The result is written next to the input
    file as ``'Modified_' + <file name>``; the input file is left untouched.

    Args:
        filename: Path of the JSON file to read.
        source_code: Destination code to replace. Defaults to ``'PAR'``, the
            city this function originally hard-coded.
        source_name: Human-readable name of the source city as it appears in
            the ``'input'`` text. When omitted it is looked up in the built-in
            code-to-name table; if the code is not in the table, only
            stand-alone occurrences of the code itself are rewritten.
        seed: Optional seed for a private random generator, making the
            assignment reproducible. When ``None`` the module-level ``random``
            state is used, exactly as before.

    Returns:
        None. The only effect is the file written to disk.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    # Function-scope imports keep this cell self-contained.
    import os
    import re

    # Mapping of city codes to city names
    city_codes_to_names = {
        'NYC': 'New York',
        'PAR': 'Paris',
        'BCN': 'Barcelona',
        'SYD': 'Sydney',
        'LON': 'London',
        'LAX': 'Los Angeles',
        'FLR': 'Florence',
        'BER': 'Berlin',
        'MOW': 'Moscow',
        'BKK': 'Bangkok',
        'IST': 'Istanbul',
        # NOTE(review): the sample records printed earlier in this notebook
        # use 'TYO' for Tokyo, while this table uses 'HND' - confirm which
        # code the data set should carry.
        'HND': 'Tokyo',
        'SIN': 'Singapore',
        'DXB': 'Dubai',
        'HKG': 'Hong Kong',
        'MAD': 'Madrid',
        'ROM': 'Rome',
        'CPT': 'Cape Town',
        'GRU': 'São Paulo',
        'BOM': 'Mumbai',
        'YTO': 'Toronto',
        'SEL': 'Seoul',
        'SHA': 'Shanghai',
        'MEX': 'Mexico City',
        'AMS': 'Amsterdam',
        'CAI': 'Cairo',
        'RIO': 'Rio de Janeiro',
        'LIS': 'Lisbon',
        'PRG': 'Prague',
        'BUD': 'Budapest',
        'BUE': 'Buenos Aires'
    }

    if source_name is None:
        source_name = city_codes_to_names.get(source_code)

    # Candidates exclude the source city, both by code and by name, so a
    # record is never "replaced" with the same city under another code.
    city_codes = [
        code for code, name in city_codes_to_names.items()
        if code != source_code and name != source_name
    ]

    # One pattern, applied in a single pass:
    #   * the city name, case-insensitively ("Paris", "paris", "PARIS");
    #   * the bare code, case-sensitively and only as a whole word, so words
    #     that merely contain it (e.g. "PARK" for 'PAR') are left intact.
    # The name alternative comes first so "PARIS" is consumed as the name
    # instead of being split into code + "IS".
    alternatives = [r'\b' + re.escape(source_code) + r'\b']
    if source_name:
        alternatives.insert(0, '(?i:' + re.escape(source_name) + ')')
    mention = re.compile('|'.join(alternatives))

    # A private generator gives reproducible output without disturbing the
    # global random state; without a seed, fall back to the global module.
    rng = random.Random(seed) if seed is not None else random

    # Read the JSON file (UTF-8 is the JSON interchange encoding; do not
    # depend on the platform's locale default).
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Re-home each matching record to its own independently drawn city.
    for item in data:
        output = item.get('output') if isinstance(item, dict) else None
        # Skip malformed records and records for other cities.
        if not isinstance(output, dict) or output.get('destination') != source_code:
            continue
        new_city_code = rng.choice(city_codes)
        new_city_name = city_codes_to_names[new_city_code]
        output['destination'] = new_city_code
        if isinstance(item.get('input'), str):
            # A callable replacement inserts the name literally (no
            # backslash/group-reference interpretation).
            item['input'] = mention.sub(lambda _match: new_city_name, item['input'])

    # Prefix only the file name, not the whole path, so an input such as
    # 'data/Activities.json' is written to 'data/Modified_Activities.json'.
    directory, basename = os.path.split(filename)
    output_path = os.path.join(directory, 'Modified_' + basename)
    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

# Re-home the records in Modified_Activities.json; the result is written to
# Modified_Modified_Activities.json ('Modified_' is prepended to the filename).
replace_destination('Modified_Activities.json')