In [12]:
import csv
from collections import OrderedDict
import json


# Input CSV path (relative, so it resolves against the notebook's working
# directory). The loading cell below opens this file.
file_path = "../data/Global_Mobility_Report.csv"

In [13]:
# Load the country-level rows of the mobility CSV.
#
# Resulting structure:
#   raw_data[country][place][date] -> list of fractional changes (CSV value / 100)
#
# Only rows whose sub-region column is empty are kept. More than one such row
# can exist for the same country and date, so values are collected in lists
# and averaged in a later cell.

places = ["grocery_pharmacy", "park", "residential", "retail_recreation", "transit_station", "workplace"]

# Column positions inside each CSV row.
COUNTRY_COL = 1
PROVINCE_COL = 2
DATE_COL = 8
PLACE_COLUMNS = {
    "retail_recreation": 9,
    "grocery_pharmacy": 10,
    "park": 11,
    "transit_station": 12,
    "workplace": 13,
    "residential": 14,
}

raw_data = OrderedDict()
dates = set()

# newline="" is what the csv module requires so quoted fields containing line
# breaks are parsed correctly; the explicit encoding keeps non-ASCII country
# names identical across platforms instead of depending on the locale default.
with open(file_path, "r", newline="", encoding="utf-8") as file:
    reader = csv.reader(file)

    for r in reader:
        # csv.reader yields [] for blank lines; nothing to index there.
        if not r:
            continue

        country = r[COUNTRY_COL]
        province = r[PROVINCE_COL]

        # Skip the header row.
        if country == "country_region":
            continue

        # Skip sub-national rows.
        if province != "":
            continue

        date = r[DATE_COL]
        dates.add(date)

        # First time this country is seen: create one date-map per place.
        if country not in raw_data:
            raw_data[country] = OrderedDict((p, OrderedDict()) for p in places)

        for p in places:
            # Every place gets an entry for this date, even if it stays empty,
            # so the set of dates per place mirrors the rows that were read.
            readings = raw_data[country][p].setdefault(date, [])

            # An empty cell means "no reading"; it is simply not recorded
            # (no in-band sentinel value needed).
            cell = r[PLACE_COLUMNS[p]]
            if cell != "":
                readings.append(float(cell) / 100)

# String sort; assumes dates are ISO-formatted (YYYY-MM-DD) so that this is
# also chronological order -- TODO confirm against the CSV.
date_sorted = sorted(dates)

In [15]:
# Collapse the per-date reading lists into one averaged value per date and
# fill gaps so that every (country, category) series covers every date in
# `date_sorted`.
#
# Resulting structure:
#   mobility_data[country][category][date] -> float
#
# Gap filling carries the previous date's value forward; dates before the
# first available reading are filled with 0.0.
mobility_data = OrderedDict()

for country, categories in raw_data.items():
    mobility_data[country] = OrderedDict()
    for category, readings_by_date in categories.items():
        # Mean of the readings for each date that has at least one.
        averaged = {
            day: sum(values) / len(values)
            for day, values in readings_by_date.items()
            if values
        }

        # Build the series in `date_sorted` order. Inserting the missing dates
        # into an already-populated mapping would append them after the
        # observed ones and leave the series (and the JSON written from it)
        # out of chronological order.
        filled = OrderedDict()
        prev = 0.0
        for date in date_sorted:
            if date in averaged:
                prev = averaged[date]
            filled[date] = prev

        mobility_data[country][category] = filled
        

In [8]:
# Convert the top-level ordered map to a standard dictionary
# (shallow copy; the nested per-country maps are left as they are).
standard_dict = dict(mobility_data)

# Save the dictionary as JSON.
# A dedicated name is used for the output path: rebinding `file_path` here
# would overwrite the CSV input path, so re-running the loading cell after
# this one would open the JSON file instead of the CSV.
output_path = "../data/mobility_country_place_date.json"
with open(output_path, 'w') as file:
    json.dump(standard_dict, file)