In [None]:
import pandas as pd
import numpy as np

import requests
import re

from bav_utils import token, get_url, get_pages, get_json

In [None]:
# Lookup tables: human-readable label -> numeric id.
# Study and base ids become the "studies=" / "bases=" filters of the API
# requests; category ids are used to select which records to keep.
dict_studies = {
    "Australia": 586,
    "USA": 628,
}
dict_bases = {
    "All adults": 1,
    "Male": 2,
    "Female": 3,
}
dict_categories = {
    "Automobiles": 2,
    "Electric Vehicle": 1383,
    "Hybrid Vehicle": 1463,
    "Hybrid/Electric vehicles": 1464,
}

In [None]:
# Accumulator for every record returned by the API, across all study/base
# pairs. NOTE: this is a plain list; the name would shadow the stdlib
# `json` module if that were ever imported in this notebook.
json = []

# Issue one request per (study, base) combination. Per the original
# author's note, looping like this prevents problems with ids being skipped.
for study_name, study_id in dict_studies.items():
    print(study_name)
    for base_name, base_id in dict_bases.items():
        print(base_name)
        url = get_url(
            name="brandscape-data",
            extensions=[f"studies={study_id}", f"bases={base_id}"],
        )
        # Ask for the page count first, then pull the records for those pages.
        pages = get_pages(url, token)
        json.extend(get_json(url, token, pages))

In [None]:
# Fields to copy out of each API record when building the profile table:
# top-level section of the record -> list of fields to read from it.
dict_profile_vars = {
    "brand": [
        "id",
        "name",
    ],
    "study": [
        "id",
        "country",
    ],
    "base": [
        "id",
        "name",
    ],
    "category": [
        "id",
        "name",
        "sector",
    ],
}

In [None]:
# Build one flat "profile" row per API record that
#   * has a brand (records whose brand is null are skipped), and
#   * belongs to one of the categories listed in dict_categories.
# Each row holds the fields named in dict_profile_vars, flattened to
# "<section>_<field>" keys (e.g. "brand_name"), plus "idx": the record's
# position in `json`, which later cells use to look the record up again.
dict_profiles = {}
for idx, json_data in enumerate(json):
    # Identity test against None plus short-circuiting `or`: the category id
    # is only read once both sub-dicts are known to exist, so a record with
    # a null category is skipped instead of raising TypeError.
    if json_data["brand"] is None or json_data["category"] is None:
        continue
    if json_data["category"]["id"] not in dict_categories.values():
        continue

    profile = {
        f"{section}_{field}": json_data[section][field]
        for section, fields in dict_profile_vars.items()
        for field in fields
    }
    profile["idx"] = idx
    dict_profiles[idx] = profile

# One DataFrame row per selected record
profiles = pd.DataFrame(list(dict_profiles.values()))
profiles

In [None]:
# Collect one record per (selected brand record, metric): the record's idx,
# the metric's name and group, and every entry of the metric's "scores"
# mapping as its own key.
list_metrics = []

for idx in dict_profiles:
    for metric in json[idx]["metrics"]:
        record = {
            "idx": idx,
            "metric_name": metric["name"],
            "metric_group": metric["group"]["name"],
        }
        # Score keys become extra columns of the wide table built below
        record.update(metric["scores"])
        list_metrics.append(record)

# Only these metric groups are kept
metric_groups = ["Base Sizes", "Powergrid", "Usage", "Preference", "Recommendation", "Imagery", "Love"]

# Wide -> long: keep the selected groups, unpivot the score columns into
# (metric_var, value) pairs, drop scores that are missing, renumber the rows.
metrics = (
    pd.DataFrame(list_metrics)
    .loc[lambda frame: frame["metric_group"].isin(metric_groups)]
    .melt(
        id_vars=["idx", "metric_name", "metric_group"],
        var_name="metric_var",
        value_name="value",
    )
    .dropna(subset=["value"])
    .reset_index(drop=True)
)
metrics

In [None]:
# Join every profile row to its metric rows and shape the final table.

# Columns kept in the output, in output order ("base" is derived below)
cols = ["study_country", "category_id", "category_name", "base", "brand_name", "metric_group", "metric_name", "metric_var", "value"]

# Shorter names for the exported columns
col_names = {
    "study_country": "country",
    "category_id": "cat_id",
    "category_name": "cat_name",
    "brand_name": "brand",
}

brandscape = (
    profiles
    # Join explicitly on the record index rather than on "whatever columns
    # happen to share a name". how="outer" keeps profiles that have no
    # remaining metric rows. validate checks that idx is unique on the
    # profile side (it comes from dict keys), so rows cannot silently fan out.
    .merge(metrics, how="outer", on="idx", validate="one_to_many")
    # Combined base label, e.g. "<base_id>_<base_name>"
    .assign(base=lambda frame: frame["base_id"].astype(str) + "_" + frame["base_name"])
    .loc[:, cols]
    # Non-inplace rename: returns a fresh frame instead of mutating a
    # column subset, which avoids SettingWithCopyWarning.
    .rename(columns=col_names)
)
brandscape

In [None]:
# If every row belongs to the same category, the category columns carry no
# information, so drop them. nunique() ignores missing values.
# The column-presence guard makes the cell safe to re-run after the columns
# have already been dropped; rebinding (rather than dropping in place) leaves
# the frame displayed by the previous cell untouched.
if "cat_name" in brandscape.columns and brandscape["cat_name"].nunique() == 1:
    print("Brandscape only contains 1 category")
    brandscape = brandscape.drop(columns=["cat_id", "cat_name"])

In [None]:
# Write the final table to a CSV file in the current working directory.
# The DataFrame index is written as the first (unnamed) column, since
# to_csv's default index=True is left unchanged.
brandscape.to_csv(path_or_buf="brandscape.csv")