In [None]:
"""
- Read in BAV brandscape (brand landscape data)
- EDA/GEDA across metric groups:
    - Usage
    - Recommendation
    - Love
    - Preference
    - Powergrid
    - Imagery
"""

In [None]:
import pandas as pd
import numpy as np

import plotly.express as px
import plotly.io as pio

pd.options.plotting.backend = "plotly"
pio.templates.default = "plotly_white"

from sklearn.decomposition import PCA

from utils_analysis import (
    data_subset,
    data_subset_ownership,
    data_subset_wide,
    plot_scatter,
    plot_rank,
)

In [None]:
# GET DATA
# Read the brandscape CSV from the working directory; the file's first column
# is used as the row index.
brandscape = pd.read_csv(filepath_or_buffer="brandscape.csv", index_col=0)
brandscape

In [None]:
# USAGE
# Rows of the "Usage" metric group, restricted to the three metrics used below.
mask = (
    brandscape["metric_group"].eq("Usage")
    & brandscape["metric_name"].isin(["Total Users", "Lapsed User", "Never Used"])
)
cols = ["country", "base", "brand", "metric_name", "value"]
usage = data_subset(
    input_data=brandscape,
    mask=mask,
    cols=cols,
    rank_groupby=["country", "base", "metric_name"],
)

# Wide form of the usage data, plus two derived columns:
#   rank       - descending rank of "Total Users" within each country/base
#   top5_brand - the brand name where rank <= 5, otherwise an empty string
usage_wide = (
    data_subset_wide(data_subset=usage, index_cols=["country", "base", "brand"])
    .assign(
        rank=lambda d: d.groupby(["country", "base"])["Total Users"].rank(ascending=False)
    )
    .assign(top5_brand=lambda d: np.where(d["rank"] <= 5, d["brand"], ""))
)

# Rank plot of "Total Users", limited to rank <= 5 and coloured by brand.
plot_rank(usage.query("metric_name == 'Total Users' & rank <= 5"), color="brand")

# "Total Users" (x) against "Lapsed User" (y), labelled via top5_brand.  # INCLUDE AS OUTPUT
plot_scatter(usage_wide, x="Total Users", y="Lapsed User", label="top5_brand", coord_equal=True)

In [None]:
# RECOMMENDATION
# Rows whose metric is "Recommend To A Friend"; every brandscape column is kept.
mask = brandscape["metric_name"].eq("Recommend To A Friend")
cols = brandscape.columns
recommend = data_subset_ownership(
    usage,
    data_subset(
        input_data=brandscape,
        mask=mask,
        cols=cols,
        rank_groupby=["country", "base"],
    ),
)

# Get recommend in wide form
recommend_wide = data_subset_wide(
    recommend,
    index_cols=["country", "base", "brand", "top_ownership"],
)

# Rank plot of recommendation, limited to rank <= 15.
plot_rank(recommend.query("rank <= 15"))

In [None]:
# LOVE
# Percentage-valued rows of the "Love" metric group.
mask = brandscape["metric_group"].eq("Love") & brandscape["metric_var"].eq("percentage")
cols = ["country", "base", "brand", "metric_name", "value"]
love = data_subset_ownership(
    usage,
    data_subset(
        input_data=brandscape,
        mask=mask,
        cols=cols,
        rank_groupby=["country", "base", "metric_name"],
    ),
)

# Get love in wide form
love_wide = data_subset_wide(
    love,
    index_cols=["country", "base", "brand", "top_ownership"],
)

# Rank plot for "Most Loved", limited to rank <= 15.
plot_rank(love.query("metric_name == 'Most Loved' & rank <= 15"))

# Rank plot for "Most Hated" (no rank cut-off).
plot_rank(love.query("metric_name == 'Most Hated'"))

# "Most Hated" (x) against "Most Loved" (y), coloured by top_ownership.
plot_scatter(love_wide, x="Most Hated", y="Most Loved", color="top_ownership", coord_equal=True)

In [None]:
# PREFERENCE
# Rows of the "Preference" metric group for the two metrics compared below.
mask = (
    brandscape["metric_group"].eq("Preference")
    & brandscape["metric_name"].isin(["One I Prefer", "Total Prefer"])
)
cols = ["country", "base", "brand", "metric_name", "value"]
pref = data_subset_ownership(
    usage,
    data_subset(
        input_data=brandscape,
        mask=mask,
        cols=cols,
        rank_groupby=["country", "base", "metric_name"],
    ),
)

# Get preference in wide form
pref_wide = data_subset_wide(
    pref,
    index_cols=["country", "base", "brand", "top_ownership"],
)

# Rank plot for "One I Prefer".
plot_rank(pref.query("metric_name == 'One I Prefer'"))

# Rank plot for "Total Prefer".
plot_rank(pref.query("metric_name == 'Total Prefer'"))

# "One I Prefer" (x) against "Total Prefer" (y), coloured by top_ownership.
plot_scatter(pref_wide, x="One I Prefer", y="Total Prefer", color="top_ownership", coord_equal=True)

In [None]:
# USAGE vs LOVE: "Total Users" (x) against "Most Hated" (y), per country/base.
#
# This cell previously merged an `ownership_brands` frame that is never
# assigned or imported anywhere in the notebook, so it failed with NameError
# on a fresh kernel. It now reuses the wide tables built in the USAGE and LOVE
# cells, which already carry "Total Users", "Most Hated" and "top_ownership".
# NOTE(review): assumes usage_wide / love_wide expose country, base and brand
# as ordinary columns (the USAGE cell reads usage_wide["brand"]) - confirm
# against utils_analysis.data_subset_wide.
(
    usage_wide[["country", "base", "brand", "Total Users"]]
    .merge(
        love_wide[["country", "base", "brand", "top_ownership", "Most Hated"]],
        on=["country", "base", "brand"],
    )
    # Rows without an ownership flag are treated as not top-owned.
    .assign(top_ownership=lambda d: d["top_ownership"].fillna(False))
    .plot.scatter(
        x="Total Users",
        y="Most Hated",
        color="top_ownership",
        hover_data=["brand"],
        facet_col="country",
        facet_row="base",
    )
    .update_layout(height=800)
    # Same fixed 0-60 range on both axes with a 1:1 scale ratio, so every facet
    # is drawn on equal scales.
    .update_xaxes(range=[0, 60], nticks=4, constrain="domain")
    .update_yaxes(range=[0, 60], nticks=4, scaleanchor="x", scaleratio=1, constrain="domain")
)

In [None]:
# POWERGRID
# Rank-valued rows of the "Powergrid" metric group for the "1_All Adults" base.
mask = (
    brandscape["metric_group"].eq("Powergrid")
    & brandscape["metric_var"].eq("rank")
    & brandscape["base"].eq("1_All Adults")
)
cols = ["country", "brand", "base", "metric_name", "value"]
# Boolean-mask .loc selection already returns a new frame, so no explicit copy.
powergrid = brandscape.loc[mask, cols].reset_index(drop=True)
# Attach the ownership information, then drop rows containing any missing value.
powergrid = data_subset_ownership(usage, powergrid).dropna()

# Get powergrid in wide form
powergrid_wide = data_subset_wide(
    powergrid,
    index_cols=["country", "brand", "top_ownership"],
)

# Plot BAV powergrid https://www.bavgroup.com/about-bav/brandassetr-valuator # INCLUDE AS OUTPUT
powergrid_wide.plot.scatter(
    x="Brand Stature",
    y="Brand Strength",
    facet_col="country",
    color="top_ownership",
)

-----
# PCA - Important imagery

In [None]:
def unite_cols(data, idx_cols):
    """Build one label Series from one or more columns of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns to combine.
    idx_cols : list or column label
        If a ``list``, each named column is cast to ``str`` and the columns
        are concatenated left to right with ``"_"`` between them.
        Any other value is used directly as a column key and that column is
        returned unchanged (no string cast).

    Returns
    -------
    pandas.Series
        The combined labels, aligned with ``data``'s index.
    """
    # Non-list input: plain column lookup, returned as-is.
    if not isinstance(idx_cols, list):
        return data[idx_cols]

    parts = data[idx_cols]
    united = parts[idx_cols[0]].astype(str)
    # Append the remaining columns one by one, "_"-separated.
    for name in parts.columns[1:]:
        united = united + "_" + parts[name].astype(str)
    return united

In [None]:



def apply_pca(brandscape, mask, idx_cols, n = 2, nrows = 10):
    """Fit a PCA on the selected rows and return the absolute loadings in long form.

    Parameters
    ----------
    brandscape : pandas.DataFrame
        Long-format data; the columns country, base, brand, metric_name and
        value are read from it.
    mask : boolean row selector
        Passed to ``brandscape.loc`` to choose the rows that enter the PCA.
    idx_cols : list or column label
        Forwarded to ``unite_cols`` to build the row index of the PCA input.
    n : int, default 2
        Number of principal components to fit.
    nrows : int, default 10
        Not used by the function body.

    Returns
    -------
    pandas.DataFrame
        One row per (component, attribute) with columns ``index`` (component
        label ``pc0``, ``pc1``, ...), ``variable`` (attribute name), ``value``
        (absolute loading) and ``rank`` (descending rank of ``value`` over all
        rows), sorted by ``value`` descending.

    Notes
    -----
    Prints the attribute count, the component labels, the explained variance
    ratios, their sum, and the shape of the component matrix.
    """
    long_cols = ["country", "base", "brand", "metric_name", "value"]
    selected = brandscape.loc[mask, long_cols]

    # Attribute names in order of first appearance; this order also fixes the
    # column order of the matrix handed to PCA.
    attr = selected["metric_name"].unique().tolist()
    print(len(attr))

    # Pivot to one row per key combination and one column per attribute.
    key_cols = [c for c in selected.columns if c not in ("metric_name", "value")]
    wide = selected.pivot(index=key_cols, columns="metric_name", values="value").reset_index()
    row_labels = unite_cols(wide, idx_cols)
    features = wide.set_index(row_labels)[attr]

    component_labels = [f"pc{k}" for k in range(n)]
    print(component_labels)

    pca = PCA(n_components=n)
    pca.fit_transform(features)  # only the fitted attributes are used below
    print(pca.explained_variance_ratio_)
    print(pca.explained_variance_ratio_.sum())
    print(pca.components_.shape)

    loadings = pd.DataFrame(abs(pca.components_), columns=attr, index=component_labels)
    output = loadings.reset_index().melt(id_vars="index")
    output = output.sort_values(["value"], ascending=False)
    output = output.assign(rank=lambda d: d["value"].rank(ascending=False))
    return output

In [None]:
# Subset to imagery: percentage-valued rows of the "Imagery" metric group
mask = (
    (brandscape["metric_group"] == "Imagery") &
    (brandscape["metric_var"] == "percentage")
)
cols = ["country", "base", "brand", "metric_name", "value"]
imagery = brandscape.loc[mask, cols].reset_index(drop=True)

# Pivot to one row per country/base/brand and one column per imagery attribute.
# Fix: this used to pivot `imagery_subset`, a name that is never defined in
# the notebook; the frame built just above is `imagery`.
imagery_wide = imagery.pivot(
    index=["country", "base", "brand"],
    columns="metric_name",
    values="value",
).reset_index()

# Get list of imagery attributes (order of first appearance)
attr = imagery["metric_name"].unique().tolist()
print(len(attr))

In [None]:
# Display the pivoted imagery table assigned in the previous cell.
imagery_wide

In [None]:
base_list = ["1_All Adults"]
country_list = ["Australia", "USA"]
n = 2

# Subset for mask
mask = (
    imagery_wide["base"].isin(base_list) &
    imagery_wide["country"].isin(country_list)
)
cols = ["country", "base", "brand"]
data = imagery_wide.loc[mask, [*cols, *attr]]

# Use the "_"-joined key columns as the row index and drop the key columns so
# that only the attribute columns are passed to PCA. Done without
# inplace=True so that re-running the cell always starts from imagery_wide.
cols_united = unite_cols(data, cols)
data = data.set_index(cols_united).drop(columns=cols)

# Apply PCA
idx_n = [f"pc{i}" for i in range(n)]
pca = PCA(n_components=n)
principalComponents = pca.fit_transform(data)
print(f"Explained variance by PC: {pca.explained_variance_ratio_} = {pca.explained_variance_ratio_.sum()}")

# Process output: for every attribute keep its largest absolute loading across
# the fitted components.
# Fix: `.max("value")` passed the string as the `numeric_only` argument rather
# than selecting a column; the column is now selected explicitly.
output = (
    pd.DataFrame(abs(pca.components_), columns=attr, index=idx_n)
    .reset_index()
    .melt(id_vars="index")
    .groupby("variable", as_index=False)["value"]
    .max()
    .sort_values("value", ascending=False)
)
output["rank"] = output["value"].rank(ascending=False)
output

# then return output

In [None]:
# Column names in `cols` other than the long-format pair metric_name / value.
# Fix: `is not in` is not valid Python (SyntaxError); the membership test is `not in`.
[col for col in cols if col not in ["metric_name", "value"]]

In [None]:
# Accumulator for the per-group tables returned by apply_pca in the cells below.
# Those cells append to this list, so re-run this cell first when re-running
# them; otherwise the same group is appended more than once.
pca_output = []

In [None]:
# Imagery PCA: all brands, base "1_All Adults", country "Australia"
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("1_All Adults")
    & brandscape["country"].eq("Australia")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_Australia")
)

In [None]:
# Imagery PCA: all brands, base "1_All Adults", country "USA"
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("1_All Adults")
    & brandscape["country"].eq("USA")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_USA")
)

In [None]:
# Imagery PCA: all brands, base "1_All Adults", no country filter
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("1_All Adults")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_both")
)

In [None]:
# Imagery PCA: all brands, base "3_Female", no country filter
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("3_Female")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_female")
)

In [None]:
# Imagery PCA: all brands, base "2_Male", no country filter
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("2_Male")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_male")
)

In [None]:
# Imagery PCA: brand "Toyota" only, base "1_All Adults", no country filter
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("1_All Adults")
    & brandscape["brand"].eq("Toyota")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_toyota")
)

In [None]:
# Imagery PCA: brand "Ford" only, base "1_All Adults", no country filter
mask = (
    brandscape["metric_group"].eq("Imagery")
    & brandscape["metric_var"].eq("percentage")
    & brandscape["base"].eq("1_All Adults")
    & brandscape["brand"].eq("Ford")
)
# Tag the result with its group label and add it to the accumulator.
pca_output.append(
    apply_pca(brandscape, mask, idx_cols=["country", "base", "brand"])
    .reset_index(drop=True)
    .assign(grp="allbrands_ford")
)

In [None]:
# Stack every accumulated PCA table and write the result to a single CSV in
# the working directory (the row index is written as the first column).
pd.concat(objs=pca_output).to_csv(path_or_buf="imagery_PCA.csv")