In [None]:
import pandas as pd
pd.set_option('display.max_columns', 200)
import numpy as np
import tensorflow as tf
from tensorflow import keras
tf.compat.v1.disable_eager_execution()
from tqdm import tqdm
import os
import pickle
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

In [None]:
# Render every float in pandas output with exactly three decimal places.
pd.set_option('display.float_format', "{:.3f}".format)

In [None]:
from model import Synergy_Model

In [None]:
# Move the working directory one level up; presumably so that the relative
# paths used below ('raw_data/...', 'models_ml_split_eqn/...',
# 'results_ml_split_eqn/...') resolve from the project root — TODO confirm.
# NOTE(review): not idempotent — re-running this cell climbs one more level each time.
os.chdir('..')

In [None]:
# Display the working directory after the chdir above as a sanity check.
os.getcwd()

In [None]:
# Run configuration: passed to Synergy_Model and used to build the output
# file names ('<country>_<brand>.pkl' / '.xlsx') at the end of the notebook.
country='korea'
brand='cass'

# Data Creation

### Vehicle Input

In [None]:
# vehicles = ['digitaldisplayandsearch', 'facebook', 'instagram', 'programmaticvideo', 'youtube']  ## for korea
# vehicles = ['digitaldisplayandsearch', 'digitalvideo', 'tv', 'ott', 'otv', 'ooh'] ## for china
# vehicles = ['digitaldisplayandsearch','digitalvideo','facebook','instagram','ooh', 'opentv', 'paytv', 'print', 'programatic_display', 'programatic_video', 'programmatic', 'radio', 'twitter', 'youtube'] ## for peru
# vehicles = [] ## for mexico
# vehicles.sort()

In [None]:
# Load the vehicle mapping, drop the rows flagged 'REMOVE' in the 'Harmonized'
# column, and keep the remaining 'vehicle' names as a sorted list.
vehicle_mapping = pd.read_csv("raw_data/South Korea Q3 Model/mapping.csv")
kept_rows = vehicle_mapping['Harmonized'] != 'REMOVE'
vehicles = sorted(vehicle_mapping.loc[kept_rows, 'vehicle'].tolist())
vehicles

### Data Processing

In [None]:
# Instantiate the project-local Synergy_Model helper for this country/brand,
# passing the vehicle list loaded above. The commented-out line below is the
# variant that omits the explicit vehicle list.
dataset = Synergy_Model(country=country, brand=brand, vehicles = vehicles)
# dataset = Synergy_Model(country=country, brand=brand)

In [None]:
# Run the helper's data preparation step; the next cell reads its result from
# dataset.data (presumably populated here — verify against model.py).
dataset.data_creation()

In [None]:
# Work on a copy so the columns added to `data` below do not modify dataset.data.
data = dataset.data.copy()

In [None]:
# Inspect the prepared modelling table.
data

### Synergy Variable Creation

In [None]:
# 'Total' is the per-row sum of every column whose name contains '_volume'.
volume_cols = [name for name in data.columns if '_volume' in name]
row_totals = data[volume_cols].sum(axis=1)
data['Total'] = row_totals
data

In [None]:
# For every vehicle pair add '<a>x<b>_spend': the geometric mean (square root
# of the product) of the two vehicles' spend columns. An earlier variant, kept
# here only as a note, used the fourth root instead:
#   np.sqrt(np.sqrt(spend_a * spend_b))
for pair in dataset.pairs:
    first_vehicle, second_vehicle = pair[0], pair[1]
    spend_product = data[first_vehicle + '_spend'] * data[second_vehicle + '_spend']
    data[first_vehicle + "x" + second_vehicle + '_spend'] = np.sqrt(spend_product)
data

In [None]:
# Column-wise totals of the modelling table, for a quick sanity check.
data.sum()

In [None]:
# data.loc[data['year'] == 2022]

# Modelling

### Model Definition

In [None]:
class BetaGammaDecay:
    """Carry-over (adstock-style) transform built from TensorFlow ops.

    The instantaneous signal ``x`` is scaled by ``beta``; then, ``num_dates``
    times, the running lagged signal is shifted down by one row (a zero row is
    prepended, the last row is dropped), multiplied by ``gamma`` and added to
    the accumulated result. Row ``t`` of ``impact_by_signal_decayed`` is
    therefore ``beta * sum_k gamma**k * x[t - k]`` over the lags that exist.

    Args:
        x: tensor indexed as (rows, columns); rows are shifted along axis 0.
        beta: multiplier applied to ``x`` before any lagging.
        gamma: multiplier applied at every one-row lag step.
        num_dates: number of lag steps to unroll into the graph.
        num_vehicles: number of columns of the zero row used for padding.

    Attributes:
        impact_by_signal_instant: the input ``x`` as given.
        impact_by_signal_decayed: ``x * beta`` plus all lagged contributions.
    """

    def __init__(self, x, beta, gamma, num_dates, num_vehicles):
        self.impact_by_signal_instant = x
        self.beta = beta
        self.gamma = gamma
        self.num_vehicles = num_vehicles

        # `lagged` is the contribution of the current lag step; `accumulated`
        # is the running sum over all lag steps seen so far (lag 0 included).
        lagged = self.impact_by_signal_instant * self.beta
        accumulated = lagged

        for _ in range(num_dates):
            zero_row = tf.zeros(shape=(1, self.num_vehicles), dtype=tf.float32)
            # Shift one row down and damp by gamma.
            lagged = tf.concat((zero_row, lagged[:-1] * self.gamma), 0)
            accumulated = accumulated + lagged

        self.impact_by_signal_decayed = accumulated

In [None]:
class new_Synergy():
    """Graph-mode (tf.compat.v1) model: decay + saturation curve per input column.

    The first ``num_significant`` input columns are handled as one group
    (suffix ``_sig``) and the remaining columns as a second group (suffix
    ``_syn``); the two groups get separately parameterised ``beta`` and ``a``
    values. The prediction ``yhat`` is the row-wise sum of the per-column curve
    outputs and is fitted to the target by plain gradient descent.
    """

    def __init__(self, input_shape, data_input, data_target, num_significant, aux_data):
        """Build placeholders, variables, the graph, the train op and the feed dict.

        Args:
            input_shape: ``(rows, columns)`` used to reshape ``data_input``.
            data_input: object with ``to_numpy()`` holding rows * columns values.
            data_target: object with ``to_numpy()`` holding ``rows`` values.
            num_significant: number of leading columns in the ``_sig`` group.
            aux_data: object with ``to_numpy()`` holding rows * num_significant
                values; fed to ``ph_aux`` (used by ``aux_loss``/``aux_loss1``).

        Raises:
            ValueError: from ``reshape`` when the data does not match ``input_shape``.
        """
        self.input_shape = input_shape
        self.num_significant = num_significant
        self.np_input = data_input.to_numpy().reshape(self.input_shape[0], self.input_shape[1])
        self.np_target = data_target.to_numpy().reshape(self.input_shape[0], 1)
        self.np_aux = aux_data.to_numpy().reshape(self.input_shape[0], self.num_significant)
        self.create_placeholders()        
        self.create_variables()
        self.create_infra()
        self.train_op = tf.compat.v1.train.GradientDescentOptimizer(learning_rate = 1).minimize(self.loss)
        # The whole data set is fed on every step (full-batch training).
        self.feed_dict = {
            self.ph_target:self.np_target,
            self.ph_input:self.np_input,
            self.ph_aux:self.np_aux
        }


    def create_placeholders(self):
        """Create the float32 placeholders for inputs, target and auxiliary data."""
        self.ph_input = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, self.input_shape[1]))
        self.ph_target = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, 1))
        self.ph_aux = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, self.num_significant))
    

    def create_variables(self):
        """Create the raw (unconstrained) trainable variables, all initialised to zero.

        Every raw variable is later squashed with a sigmoid in ``create_infra``,
        so a zero start places each parameter at the midpoint of its range.
        """
        # gamma
        gamma_initial = np.zeros(shape = (1, self.input_shape[-1])).astype(np.float32)
        self.gamma_raw = tf.Variable(gamma_initial, trainable=True, name = 'decay_gamma_raw')

        # beta
        beta_initial_sig = np.zeros(shape = (1, self.num_significant)).astype(np.float32)
        self.beta_raw_sig = tf.Variable(beta_initial_sig, trainable=True, name = 'decay_beta_raw_sig')
        beta_initial_syn = np.zeros(shape = (1, self.input_shape[-1] - self.num_significant)).astype(np.float32)
        self.beta_raw_syn = tf.Variable(beta_initial_syn, trainable=True, name = 'decay_beta_raw_syn')

        # sigmoid curve a
        initial_a_sig = np.zeros(shape = (1, self.num_significant)).astype(np.float32)
        self.a_raw_sig = tf.Variable(initial_a_sig, trainable=True, name = 'sigmoid_a_raw_sig')
        initial_a_syn = np.zeros(shape = (1, self.input_shape[-1] - self.num_significant)).astype(np.float32)
        self.a_raw_syn = tf.Variable(initial_a_syn, trainable=True, name = 'sigmoid_a_raw_syn')

        # sigmoid curve b
        initial_b = np.zeros(shape = (1, self.input_shape[-1])).astype(np.float32)
        self.b_raw = tf.Variable(initial_b, trainable=True, name = 'sigmoid_b_raw')

    
    def create_infra(self):
        """Build the forward graph and the loss tensors.

        Sets ``gamma``, ``beta``, ``a``, ``b``, ``decayed_impact``,
        ``curve_impact``, ``yhat``, ``loss`` and the three auxiliary losses.
        """
        # Called after create_variables() (see __init__), so this initializer
        # covers every variable defined there.
        self.init = tf.compat.v1.global_variables_initializer()
        with tf.compat.v1.variable_scope("synergy_volumes"):
            # beta-gamma
            # sigmoid(.) * 0.3 + 0.5 keeps gamma and beta inside (0.5, 0.8).
            self.gamma = tf.nn.sigmoid(self.gamma_raw, name = 'decay_gamma') * 0.3 + 0.5 
            self.beta_sig = tf.nn.sigmoid(self.beta_raw_sig, name = 'decay_beta_sig') * 0.3 + 0.5
            self.beta_syn = tf.nn.sigmoid(self.beta_raw_syn, name = 'decay_beta_syn') * 0.3 + 0.5
            self.beta = tf.concat(
                (
                    self.beta_sig,
                    self.beta_syn,
                ),
                1,
            )
            self.decayed_impact = BetaGammaDecay(self.ph_input, 
                                                beta = self.beta, 
                                                gamma = self.gamma, 
                                                num_dates=self.np_input.shape[0], 
                                                num_vehicles=self.ph_input.shape[1]).impact_by_signal_decayed
            # The unscaled input is added on top of the beta-scaled decayed signal.
            self.decayed_impact += self.ph_input
            # sigmoid curve
            # Ranges: a_sig in (0.1, 4.1), a_syn in (0.001, 0.051), b in (1, 4).
            self.a_sig = tf.nn.sigmoid(self.a_raw_sig, name = 'sigmoid_a') * 4 + 0.1
            self.a_syn = tf.nn.sigmoid(self.a_raw_syn, name = 'sigmoid_a') * 0.05 + 0.001
            self.a = tf.concat(
                (
                    self.a_sig,
                    self.a_syn,
                ),
                1,
            )
            self.b = tf.nn.sigmoid(self.b_raw, name = 'sigmoid_b') * 3 + 1
            self.x_start = 0.7
            B_slope_multiplier = 0.01
            # offset = (a / b) * ln(3/7) makes the logistic term equal 0.7 at zero
            # input, so subtracting x_start (0.7) pins the curve to 0 at zero input.
            self.shifted_intercept = tf.constant(np.log(3) - np.log(7), dtype=tf.float32)
            offset = (self.a / self.b) * self.shifted_intercept
            # Saturating logistic term scaled by a, plus a small linear term (b * 0.01 * x).
            self.curve_impact = self.a * ((1 / (1 + tf.exp(-(self.b / (self.a)) * (self.decayed_impact - offset)))) - self.x_start) + self.decayed_impact * self.b * B_slope_multiplier
            
            # aux_loss / aux_loss1 compare the `_sig` columns with ph_aux (one-sided
            # and two-sided RMSE); they only appear in the commented-out loss below.
            self.aux_loss = tf.sqrt(tf.reduce_mean(tf.square(tf.maximum(self.curve_impact[:,:self.num_significant] - self.ph_aux, 0))))
            self.aux_loss1 = tf.sqrt(tf.reduce_mean(tf.square(self.curve_impact[:,:self.num_significant] - self.ph_aux)))
            # aux_loss2 is non-zero only when the summed `_syn` impact exceeds half
            # of the summed `_sig` impact.
            self.aux_loss2 = tf.sqrt(tf.reduce_mean(tf.square(tf.maximum(tf.reduce_sum(self.curve_impact[:,self.num_significant:])/tf.reduce_sum(self.curve_impact[:,:self.num_significant]) - 0.5, 0))))
            self.yhat = tf.reduce_sum(self.curve_impact, axis = 1, keepdims=True)
            # Active objective: RMSE of the prediction plus aux_loss2 / 5.
            self.loss = tf.sqrt(tf.reduce_mean(tf.square(self.yhat - self.ph_target))) + self.aux_loss2 / 5
            # self.loss = tf.sqrt(tf.reduce_mean(tf.square(self.yhat - self.ph_target))) + 10*self.aux_loss + 10*(self.aux_loss1) + (self.aux_loss2)


    
    def _train(self, num_epochs):
        """Run ``num_epochs`` full-batch gradient steps in a fresh session.

        The session is kept open in ``self.sess_`` so callers can evaluate
        tensors afterwards; per-step losses are stored in ``self.cost_trace``.
        The progress bar's ``loss`` is the mean of all losses recorded so far.

        Args:
            num_epochs: number of optimisation steps to run.
        """
        self.cost_trace = []
        # Release a session left over from an earlier call on this instance.
        previous_session = getattr(self, 'sess_', None)
        if previous_session is not None:
            previous_session.close()
        self.sess_ = tf.compat.v1.Session()
        self.sess_.run(self.init)
        # The bar is advanced manually, so it is created from a total (not an
        # iterable) and closed by the context manager even if a step raises.
        with tqdm(total=num_epochs) as progress:
            for _ in range(num_epochs):
                _, cur_loss = self.sess_.run([self.train_op, self.loss], self.feed_dict)
                self.cost_trace.append(cur_loss)
                progress.set_postfix(loss=np.average(self.cost_trace))
                progress.update(1)

### Model Run

In [None]:
# Fit one new_Synergy model per significant vehicle. Inputs are the vehicle's
# own spend plus the synergy spend of every pair that contains the vehicle;
# the target is the vehicle's volume. Both are scaled to a maximum of 1.
all_vehicle_volumes = {}
all_vehicle_weights = {}
for vehicle in dataset.significant_vehicles:
    print(vehicle)
    volume_col = vehicle + '_volume'
    # Own spend first (the single "significant" column), then the pair columns.
    spend_cols = [vehicle + '_spend'] + [
        pair[0] + "x" + pair[1] + '_spend' for pair in dataset.pairs if vehicle in pair
    ]
    max_spend = data[spend_cols].max().max()
    max_volume = data[volume_col].max()
    input_data = data[spend_cols] / max_spend
    target_data = data[volume_col] / max_volume
    aux_data = data[volume_col] / max_volume

    tf.keras.backend.clear_session()
    # The column count comes from the actual inputs, so it always matches the
    # reshape inside new_Synergy.
    model = new_Synergy(input_shape=(data.shape[0], len(spend_cols)),
                        data_input=input_data,
                        data_target=target_data,
                        num_significant=1,
                        aux_data=aux_data
                    )
    model._train(1000)

    # Evaluate everything needed below in a single session call.
    y_pred, curve_impact, sigmoid_a, sigmoid_b, decay_beta, decay_gamma = model.sess_.run(
        [model.yhat, model.curve_impact, model.a, model.b, model.beta, model.gamma],
        model.feed_dict,
    )

    results = pd.DataFrame(data={'y_true': model.np_target.reshape(-1), 'y_pred': y_pred.reshape(-1)})
    # Undo the target normalisation with the same factor that was applied to it
    # (the vehicle's max volume), so the plot is in the vehicle's volume units.
    results *= max_volume
    results.plot()
    plt.show()
    r2 = r2_score(results['y_true'], results['y_pred'])
    print(r2)

    # Per-column curve output, renamed from '<name>_spend' to '<name>_volume' and
    # rescaled so that all columns together sum to the vehicle's observed volume.
    volumes = pd.DataFrame(curve_impact, columns=[x[:-5] + 'volume' for x in spend_cols]) * max_volume
    volumes = volumes * data[volume_col].sum() / volumes.sum().sum()
    all_vehicle_volumes[vehicle] = volumes
    # Share (in %) of the vehicle's volume attributed to each input column.
    breakdown = volumes.sum() / data[volume_col].sum() * 100
    display(breakdown)
    weights = {
        'sigmoid_a' : sigmoid_a,
        'sigmoid_b' : sigmoid_b,
        'decay_beta' : decay_beta,
        'decay_gamma' : decay_gamma,
    }
    print(weights)
    all_vehicle_weights[vehicle] = weights

    print()

# Analysis

### Aggregate Level 

In [None]:
# incoming_aggregate[row, col]: volume of vehicle `row` attributed to its own
# spend (diagonal) or to its synergy with vehicle `col` (off-diagonal).
incoming_aggregate = np.zeros(shape=(len(dataset.significant_vehicles), len(dataset.significant_vehicles)))
incoming_aggregate = pd.DataFrame(incoming_aggregate, columns=dataset.significant_vehicles, index=dataset.significant_vehicles)

for vehicle in dataset.significant_vehicles:
    vehicle_volumes = all_vehicle_volumes[vehicle]
    own_col = vehicle + '_volume'
    # Single .loc[row, col] assignment: the chained form df.loc[row][col] = value
    # writes to a temporary and is a no-op under pandas Copy-on-Write.
    incoming_aggregate.loc[vehicle, vehicle] = vehicle_volumes[own_col].sum()
    for col in vehicle_volumes.columns:
        if col != own_col:
            # '<a>x<b>_volume' -> ['<a>', '<b>']; the partner is the one that is not `vehicle`.
            other_veh = col[:-7].split('x')
            other_veh.remove(vehicle)
            other_veh = other_veh[0]
            incoming_aggregate.loc[vehicle, other_veh] = vehicle_volumes[col].sum()

incoming_aggregate

In [None]:
# Row totals of the incoming table (own + incoming synergy volume per vehicle);
# used below to fill the diagonal of the outgoing table.
sum_of_rows = incoming_aggregate.sum(axis=1)
sum_of_rows

In [None]:
# The outgoing view is the transpose of the incoming table. The diagonal is then
# reset so that each row sums to the same total as the matching incoming row.
outgoing_aggregate = incoming_aggregate.copy().T
for vehicle in dataset.significant_vehicles:
    outgoing_row = outgoing_aggregate.loc[vehicle]
    # Single .loc[row, col] assignment (chained df.loc[row][col] = value is a
    # no-op under pandas Copy-on-Write).
    outgoing_aggregate.loc[vehicle, vehicle] = sum_of_rows[vehicle] - outgoing_row.sum() + outgoing_row[vehicle]

outgoing_aggregate

In [None]:
# Row totals of the outgoing table; by construction of its diagonal these
# should equal sum_of_rows.
outgoing_aggregate.sum(axis=1)

In [None]:
# Per vehicle: volume from its own spend ('self') versus the total volume
# attributed to synergy with other vehicles ('incoming').
chart_1_aggregate = pd.DataFrame(data = np.zeros(shape=(len(dataset.significant_vehicles), 2)), columns=['self', 'incoming'], index=dataset.significant_vehicles)
for vehicle in dataset.significant_vehicles:
    own_volume = incoming_aggregate.loc[vehicle, vehicle]
    # Single .loc[row, col] assignments (chained df.loc[row][col] = value is a
    # no-op under pandas Copy-on-Write).
    chart_1_aggregate.loc[vehicle, 'self'] = own_volume
    chart_1_aggregate.loc[vehicle, 'incoming'] = incoming_aggregate.loc[vehicle].sum() - own_volume
chart_1_aggregate

In [None]:
# Convert each row of chart_1_aggregate into shares of its row total;
# rows whose total is zero are set to 0 instead of dividing by zero.
chart_1_pp_aggregate = chart_1_aggregate.copy()
for vehicle in dataset.significant_vehicles:
    row_total = chart_1_pp_aggregate.loc[vehicle].sum()
    if row_total == 0:
        chart_1_pp_aggregate.loc[vehicle] = 0
    else:
        chart_1_pp_aggregate.loc[vehicle] = chart_1_pp_aggregate.loc[vehicle] / row_total
chart_1_pp_aggregate

In [None]:
# Long-format table: for every vehicle, one 'Outgoing' and one 'Incoming' row
# per other vehicle, with the matching off-diagonal volume rounded to 2 decimals.
per_vehicle_frames = []
for vehicle in dataset.significant_vehicles:
    others = [x for x in dataset.significant_vehicles if x != vehicle]
    n_others = len(dataset.significant_vehicles) - 1
    per_vehicle_frames.append(pd.DataFrame({
        'Vehicle': [vehicle] * n_others * 2,
        'Direction': ['Outgoing'] * n_others + ['Incoming'] * n_others,
        'Other Vehicle': others * 2,
        'Volume': list(outgoing_aggregate.loc[vehicle][others].values) + list(incoming_aggregate.loc[vehicle][others].values),
    }))
# Concatenate once; the per-vehicle row labels are kept (not renumbered).
chart_2_aggregate = pd.concat(per_vehicle_frames)
chart_2_aggregate['Volume'] = chart_2_aggregate['Volume'].astype(float).apply(lambda x: round(x, 2))
chart_2_aggregate

### Yearly Level

In [None]:
# from data, find the index range of each year
year_ranges = {}
for year in data['year'].unique():
    year_ranges[year] = (data[data['year'] == year].index[0], data[data['year'] == year].index[-1] + 1)
year_ranges

In [None]:
# The years found in the data.
year_ranges.keys()

In [None]:
# Same table as incoming_aggregate, computed separately for each year's rows.
incoming_yearly = {}
for year in year_ranges.keys():
    row_start, row_stop = year_ranges[year]
    incoming = np.zeros(shape=(len(dataset.significant_vehicles), len(dataset.significant_vehicles)))
    incoming = pd.DataFrame(incoming, columns=dataset.significant_vehicles, index=dataset.significant_vehicles)
    for vehicle in dataset.significant_vehicles:
        # Positional slice of this vehicle's volume table for the year.
        year_volumes = all_vehicle_volumes[vehicle].iloc[row_start:row_stop]
        own_col = vehicle + '_volume'

        # Single .loc[row, col] assignment (chained df.loc[row][col] = value is a
        # no-op under pandas Copy-on-Write).
        incoming.loc[vehicle, vehicle] = year_volumes[own_col].sum()
        for col in year_volumes.columns:
            if col != own_col:
                # '<a>x<b>_volume' -> ['<a>', '<b>']; keep the partner of `vehicle`.
                other_veh = col[:-7].split('x')
                other_veh.remove(vehicle)
                other_veh = other_veh[0]
                incoming.loc[vehicle, other_veh] = year_volumes[col].sum()
    incoming_yearly[year] = incoming
display(incoming_yearly)

In [None]:
####### THIS IS JUST A VERIFICATION! It should match with aggregate. #######

# Verification only: the yearly incoming tables added together should match incoming_aggregate.
n_vehicles = len(dataset.significant_vehicles)
incoming_yearly_sum = pd.DataFrame(
    np.zeros(shape=(n_vehicles, n_vehicles)),
    columns=dataset.significant_vehicles,
    index=dataset.significant_vehicles,
)
for yearly_table in incoming_yearly.values():
    incoming_yearly_sum = incoming_yearly_sum + yearly_table
incoming_yearly_sum

In [None]:
# Per-year outgoing tables: transpose of that year's incoming table, with the
# diagonal reset so each row keeps the total of the matching incoming row.
outgoing_yearly = {}
for year in year_ranges.keys():
    outgoing = incoming_yearly[year].copy().T
    for vehicle in dataset.significant_vehicles:
        outgoing_row = outgoing.loc[vehicle]
        # Single .loc[row, col] assignment (chained df.loc[row][col] = value is a
        # no-op under pandas Copy-on-Write).
        outgoing.loc[vehicle, vehicle] = incoming_yearly[year].loc[vehicle].sum() - outgoing_row.sum() + outgoing_row[vehicle]
    outgoing_yearly[year] = outgoing
outgoing_yearly

In [None]:
####### THIS IS JUST A VERIFICATION! It should match with aggregate. #######

# Verification only: the yearly outgoing tables added together should match outgoing_aggregate.
n_vehicles = len(dataset.significant_vehicles)
outgoing_yearly_sum = pd.DataFrame(
    np.zeros(shape=(n_vehicles, n_vehicles)),
    columns=dataset.significant_vehicles,
    index=dataset.significant_vehicles,
)
for yearly_table in outgoing_yearly.values():
    outgoing_yearly_sum = outgoing_yearly_sum + yearly_table
outgoing_yearly_sum

In [None]:
# Per year and vehicle: volume from own spend ('self') versus the total volume
# attributed to synergy with other vehicles ('incoming').
chart_1_yearly = {}
for year in year_ranges.keys():
    chart_1 = pd.DataFrame(data = np.zeros(shape=(len(dataset.significant_vehicles), 2)), columns=['self', 'incoming'], index=dataset.significant_vehicles)
    for vehicle in dataset.significant_vehicles:
        own_volume = incoming_yearly[year].loc[vehicle, vehicle]
        # Single .loc[row, col] assignments (chained df.loc[row][col] = value is a
        # no-op under pandas Copy-on-Write).
        chart_1.loc[vehicle, 'self'] = own_volume
        chart_1.loc[vehicle, 'incoming'] = incoming_yearly[year].loc[vehicle].sum() - own_volume
    chart_1_yearly[year] = chart_1
chart_1_yearly

In [None]:
# Per year: each row of chart_1_yearly expressed as shares of its row total;
# rows whose total is zero are set to 0 instead of dividing by zero.
chart_1_pp_yearly = {}
for year in year_ranges.keys():
    chart_1_pp = chart_1_yearly[year].copy()
    for vehicle in dataset.significant_vehicles:
        row_total = chart_1_pp.loc[vehicle].sum()
        if row_total == 0:
            chart_1_pp.loc[vehicle] = 0
        else:
            chart_1_pp.loc[vehicle] = chart_1_pp.loc[vehicle] / row_total
    chart_1_pp_yearly[year] = chart_1_pp
chart_1_pp_yearly

In [None]:
# Per-year long-format table: for every vehicle, one 'Outgoing' and one
# 'Incoming' row per other vehicle, with the volume rounded to 2 decimals.
chart_2_yearly = {}
for year in year_ranges.keys():
    # Use THIS year's tables; reading the aggregate tables here would give
    # every year the same all-years numbers.
    outgoing_for_year = outgoing_yearly[year]
    incoming_for_year = incoming_yearly[year]
    per_vehicle_frames = []
    for vehicle in dataset.significant_vehicles:
        others = [x for x in dataset.significant_vehicles if x != vehicle]
        n_others = len(others)
        per_vehicle_frames.append(pd.DataFrame({
            'Vehicle': [vehicle] * n_others * 2,
            'Direction': ['Outgoing'] * n_others + ['Incoming'] * n_others,
            'Other Vehicle': others * 2,
            'Volume': list(outgoing_for_year.loc[vehicle, others].values) + list(incoming_for_year.loc[vehicle, others].values),
        }))
    # Concatenate once; the per-vehicle row labels are kept (not renumbered).
    chart_2 = pd.concat(per_vehicle_frames)
    chart_2['Volume'] = chart_2['Volume'].astype(float).apply(lambda x: round(x, 2))
    chart_2_yearly[year] = chart_2
chart_2_yearly

# Dump the Model Weights

In [None]:
# Persist the fitted parameters of every per-vehicle model.
file_path = 'models_ml_split_eqn/' + country + '_' + brand + '.pkl'

# Create the output folder on first use so open() does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'wb') as file:
    pickle.dump(all_vehicle_weights, file)

# Dump All Analysis 

In [None]:
# Persist every analysis table in one pickle. The list order is part of the
# file format: aggregate tables first, then the per-year dictionaries.
file_path = 'results_ml_split_eqn/' + country + '_' + brand + '.pkl'

# Create the output folder on first use so open() does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'wb') as file:
    pickle.dump([incoming_aggregate, outgoing_aggregate, chart_1_aggregate, chart_1_pp_aggregate, chart_2_aggregate, incoming_yearly, outgoing_yearly, chart_1_yearly, chart_1_pp_yearly, chart_2_yearly], file)

In [None]:
# Write the modelling data and every analysis table to one Excel workbook:
# one sheet per aggregate table, and one sheet per yearly dictionary with the
# years laid out side by side.
file_path = 'results_ml_split_eqn/' + country + '_' + brand + '.xlsx'
# Create the output folder on first use so the writer does not fail on a fresh checkout.
os.makedirs(os.path.dirname(file_path), exist_ok=True)


def _yearly_side_by_side(tables_by_year):
    """Place the per-year tables next to each other in one wide DataFrame.

    Each table's index is moved into a leading column that is renamed to the
    year, so the year labels the block that follows it.

    Args:
        tables_by_year: dict mapping a year to a DataFrame.

    Returns:
        The column-wise concatenation of all tables, in dict order (an empty
        DataFrame when the dict is empty).
    """
    year_blocks = [
        table.reset_index().rename(columns={'index': year})
        for year, table in tables_by_year.items()
    ]
    return pd.concat(year_blocks, axis=1) if year_blocks else pd.DataFrame()


incoming_yearly_concat = _yearly_side_by_side(incoming_yearly)
outgoing_yearly_concat = _yearly_side_by_side(outgoing_yearly)
chart_1_yearly_concat = _yearly_side_by_side(chart_1_yearly)
chart_1_pp_yearly_concat = _yearly_side_by_side(chart_1_pp_yearly)
chart_2_yearly_concat = _yearly_side_by_side(chart_2_yearly)

# A single writer creates the file and all sheets in one pass.
with pd.ExcelWriter(file_path, engine = 'openpyxl') as writer:
    data.to_excel(writer, sheet_name='data', index=False)
    incoming_aggregate.to_excel(writer, sheet_name='incoming_aggregate')
    outgoing_aggregate.to_excel(writer, sheet_name='outgoing_aggregate')
    chart_1_aggregate.to_excel(writer, sheet_name='chart_1_aggregate')
    chart_1_pp_aggregate.to_excel(writer, sheet_name='chart_1_pp_aggregate')
    chart_2_aggregate.to_excel(writer, sheet_name='chart_2_aggregate')
    incoming_yearly_concat.to_excel(writer, sheet_name='incoming_yearly')
    outgoing_yearly_concat.to_excel(writer, sheet_name='outgoing_yearly')
    chart_1_yearly_concat.to_excel(writer, sheet_name='chart_1_yearly')
    chart_1_pp_yearly_concat.to_excel(writer, sheet_name='chart_1_pp_yearly')
    chart_2_yearly_concat.to_excel(writer, sheet_name='chart_2_yearly')