# YouTube Public Figures / Flywheel Q4 2019

## Creative Testing Analysis

#### Import libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from authentication.authenticator import Authenticator
from sheets.sheetmanager import SheetManager

#### Initialize necessary strings

In [3]:
# Key file handed to Authenticator, and the scope(s) requested from it.
keys = "credentials.json"
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]

# Codebook spreadsheet and the range read from it.
codebook_spreadsheetId = "1qZKHghzIRMkBQPNpRlSTE_PA8GLkf6CBImGgjcqY9QU"
codebook_data_range = "Codebook"

# Raw-response spreadsheet and the range read from it.
data_spreadsheetId = "1Yr-jmDKJixCcRRBK73yaWU-VtxZlcW5bnwm8wHqm538"
data_data_range = "Data"

# Variable whose codes identify each respondent's group/cell.
cell_variable = "Q140"

#### Authenticate and Initialize Manager to work with Google Sheets

In [4]:
# Obtain credentials for SCOPES through the project's Authenticator, which is
# constructed with the key-file path.
# NOTE(review): Authenticator and SheetManager are project helpers not shown
# here — confirm what get_creds() does (e.g. whether it prompts or caches).
authenticator = Authenticator(keys)
creds = authenticator.get_creds(SCOPES)
# `manager` is what the cells below call get_values() on to read sheet ranges.
manager = SheetManager(creds)

#### Load in Codebook as DataFrame

In [5]:
# Read the codebook range into a DataFrame.
codebook_df = manager.get_values(
    spreadsheetId=codebook_spreadsheetId,
    data_range=codebook_data_range,
)

# Blank 'Variable' / 'Type' cells inherit the last non-blank value above them:
# turn '' into NaN, then forward-fill. `.ffill()` replaces the deprecated
# `fillna(method='ffill')` spelling and gives the same result.
codebook_df[['Variable', 'Type']] = (
    codebook_df[['Variable', 'Type']]
    .replace('', np.nan)
    .ffill()
)

#### Load in Raw Data as DataFrame
Also save the weights column as a variable for easy use later on

In [85]:
# Read the raw response range into a DataFrame.
data_df = manager.get_values(
    spreadsheetId=data_spreadsheetId,
    data_range=data_data_range,
)

# Keep the weights column under its own name; the recoding cells multiply by it.
weights = data_df['Default Weights']

#### Recoding
1. Add a column for a readable group/cell (rather than the numerical code)

In [81]:
df = data_df.copy()

# Left-join the codebook rows for `cell_variable` (indexed by answer code) onto
# the responses, matching each respondent's cell code, and keep only the
# readable answer label.
group_series = df.merge(
    codebook_df
    .loc[codebook_df['Variable'] == cell_variable, ['Question/Answer', 'Code']]
    .set_index('Code'),
    how='left',
    left_on=cell_variable,
    right_index=True,
)['Question/Answer']

group_series

0        Test cell 1: Asaginyo-beauty_30s
1       Test cell 7: Kattyanneru-food_30s
2          Test cell 3: BeautyHL_noVO_30s
3                 Test cell 4: FoodHL_30s
4         Test cell 8: Marilyn-beauty_30s
5            Test cell 5: FoodHL_noVO_30s
6               Test cell 2: BeautyHL_30s
7        Test cell 1: Asaginyo-beauty_30s
8        Test cell 1: Asaginyo-beauty_30s
9         Test cell 9: MuscleW-health_30s
10                Test cell 4: FoodHL_30s
11           Test cell 5: FoodHL_noVO_30s
12         Test cell 3: BeautyHL_noVO_30s
13              Test cell 2: BeautyHL_30s
14           Test cell 6: Haruan-food_30s
15           Test cell 5: FoodHL_noVO_30s
16       Test cell 1: Asaginyo-beauty_30s
17         Test cell 3: BeautyHL_noVO_30s
18         Test cell 3: BeautyHL_noVO_30s
19           Test cell 6: Haruan-food_30s
20       Test cell 1: Asaginyo-beauty_30s
21                                Control
22      Test cell 7: Kattyanneru-food_30s
23       Test cell 1: Asaginyo-bea

2. For checkbox/multi-select questions, replace 0 (Not shown) with NaN, 1 (Unchecked) with 0, and 2 (Checked) with 1. After this recode, multiplying by the weights gives each respondent's weighted contribution to the "checked" count.

3. Then multiply in the weights

In [87]:
df = data_df.copy()

# Multi-select (checkbox) variables are the ones whose codebook answers are the
# 'Not shown' / 'Unchecked' / 'Checked' labels.
multi_select_questions = (
    codebook_df
    .loc[codebook_df['Question/Answer'].isin(['Not shown', 'Unchecked', 'Checked']), 'Variable']
    .unique()
    .tolist()
)

# Recode as described in the step text: blank cells AND code 0 (Not shown)
# become NaN, then shifting by one turns 1 (Unchecked) into 0 and 2 (Checked)
# into 1. Mapping 0 to NaN first matters: otherwise a literal 0 would become -1
# and subtract weight from the totals instead of being ignored.
df[multi_select_questions] = (
    df[multi_select_questions].replace(['', 0], np.nan) - 1
)

# Row-wise weighting: each respondent's 0/1 answers are scaled by their weight.
multi_select_df = (
    df[multi_select_questions]
    .multiply(weights, axis=0)
    .add_suffix('_R')
)
multi_select_df

Unnamed: 0,Q132.1_R,Q132.2_R,Q132.3_R,Q132.4_R,Q132.5_R,Q132.6_R,Q132.7_R,Q132.8_R,Q132.9_R,Q133.1_R,...,Q172.3_R,Q173.1_R,Q173.2_R,Q173.3_R,Q174.1_R,Q174.2_R,Q174.3_R,Q175.1_R,Q175.2_R,Q175.3_R
0,0.771605,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.771605,0.0,0.000000,...,,,,,,,,,,
1,0.000000,0.000000,0.000000,1.488095,0.000000,1.488095,1.488095,1.488095,0.0,0.000000,...,,1.488095,0.000000,0.0,,,,,,
2,0.000000,0.744048,0.744048,0.744048,0.000000,0.744048,0.000000,0.000000,0.0,0.000000,...,,,,,,,,,,
3,0.000000,0.000000,1.077586,1.077586,0.000000,1.077586,0.000000,0.000000,0.0,0.000000,...,,,,,,,,,,
4,0.000000,0.000000,0.000000,1.644737,0.000000,0.000000,0.000000,1.644737,0.0,0.000000,...,,,,,1.644737,1.644737,0.000000,,,
5,1.358696,0.000000,0.000000,0.000000,1.358696,1.358696,1.358696,0.000000,0.0,1.358696,...,,,,,,,,,,
6,0.000000,1.059322,0.000000,0.000000,0.000000,0.000000,1.059322,0.000000,0.0,0.000000,...,,,,,,,,,,
7,0.905797,0.000000,0.905797,0.905797,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,...,,,,,,,,,,
8,0.000000,0.000000,0.905797,0.905797,0.000000,0.905797,0.000000,0.000000,0.0,0.000000,...,,,,,,,,,,
9,0.672043,0.672043,0.672043,0.672043,0.000000,0.000000,0.672043,0.672043,0.0,0.000000,...,,,,,,,,0.672043,0.672043,0.0


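4. For Likert-scale questions, recode by top-2-box: 1 if the response is one of the question's top-box codes (listed in `likert_dic`), 0 otherwise. Then multiply in the weights.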

In [123]:
df = data_df.copy()

# For each Likert question, the response codes that are recoded to 1
# (every other response is recoded to 0).
likert_dic = {
    question: [0, 1]
    for question in ('Q161', 'Q176', 'Q182', 'Q183', 'Q184', 'Q185', 'Q186')
}

likert_questions = list(likert_dic)

likert_df = df[likert_questions]

def test(series):
    series = series.where(series.isin(likert_dic[series.name]), 'X')
    series = series.mask(series.isin(likert_dic[series.name]), 1)
    series[series=='X'] = 0
    return(series)

# Recode every Likert column with `test`, scale each respondent's row by their
# weight, and append the '_R' suffix to the column names.
likert_df = (
    likert_df
    .apply(test)
    .multiply(weights, axis='index')
    .add_suffix('_R')
)

likert_df

Unnamed: 0,Q161_R,Q176_R,Q182_R,Q183_R,Q184_R,Q185_R,Q186_R
0,0.771605,0,0,0,0,0,0
1,1.4881,0,1.4881,0,0,0,0
2,0,0,0.744048,0.744048,0.744048,0,0.744048
3,1.07759,0,1.07759,0,1.07759,0,0
4,1.64474,0,0,0,0,0,0
5,1.3587,1.3587,1.3587,1.3587,1.3587,1.3587,0
6,0,0,1.05932,0,0,0,0
7,0.905797,0,0.905797,0.905797,0,0,0
8,0.905797,0,0.905797,0.905797,0.905797,0,0.905797
9,0,0.672043,0.672043,0.672043,0,0.672043,0.672043


4. Dummy encode the rest

In [127]:
# Sorted names of the Likert and multi-select questions combined.
likert_and_multi_select_questions = sorted(likert_questions + multi_select_questions)
likert_and_multi_select_questions



['Q132.1',
 'Q132.2',
 'Q132.3',
 'Q132.4',
 'Q132.5',
 'Q132.6',
 'Q132.7',
 'Q132.8',
 'Q132.9',
 'Q133.1',
 'Q133.10',
 'Q133.2',
 'Q133.3',
 'Q133.4',
 'Q133.5',
 'Q133.6',
 'Q133.7',
 'Q133.8',
 'Q133.9',
 'Q161',
 'Q162.1',
 'Q162.10',
 'Q162.2',
 'Q162.3',
 'Q162.4',
 'Q162.5',
 'Q162.6',
 'Q162.7',
 'Q162.8',
 'Q162.9',
 'Q163.1',
 'Q163.10',
 'Q163.2',
 'Q163.3',
 'Q163.4',
 'Q163.5',
 'Q163.6',
 'Q163.7',
 'Q163.8',
 'Q163.9',
 'Q164.1',
 'Q164.2',
 'Q164.3',
 'Q164.4',
 'Q164.5',
 'Q164.6',
 'Q164.7',
 'Q164.8',
 'Q165.1',
 'Q165.2',
 'Q165.3',
 'Q165.4',
 'Q165.5',
 'Q165.6',
 'Q165.7',
 'Q165.8',
 'Q167.1',
 'Q167.2',
 'Q167.3',
 'Q168.1',
 'Q168.2',
 'Q168.3',
 'Q169.1',
 'Q169.2',
 'Q169.3',
 'Q170.1',
 'Q170.2',
 'Q170.3',
 'Q171.1',
 'Q171.2',
 'Q171.3',
 'Q172.1',
 'Q172.2',
 'Q172.3',
 'Q173.1',
 'Q173.2',
 'Q173.3',
 'Q174.1',
 'Q174.2',
 'Q174.3',
 'Q175.1',
 'Q175.2',
 'Q175.3',
 'Q176',
 'Q182',
 'Q183',
 'Q184',
 'Q185',
 'Q186']

5. Add it all together

In [None]:
# TODO: combine the recoded frames (e.g. multi_select_df, likert_df) into a single table.

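Transform the data so that questions are rows and groups (test cells) are columns, with each value the weighted sum for that question within that group. (Open question: is this the layout we want?)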

In [80]:
# Start from the weighted, recoded multi-select answers and attach each
# respondent's readable group. This is built from `multi_select_df` and
# `group_series` rather than from `df`: the recoding cells reset `df` to a raw
# copy of `data_df`, so on a fresh Restart & Run All `df` has no 'Group' column
# (KeyError) and its multi-select columns still hold un-recoded codes.
table = multi_select_df.copy()
# `multi_select_df` columns are `multi_select_questions` in the same order with
# '_R' appended; restore the plain names so the pivot rows are labelled by question.
table.columns = multi_select_questions
table['Group'] = group_series

# Every column of `df` other than the weights, the cell code and the group label.
questions = [
    question
    for question in df.columns.tolist()
    if question not in ('Default Weights', cell_variable, 'Group')
]

# One row per question, one column per group; the values are already weighted,
# so summing gives the weighted "checked" count per group.
table = pd.pivot_table(table, values=multi_select_questions, columns=['Group'], aggfunc='sum')
table

Group,Control,Test cell 1: Asaginyo-beauty_30s,Test cell 2: BeautyHL_30s,Test cell 3: BeautyHL_noVO_30s,Test cell 4: FoodHL_30s,Test cell 5: FoodHL_noVO_30s,Test cell 6: Haruan-food_30s,Test cell 7: Kattyanneru-food_30s,Test cell 8: Marilyn-beauty_30s,Test cell 9: MuscleW-health_30s
Q132.1,132.592917,147.051433,135.741019,142.316017,135.893559,146.348770,151.056130,151.763960,145.278380,167.342857
Q132.2,130.549026,107.547089,135.083202,140.782134,138.339137,127.230511,132.570855,144.768268,143.356253,136.055725
Q132.3,110.383774,100.814304,108.749600,98.613886,116.088361,85.788010,89.458135,118.413883,94.314218,104.157032
Q132.4,141.621532,135.879325,130.412642,143.334790,145.129474,141.555356,131.215527,141.920681,137.341872,130.635759
Q132.5,89.810195,96.156613,79.386679,64.577089,85.095367,79.709288,83.186980,86.778172,98.108175,79.403440
Q132.6,167.331590,150.587395,157.223947,158.213661,157.442290,154.098874,156.489502,147.818507,156.046111,155.069586
Q132.7,97.465766,109.747840,119.970743,93.950840,111.031503,104.276607,111.841950,106.236300,119.573272,103.156215
Q132.8,129.367018,123.203654,143.609305,128.291499,137.534950,120.952611,135.830848,135.415733,139.992234,133.518803
Q132.9,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Q133.1,61.522203,58.157664,50.628147,56.911838,68.596582,70.350765,70.193347,64.466515,66.368792,72.829934
