# YouTube Public Figures / Flywheel Q4 2019

## Creative Testing Analysis

#### Import libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from authentication.authenticator import Authenticator
from sheets.sheetmanager import SheetManager

#### Initialize necessary strings

In [45]:
# Value handed to Authenticator (presumably the path to a Google API
# credentials file) and the OAuth scope requested: read-only Drive access.
keys = "credentials.json"
SCOPES = [
    "https://www.googleapis.com/auth/drive.readonly",
]

# Spreadsheet that holds the raw data, and the range read from it.
data_spreadsheetId = "1Yr-jmDKJixCcRRBK73yaWU-VtxZlcW5bnwm8wHqm538"
data_data_range = "Data"

# Spreadsheet that holds the codebook, and the range read from it.
codebook_spreadsheetId = "1qZKHghzIRMkBQPNpRlSTE_PA8GLkf6CBImGgjcqY9QU"
codebook_data_range = "Codebook"

# Codebook variable (and data column) whose codes are later mapped to the
# readable 'Group' label.
cell_variable = "Q140"

#### Authenticate and Initialize Manager to work with Google Sheets

In [14]:
# Build the project's Authenticator from `keys` (presumably a credentials
# file path — confirm against authentication.authenticator).
authenticator = Authenticator(keys)
# Request credentials for the scopes listed in SCOPES.
creds = authenticator.get_creds(SCOPES)
# SheetManager is constructed from those credentials; later cells call
# manager.get_values(...) to read spreadsheet ranges.
manager = SheetManager(creds)

#### Load in Codebook as DataFrame

In [17]:
# Read the codebook range from its spreadsheet.
# NOTE(review): get_values is used below as if it returns a DataFrame — confirm.
codebook_df = manager.get_values(
    spreadsheetId=codebook_spreadsheetId,
    data_range=codebook_data_range,
)

# Empty strings in 'Variable' / 'Type' are converted to NaN and then
# forward-filled from the last non-empty row above, so every row carries
# its variable name and type.
# .ffill() replaces fillna(method='ffill'), which is deprecated in pandas 2.1+.
codebook_df[['Variable', 'Type']] = (
    codebook_df[['Variable', 'Type']]
    .replace('', np.nan)
    .ffill()
)

Unnamed: 0,Variable,Attribute,Type,Question/Answer,Code
469,Q185,Response,CodedSelect,Much less appealing,4.0
459,Q184,Response,CodedSelect,You couldn't help but remember it was for Youtube,0.0
346,Q169.1,Response,CodedSelect,Checked,2.0
210,Q162.9,Response,CodedSelect,Checked,2.0
118,Q133.1,Question,CodedSelect,Which of the following topics do you regularly...,
306,Q165.5,Response,CodedSelect,Checked,2.0
86,Q132.2,Question,CodedSelect,Which of the following topics interest you? / ...,
486,Q191,Response,CodedSelect,Yes,0.0
181,Q162.2,Response,CodedSelect,Unchecked,1.0
404,Q174.1,Response,CodedSelect,Not shown,0.0


#### Load in Raw Data as DataFrame

In [47]:
# Read the raw-data range from its spreadsheet.
data_df = manager.get_values(
    spreadsheetId=data_spreadsheetId,
    data_range=data_data_range,
)

# Preview the first rows only instead of rendering the entire frame.
data_df.head(10)

Unnamed: 0,Default Weights,Q121,Q122,Q128,Q129,Q130,Q131,Q132.1,Q132.2,Q132.3,...,Q186,Q187,Q188,Q189,Q190,Q191,Q192,Q193,Q194,Q195
0,0.771605,15,1,0,2,0,0,2,1,1,...,,,,,,,,,,
1,1.488095,18,0,1,3,1,2,1,1,1,...,2,分からない,ない,1.0,1.0,1.0,1.0,0.0,1.0,1.0
2,0.744048,17,1,4,0,1,2,1,2,2,...,1,なりたい自分になる,具体的なことがわからない,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.077586,25,1,7,1,0,0,1,1,2,...,2,料理,落ち着かない,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.644737,16,0,2,1,1,0,1,1,1,...,2,特にない,特にない,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.358696,25,1,4,2,1,3,2,1,1,...,2,飯テロ動画のYouTuberの紹介,嫌いではないが、見ていてお腹がすきます,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1.059322,27,0,4,0,1,2,1,2,1,...,2,メイクの方法。,メイクの、クローズアップ,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,0.905797,21,1,2,2,0,3,2,1,2,...,2,わかりません。,ユーチューバー,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,0.905797,21,1,3,2,0,0,1,1,2,...,1,特になし,嫌いな点はない,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,0.672043,20,1,2,2,1,1,2,2,2,...,1,器具を使わずにトレーニングできる,なし,1.0,1.0,1.0,1.0,1.0,1.0,1.0


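Add a `Group` column holding the readable group/cell label (rather than the numerical code).
For checkbox questions, replace 0 with NaN, replace 1 (Unchecked) with 0, and replace 2 (Checked) with 1. This will help when we use the weights. **TODO:** the cell below only adds the `Group` column; the checkbox recoding is not applied there.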

In [49]:
# Work on a copy so data_df stays untouched and this cell can be re-run.
df = data_df.copy()

# Codebook rows for the cell variable, indexed by their numeric code; the
# 'Question/Answer' column holds the readable label for each code.
group_labels = codebook_df.loc[
    codebook_df['Variable'] == cell_variable,
    ['Question/Answer', 'Code'],
].set_index('Code')

# Left-merge each row's code in the cell-variable column against the lookup
# index and keep only the label; codes with no match become NaN.
df['Group'] = df.merge(
    group_labels,
    how='left',
    left_on=cell_variable,
    right_index=True,
)['Question/Answer']

# Preview the first rows only instead of rendering the entire frame.
df.head(10)

Unnamed: 0,Default Weights,Q121,Q122,Q128,Q129,Q130,Q131,Q132.1,Q132.2,Q132.3,...,Q187,Q188,Q189,Q190,Q191,Q192,Q193,Q194,Q195,Group
0,0.771605,15,1,0,2,0,0,2,1,1,...,,,,,,,,,,Test cell 1: Asaginyo-beauty_30s
1,1.488095,18,0,1,3,1,2,1,1,1,...,分からない,ない,1.0,1.0,1.0,1.0,0.0,1.0,1.0,Test cell 7: Kattyanneru-food_30s
2,0.744048,17,1,4,0,1,2,1,2,2,...,なりたい自分になる,具体的なことがわからない,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 3: BeautyHL_noVO_30s
3,1.077586,25,1,7,1,0,0,1,1,2,...,料理,落ち着かない,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 4: FoodHL_30s
4,1.644737,16,0,2,1,1,0,1,1,1,...,特にない,特にない,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 8: Marilyn-beauty_30s
5,1.358696,25,1,4,2,1,3,2,1,1,...,飯テロ動画のYouTuberの紹介,嫌いではないが、見ていてお腹がすきます,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 5: FoodHL_noVO_30s
6,1.059322,27,0,4,0,1,2,1,2,1,...,メイクの方法。,メイクの、クローズアップ,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 2: BeautyHL_30s
7,0.905797,21,1,2,2,0,3,2,1,2,...,わかりません。,ユーチューバー,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 1: Asaginyo-beauty_30s
8,0.905797,21,1,3,2,0,0,1,1,2,...,特になし,嫌いな点はない,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 1: Asaginyo-beauty_30s
9,0.672043,20,1,2,2,1,1,2,2,2,...,器具を使わずにトレーニングできる,なし,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Test cell 9: MuscleW-health_30s


Transform the data so that questions are rows and groups are columns, summing the responses within each group

In [43]:
# Every column except the weights, the cell code and its readable label is
# treated as a question.
# Fix: the original filtered on `cell_question`, which is never defined in
# this notebook (the config cell defines `cell_variable`), so the cell
# raised NameError on a fresh kernel.
non_question_columns = {'Default Weights', cell_variable, 'Group'}
questions = [column for column in df.columns if column not in non_question_columns]

# One row per question, one column per group; values are summed within each
# group (the 'Default Weights' column is not applied here).
# pivot_table returns a new frame, so no defensive copy of df is needed.
table = pd.pivot_table(df, values=questions, columns=['Group'], aggfunc='sum')
table

Group,Control,Test cell 1: Asaginyo-beauty_30s,Test cell 2: BeautyHL_30s,Test cell 3: BeautyHL_noVO_30s,Test cell 4: FoodHL_30s,Test cell 5: FoodHL_noVO_30s,Test cell 6: Haruan-food_30s,Test cell 7: Kattyanneru-food_30s,Test cell 8: Marilyn-beauty_30s,Test cell 9: MuscleW-health_30s
Q121,4884.0,4888.0,4801.0,4967.0,4758.0,4735.0,4810.0,4822.0,4865.0,4873.0
Q122,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0,150.0
Q128,787.0,793.0,773.0,799.0,804.0,791.0,779.0,806.0,789.0,804.0
Q129,345.0,321.0,356.0,337.0,341.0,320.0,314.0,354.0,358.0,316.0
Q130,140.0,120.0,116.0,104.0,126.0,116.0,134.0,130.0,124.0,128.0
Q131,414.0,413.0,430.0,404.0,397.0,403.0,385.0,428.0,373.0,444.0
Q132.1,388.0,402.0,394.0,399.0,389.0,405.0,408.0,411.0,401.0,420.0
Q132.2,379.0,357.0,381.0,389.0,382.0,374.0,385.0,391.0,392.0,388.0
Q132.3,371.0,361.0,369.0,359.0,374.0,352.0,354.0,380.0,358.0,367.0
Q132.4,394.0,388.0,384.0,395.0,401.0,396.0,390.0,397.0,393.0,384.0
