In [1]:
import numpy as np
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest as ztest

from authentication.authenticator import Authenticator
from sheets.sheetmanager import SheetManager

In [2]:
# --- Authentication settings -------------------------------------------------
# `keys` is handed to Authenticator and `SCOPES` to get_creds in the next cell.
keys = 'credentials.json'
SCOPES = ['https://www.googleapis.com/auth/drive']

# --- Sheet locations ---------------------------------------------------------
# Both ranges are read from the same spreadsheet, so the codebook id simply
# aliases the data id; point it at another id if the codebook ever moves.
data_spreadsheetId = '1ZSZGAtYyGasHIbeCZYBvnN-Xv46hUs6-u5Larbqb0ag'
codebook_spreadsheetId = data_spreadsheetId

data_data_range = 'Teads Data'
codebook_data_range = 'Teads Codebook!A3:J1000'

In [3]:
# Authenticator receives the credentials path stored in `keys`; get_creds is
# then asked for the scopes listed in `SCOPES`, and the resulting credentials
# are passed to the SheetManager that the loading cell uses.
authenticator = Authenticator(keys)
creds = authenticator.get_creds(SCOPES)
manager = SheetManager(creds)

In [4]:
# Read the range named by data_data_range from the data spreadsheet.
data_df = manager.get_values(
    spreadsheetId=data_spreadsheetId,
    data_range=data_data_range,
)

# Read the range named by codebook_data_range from the codebook spreadsheet.
codebook_df = manager.get_values(
    spreadsheetId=codebook_spreadsheetId,
    data_range=codebook_data_range,
)

In [17]:
# Derive a cleaned codebook without touching codebook_df: replace() returns a
# new frame in which empty-string cells are NaN, so isna()/notna() see them.
cb_df = codebook_df.replace('', np.nan)

# Mark each row as a question ('Q') when 'Item name' is filled in, otherwise as
# an answer choice ('A'). The column is built in a single step rather than by
# creating a float NaN column and writing strings into it afterwards; that
# incompatible-dtype write is deprecated since pandas 2.1.
cb_df['Q/A'] = np.where(cb_df['Item name'].notna(), 'Q', 'A')

# we really only need Item name, label, Choice number, Question / choices, Q/A
cols = ['Item name', 'label', 'Choice number', 'Question / choices', 'Q/A']
cb_df = cb_df[cols]

# Display the rows that have an item name but no label.
cb_df[cb_df['Item name'].notna() & cb_df['label'].isna()]

Unnamed: 0,Item name,label,Choice number,Question / choices,Q/A
156,Q5,,,以下のYouTubeチャンネルの中から、聞いたことがあるものを全て選択してください。,Q
161,Q6,,,次のうち、あなたが好きな広告プラットフォームはどれですか。あてはまるものを全てお選びください。,Q
176,Q8,,,YouTubeに広告を掲載したいと思わない理由はどれですか。あてはまるものを全てお選びください。,Q


In [None]:
# Start every run of this cell from a fresh copy so data_df itself is never
# modified by the recoding and grouping steps below.
df = data_df.copy()

def recode_SA_A(question_list, frame=None):
    """Add a binary ``<question>_r`` column for each listed question.

    For every name ``q`` in ``question_list`` a column ``q + '_r'`` is written
    in place: 1 where the value in column ``q`` equals 1 or 2, 0 otherwise
    (missing values therefore become 0).

    Parameters
    ----------
    question_list : iterable of str
        Names of the columns to recode; each must exist in the target frame.
    frame : pandas.DataFrame, optional
        Frame to modify in place. When omitted, the notebook-level ``df`` is
        used, which is what the function always operated on before.

    Returns
    -------
    None
        The target frame is mutated in place.
    """
    if frame is None:
        # Backward-compatible default: operate on the notebook's working frame.
        frame = df
    for q in question_list:
        # isin([1, 2]) is the vectorised form of (== 1) | (== 2); int64 matches
        # the dtype produced by assigning the scalar 0 to a new column.
        frame[q + '_r'] = frame[q].isin([1, 2]).astype('int64')

questions_to_recode = ['Q2', 'Q3', 'Q4']

# Adds a 0/1 '<question>_r' column to the notebook-level `df` for each question.
recode_SA_A(questions_to_recode)

# Label rows by CELL: <= 8 -> 'EXP', >= 9 -> 'CON'. Rows matching neither test
# (missing CELL, or a value strictly between 8 and 9) keep NaN and are left out
# by the groupby below. The column starts as object dtype so the string labels
# are not written into a float column (deprecated in pandas >= 2.1).
df['group'] = np.full(len(df), np.nan, dtype=object)
df.loc[df['CELL'] <= 8, 'group'] = 'EXP'
df.loc[df['CELL'] >= 9, 'group'] = 'CON'

# Keep only the recoded indicators plus the group label.
recoded_cols = [q + '_r' for q in questions_to_recode]
df = df[recoded_cols + ['group']]

# Group sizes (denominators), counted before the frame is aggregated.
con_base = int((df['group'] == 'CON').sum())
exp_base = int((df['group'] == 'EXP').sum())

# Sum the indicators per group, then transpose: one row per recoded question,
# one column per group ('CON', 'EXP').
df = df.groupby('group').sum().transpose()
df['CON_base'] = con_base
df['EXP_base'] = exp_base

# Share of each group with a recoded value of 1 (stored as a proportion).
df['CON_desired_%'] = df['CON'] / df['CON_base']
df['EXP_desired_%'] = df['EXP'] / df['EXP_base']

df['abs_lift_%'] = df['EXP_desired_%'] - df['CON_desired_%']

# Two-sample z-test for proportions per question; element [1] of the returned
# tuple is the p-value.
df['p-value'] = df.apply(
    lambda row: ztest(
        [row['EXP'], row['CON']],
        [row['EXP_base'], row['CON_base']],
    )[1],
    axis=1,
)

df

In [None]:
# Write `df` to result.csv, a path relative to the current working directory.
df.to_csv("result.csv")