In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from db_utils import query_analytics_store


- [QuickSurveysResponses_15266417](https://meta.wikimedia.org/wiki/Schema:QuickSurveysResponses)
- [QuickSurveyInitiation_15278946](https://meta.wikimedia.org/wiki/Schema:QuickSurveyInitiation)
- [Survey](https://docs.google.com/spreadsheets/d/1JD8-knLmnFXVwXxJYx6w9RRmxZWasSLaSKvj85SgrzE/edit?ts=56c3ccd2#gid=1291145097)

In [2]:
# Table for the QuickSurveysResponses schema linked above (one row per survey response event).
response_table = 'log.QuickSurveysResponses_15266417'
# Table for the QuickSurveyInitiation schema linked above ('eligible' and 'impression' events).
impression_table = 'log.QuickSurveyInitiation_15278946'
# Hostname of the analytics store. Not referenced directly by the cells in this
# notebook — presumably db_utils connects to it on its own (TODO confirm).
host = 'analytics-store.eqiad.wmnet'

### EL Summary Stats

In [3]:
def count_rows(table, condition=None):
    """Return the number of rows in ``table`` that satisfy ``condition``.

    Parameters
    ----------
    table : str
        Table name exactly as it should appear after ``FROM``.
    condition : str, optional
        SQL boolean expression placed after ``WHERE``. When omitted, every
        row of the table is counted.

    Returns
    -------
    The value of the ``n`` column in the first row of the query result.

    Notes
    -----
    Both arguments are interpolated into the SQL text verbatim, so only pass
    trusted, hard-coded values (never user input).
    """
    sql = "SELECT COUNT(*) AS n FROM {}".format(table)
    if condition is not None:
        sql += " WHERE {}".format(condition)
    return query_analytics_store(sql, {}).iloc[0]['n']


# Initiation events: pageviews that qualified for the survey, and pageviews
# where the survey widget was logged as an impression.
n_eligible = count_rows(impression_table, "event_eventName = 'eligible'")
n_impressions = count_rows(impression_table, "event_eventName = 'impression'")

# Response events: every click on the widget, then split by which button was hit.
n_responses = count_rows(response_table)
n_yes = count_rows(
    response_table,
    "event_surveyResponseValue = 'ext-quicksurveys-external-survey-yes-button'",
)
n_no = count_rows(
    response_table,
    "event_surveyResponseValue = 'ext-quicksurveys-external-survey-no-button'",
)

print('Eligible Pageviews: ', n_eligible)
print('Widget Impressions: ', n_impressions)
print('Clicks: ', n_responses)
print('Yeses: ', n_yes)
print('Nos: ', n_no)

Eligible Pageviews:  53600
Widget Impressions:  42134
Clicks:  469
Yeses:  135
Nos:  334


### Does each eligible pageview have a unique survey token?

In [4]:
# Count the distinct survey-instance tokens among 'eligible' events. If this
# equals the number of eligible events, every eligible pageview has its own token.
query = """
SELECT COUNT(DISTINCT(event_surveyInstanceToken)) as n
FROM log.QuickSurveyInitiation_15278946
WHERE event_eventName ='eligible'
"""
eligible_token_result = query_analytics_store(query, {})
n_eligible_tokens = eligible_token_result.iloc[0]['n']
print(n_eligible_tokens)

53593


Pretty Close

### Does each widget impression have a unique survey token?

In [13]:
# Count the distinct survey-instance tokens among 'impression' events. If this
# equals the number of impression events, every impression has its own token.
query = """
SELECT COUNT(DISTINCT(event_surveyInstanceToken)) as n
FROM log.QuickSurveyInitiation_15278946
WHERE event_eventName ='impression'
"""
impression_token_result = query_analytics_store(query, {})
n_impression_tokens = impression_token_result.iloc[0]['n']
print(n_impression_tokens)

41943


Pretty Close.

### Does each Widget Impression have a matching Pageview?

In [14]:
# Self-join the initiation table on the survey-instance token: impression rows
# (alias i) against eligible rows (alias e). The count is the number of distinct
# impression tokens that also appear on an 'eligible' event.
query = """
SELECT
    COUNT(DISTINCT(i.event_surveyInstanceToken)) as n
FROM
    (SELECT event_surveyInstanceToken
    FROM log.QuickSurveyInitiation_15278946
    WHERE
    event_eventName ='impression') i
JOIN
    (SELECT event_surveyInstanceToken
    FROM log.QuickSurveyInitiation_15278946
    WHERE
    event_eventName ='eligible') e
ON
    (i.event_surveyInstanceToken = e.event_surveyInstanceToken)
"""

df = query_analytics_store(query, {})
n_impressions_with_pageview = df.iloc[0]['n']
print(n_impressions_with_pageview)

41872


Close. It seems there are a small number of impressions without a matching pageview.

### Does each click have a matching Widget Impression?

In [15]:
# Pair every response (click) row with the 'impression' rows that share its
# survey-instance token. d_click is reused by the survey-matching cells below.
query = """
SELECT *
FROM
    log.QuickSurveysResponses_15266417 r,
    log.QuickSurveyInitiation_15278946 i
WHERE
    r.event_surveyInstanceToken = i.event_surveyInstanceToken
    AND i.event_eventName ='impression'
"""

d_click = query_analytics_store(query, {})

# Row count vs. distinct-token count: equal numbers mean no click was matched
# to more than one impression.
n_click_rows = d_click.shape[0]
n_click_tokens = len(set(d_click['event_surveyInstanceToken']))
print(n_click_rows, n_click_tokens)

459 459


Each matched click has exactly one widget impression (459 rows, 459 distinct tokens). However, not every click has a matching rendered widget.

### Does each Google Survey Response have a registered click?

In [16]:
# Join keys. `st` is used verbatim as the survey-side merge column, so it has to
# match the header in responses.tsv exactly — do not "correct" its wording here.
st = 'This is you survey ID. Please do not modify.'
ct = 'event_surveyInstanceToken'

# Survey responses exported as a tab-separated file.
d_survey = pd.read_csv('responses.tsv', sep='\t')

# Keep only the survey responses whose token also shows up among the clicks.
df_survey_click = pd.merge(d_survey, d_click, how='inner', left_on=st, right_on=ct)

In [17]:
# Number of survey responses that matched a click.
print(len(df_survey_click))

59


In [18]:
# Total number of survey responses in the export.
print(len(d_survey))

81


Only 59 out of 81 survey responses have a matching click.

### Does each Google Survey Response have a registered impression?

In [19]:
# Fetch the survey-instance token of every 'impression' event.
query = """
SELECT
    event_surveyInstanceToken
FROM log.QuickSurveyInitiation_15278946
WHERE
    event_eventName ='impression'
"""

d_impression = query_analytics_store(query, {})

# Keep only the survey responses whose token also shows up among the impressions.
df_survey_impression = pd.merge(
    d_survey,
    d_impression,
    how='inner',
    left_on=st,
    right_on='event_surveyInstanceToken',
)

In [20]:
# Number of survey responses that matched an impression.
print(len(df_survey_impression))

61


Only 61 out of 81 survey responses have a matching impression.

### Summary

In [21]:
# Funnel recap: one (label, value) pair per printed line, in funnel order.
summary_rows = [
    ('Eligible Pageviews: ', n_eligible),
    ('Widget Impressions: ', n_impressions),
    ('Clicks: ', n_responses),
    ('Yeses: ', n_yes),
    ('Google Responses tracked in EL', df_survey_click.shape[0]),
    ('Google Responses', d_survey.shape[0]),
]
for label, value in summary_rows:
    print(label, value)

Eligible Pageviews:  53373
Widget Impressions:  41957
Clicks:  463
Yeses:  134
Google Responses tracked in EL 59
Google Responses 81


- Don't know how many users were in the sample
- One in 300 pageviews results in a 'Yes'
- 62% of 'Yes' events lead to a survey response
- only 60% of survey responses are trackable in EL

### Digging Into Missing Data

In [22]:
# Left join keeps every survey response; responses without a matching click get
# missing values in the click-side columns.
df_survey_click2 = pd.merge(d_survey, d_click, how='left', left_on=st, right_on=ct)

In [23]:
# Keep only the token and timestamp columns from each side of the merge and
# order the rows by the survey-side 'Timestamp' column.
# DataFrame.sort() has been removed from pandas; sort_values() is the supported
# replacement and orders rows the same way.
df_survey_click2 = df_survey_click2[[st, 'Timestamp', ct, 'timestamp']].sort_values('Timestamp')

# Give the four columns readable names. After this the original column names no
# longer exist, so re-running this cell requires re-running the merge cell first.
df_survey_click2.columns = ['Google Token', 'Google Timestamp', 'EL Token', 'EL Timestamp']

In [24]:
# Distribution of token string lengths on the survey side; a malformed or
# truncated token would show up as an unexpected length.
google_token_lengths = df_survey_click2['Google Token'].map(len)
google_token_lengths.value_counts()

16    81
Name: Google Token, dtype: int64

No obvious pattern...

### Are surveySessionToken values unique per IP?

In [25]:
# Number of distinct client IPs across all initiation events.
query = """
SELECT
    COUNT(DISTINCT(clientIP)) as n
FROM log.QuickSurveyInitiation_15278946
"""
ip_count_result = query_analytics_store(query, {})
nIPs = ip_count_result.iloc[0]['n']
print(nIPs)

17573


In [26]:
# Number of distinct (client IP, user agent) combinations across all initiation events.
# NOTE(review): if the backend is MySQL, CONCAT yields NULL when either argument
# is NULL, and such rows are not counted — confirm this is acceptable.
query = """
SELECT
    COUNT(DISTINCT(CONCAT(clientIP, userAgent))) as n
FROM log.QuickSurveyInitiation_15278946
"""
ip_ua_count_result = query_analytics_store(query, {})
nIPUAs = ip_ua_count_result.iloc[0]['n']
print(nIPUAs)

18017


In [27]:
# Number of distinct survey session tokens across all initiation events, for
# comparison with the IP and IP+user-agent counts above.
query = """
SELECT
    COUNT(DISTINCT(event_surveySessionToken)) as n
FROM log.QuickSurveyInitiation_15278946
"""
session_token_result = query_analytics_store(query, {})
sessiontokens = session_token_result.iloc[0]['n']
print(sessiontokens)

21338
