In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import pandas as pd
import numpy as np
from scipy import stats
import altair as alt

import ref_tools

# Load dataset CSVs

In [3]:
CSV_DIR = 'data/full_aggregated/'
CSV_MTURK_FNAME = 'mturk_aggregate.csv'
CSV_QUALT_FNAME = 'qualtrics_aggregate.csv'

In [4]:
# import mturk logs
df_mturk = pd.read_csv(CSV_DIR+CSV_MTURK_FNAME)

# import qualtrics results
df_qualt = pd.read_csv(CSV_DIR+CSV_QUALT_FNAME)
# clean up header...
df_qualt_header = df_qualt[:2]
df_qualt = df_qualt[2:]

In [None]:
df_mturk[:2]
df_mturk.shape

In [6]:
df_qualt[:5]
df_qualt.shape

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,...,m3_seenbefore,m4_seenbefore,m5_seenbefore,Q_AmbiguousTextPresent,Q_AmbiguousTextQuestions,Q_StraightliningCount,Q_StraightliningPercentage,Q_StraightliningQuestions,Q_UnansweredPercentage,Q_UnansweredQuestions
2,2023-06-07 11:31:11,2023-06-07 11:43:35,IP Address,100,743,True,2023-06-07 11:43:35,R_r10tjvIn1gE5w7T,anonymous,EN,...,have not watched,have not watched,have not watched,1,QID29,0,0,,0.0,
3,2023-06-07 11:36:40,2023-06-07 11:43:54,IP Address,100,434,True,2023-06-07 11:43:55,R_1nO4NH1to65H7XK,anonymous,EN,...,have not watched,have not watched,have not watched,1,QID29,0,0,,0.0,
4,2023-06-07 11:32:26,2023-06-07 11:45:05,IP Address,100,758,True,2023-06-07 11:45:05,R_3mdScilZRJfwkPu,anonymous,EN,...,have watched,have watched,have watched,1,QID29,0,0,,0.0196078431372549,QID47
5,2023-06-07 11:36:07,2023-06-07 11:48:15,IP Address,100,727,True,2023-06-07 11:48:15,R_1pFCaWfuxBHrmLi,anonymous,EN,...,have not watched,have not watched,have not watched,1,QID29,0,0,,0.0,
6,2023-06-07 11:46:39,2023-06-07 11:49:21,IP Address,100,162,True,2023-06-07 11:49:22,R_1Nexpyo4uu2JY0u,anonymous,EN,...,have not watched,have not watched,have watched,1,"12_QID25,15_QID46,14_QID27,14_QID46,13_QID27,1...",0,0,,0.0,


(134, 142)

# Restructure dataset

## Filter dataset to paid responses only

In [7]:
### this uses extremely similar validation as payment in 2_payment.ipynb

def extract_args(url):
    url_args = url[url.index('?')+1:].split('&')
    return dict([tuple(e.split('=')) for e in url_args])

# for each of the mturk entries, verify that they entered their workerID into Qualtrics and that they entered the responseID into MTurk
df_analysis = pd.DataFrame()
for _, row in df_mturk.iterrows():
    row_args = extract_args(row['Input.SURVEY_LINK'])
    matched = df_qualt.loc[
        (df_qualt['workerid']==row['WorkerId'])
        & (df_qualt['ResponseId']==row['Answer.surveycode'])
        & (
            (df_qualt['param_x']==row_args['param_x'])
            & (df_qualt['param_a']==row_args['param_a'])
            & (df_qualt['param_b']==row_args['param_b'])
            & (df_qualt['param_c']==row_args['param_c'])
            & (df_qualt['param_d']==row_args['param_d'])
            & (df_qualt['param_e']==row_args['param_e'])
        )
        & (df_qualt['Progress']=='100')
    ]
    if matched.shape[0]==1:
        df_analysis = pd.concat([df_analysis, matched])

df_analysis[:5]
df_analysis.shape

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,...,m3_seenbefore,m4_seenbefore,m5_seenbefore,Q_AmbiguousTextPresent,Q_AmbiguousTextQuestions,Q_StraightliningCount,Q_StraightliningPercentage,Q_StraightliningQuestions,Q_UnansweredPercentage,Q_UnansweredQuestions
31,2023-06-07 11:32:25,2023-06-07 12:16:43,IP Address,100,2657,True,2023-06-07 12:16:44,R_20UIlVy1cvOmECC,anonymous,EN,...,have not watched,have watched,have not watched,1,QID29,0,0,,0,
50,2023-06-07 12:28:20,2023-06-07 12:36:09,IP Address,100,469,True,2023-06-07 12:36:11,R_1nOygUc6Qe1tu60,anonymous,EN,...,have watched,have not watched,have not watched,1,QID29,0,0,,0,
20,2023-06-07 11:50:45,2023-06-07 12:05:27,IP Address,100,881,True,2023-06-07 12:05:27,R_1kTdcN8ApYyAlQR,anonymous,EN,...,have not watched,have not watched,have not watched,1,QID29,0,0,,0,
10,2023-06-07 11:30:50,2023-06-07 11:53:51,IP Address,100,1380,True,2023-06-07 11:53:52,R_1LAv6K8zSBJ73P7,anonymous,EN,...,have not watched,have not watched,have not watched,1,QID29,0,0,,0,
26,2023-06-07 11:30:39,2023-06-07 12:10:55,IP Address,100,2415,True,2023-06-07 12:10:56,R_1F2Lp0EjSEM9Aln,anonymous,EN,...,have not watched,have watched,have not watched,1,QID29,0,0,,0,


(120, 142)

## HvB fields

In [8]:
for letter, movienum in [('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)]:
    old_fname = f'param_{letter}'
    new_fname = f'textsrc_M{movienum}'
    new_fname_binary = f'textsrc_M{movienum}_byhuman'
    print(f'{old_fname} -> {new_fname}, {new_fname_binary}')
    df_analysis[new_fname] = df_analysis[old_fname].apply(lambda x: f'A{x}')
    df_analysis[new_fname_binary] = df_analysis[old_fname].apply(lambda x: int(x)>0)

param_a -> textsrc_M1, textsrc_M1_byhuman
param_b -> textsrc_M2, textsrc_M2_byhuman
param_c -> textsrc_M3, textsrc_M3_byhuman
param_d -> textsrc_M4, textsrc_M4_byhuman
param_e -> textsrc_M5, textsrc_M5_byhuman


## Seen-before fields

In [9]:
for movienum in [1, 2, 3, 4, 5]:
    old_fname = f'm{movienum}_seenbefore'
    new_fname = f'prevwatchedbin_M{movienum}'
    print(f'{old_fname} -> {new_fname}')
    df_analysis[new_fname] = df_analysis[old_fname].apply(lambda x: x=='have watched')

m1_seenbefore -> prevwatchedbin_M1
m2_seenbefore -> prevwatchedbin_M2
m3_seenbefore -> prevwatchedbin_M3
m4_seenbefore -> prevwatchedbin_M4
m5_seenbefore -> prevwatchedbin_M5


## Ranking fields

In [10]:
for position, movienum in [('12', 1), ('13', 2), ('14', 3), ('15', 4), ('16', 5)]:
    old_fname = f'ranking_tests_{position}'
    new_fname = f'rank5_M{movienum}'
    print(f'{old_fname} -> {new_fname}')
    df_analysis[new_fname] = df_analysis[old_fname].apply(lambda x: int(x))

ranking_tests_12 -> rank5_M1
ranking_tests_13 -> rank5_M2
ranking_tests_14 -> rank5_M3
ranking_tests_15 -> rank5_M4
ranking_tests_16 -> rank5_M5


## Likert fields (score and text)

In [11]:
likert_convert_dict = {
    ('acc', 'acc'): {
        'Very accurate':5, 
        'Somewhat accurate':4, 
        'Mixed / Unsure':3, 
        'Somewhat inaccurate':2, 
        'Very inaccurate':1,
    },
    ('info', 'inf'): {
        'Very informative':5, 
        'Somewhat informative':4, 
        'Mixed / Unsure':3, 
        'Somewhat uninformative':2, 
        'Very uninformative':1,
    },
    ('per', 'per'): {
        'Very persuasive':5, 
        'Somewhat persuasive':4, 
        'Mixed / Unsure':3, 
        'Somewhat unconvincing':2, 
        'Very unconvincing':1
    },
    ('int', 'int'): {
        'Very interesting':5, 
        'Somewhat interesting':4, 
        'Mixed / Unsure':3,
        'Somewhat uninteresting':2,
        'Very uninteresting':1
    },
}
for aspect in likert_convert_dict.keys():
    aspect_old, aspect_new = aspect
    for position, movienum in [('12', 1), ('13', 2), ('14', 3), ('15', 4), ('16', 5)]:
        oldscore_fname = f'{position}_recctext_{aspect_old}_lik'
        newscore_fname = f'{aspect_new}_lik_M{movienum}'
        oldtext_fname = f'{position}_recctext_{aspect_old}_text'
        newtext_fname = f'{aspect_new}_text_M{movienum}'
        print(f'{oldscore_fname} -> {newscore_fname}')
        print(f'{oldtext_fname} -> {newtext_fname}')
        df_analysis[newscore_fname] = df_analysis[oldscore_fname].apply(lambda x: x if pd.isna(x) else likert_convert_dict[aspect][x])
        df_analysis[newtext_fname] = df_analysis[oldtext_fname].copy()

12_recctext_acc_lik -> acc_lik_M1
12_recctext_acc_text -> acc_text_M1
13_recctext_acc_lik -> acc_lik_M2
13_recctext_acc_text -> acc_text_M2
14_recctext_acc_lik -> acc_lik_M3
14_recctext_acc_text -> acc_text_M3
15_recctext_acc_lik -> acc_lik_M4
15_recctext_acc_text -> acc_text_M4
16_recctext_acc_lik -> acc_lik_M5
16_recctext_acc_text -> acc_text_M5
12_recctext_info_lik -> inf_lik_M1
12_recctext_info_text -> inf_text_M1
13_recctext_info_lik -> inf_lik_M2
13_recctext_info_text -> inf_text_M2
14_recctext_info_lik -> inf_lik_M3
14_recctext_info_text -> inf_text_M3
15_recctext_info_lik -> inf_lik_M4
15_recctext_info_text -> inf_text_M4
16_recctext_info_lik -> inf_lik_M5
16_recctext_info_text -> inf_text_M5
12_recctext_per_lik -> per_lik_M1
12_recctext_per_text -> per_text_M1
13_recctext_per_lik -> per_lik_M2
13_recctext_per_text -> per_text_M2
14_recctext_per_lik -> per_lik_M3
14_recctext_per_text -> per_text_M3
15_recctext_per_lik -> per_lik_M4
15_recctext_per_text -> per_text_M4
16_recctex

## shown-author fields

In [12]:
old_fname = f'param_x'
new_fname = f'showingauthor'
print(f'{old_fname} -> {new_fname}')
df_analysis[new_fname] = df_analysis[old_fname].apply(lambda x: x=='1')

param_x -> showingauthor


## 'read more' fields

In [13]:
for position, movienum in [('12', 1), ('13', 2), ('14', 3), ('15', 4), ('16', 5)]:
    old_fname = f'{position}_recctext_moreauthor'
    new_fname = f'M{movienum}_moreauthor'
    print(f'{old_fname} -> {new_fname}')
    df_analysis[new_fname] = df_analysis[old_fname].apply(lambda x: x=='Yes')

12_recctext_moreauthor -> M1_moreauthor
13_recctext_moreauthor -> M2_moreauthor
14_recctext_moreauthor -> M3_moreauthor
15_recctext_moreauthor -> M4_moreauthor
16_recctext_moreauthor -> M5_moreauthor


In [14]:
for aspect_old, aspect_new in [('acc', 'acc'), ('info', 'inf'), ('per', 'per'), ('int', 'int')]:
    for position, movienum in [('12', 1), ('13', 2), ('14', 3), ('15', 4), ('16', 5)]:
        old_fname = f'{position}_recctext_{aspect_old}_text'
        new_fname = f'{aspect_new}_text_M{movienum}'
        print(f'{old_fname} -> {new_fname}')
        df_analysis[new_fname] = df_analysis[old_fname].copy()

12_recctext_acc_text -> acc_text_M1
13_recctext_acc_text -> acc_text_M2
14_recctext_acc_text -> acc_text_M3
15_recctext_acc_text -> acc_text_M4
16_recctext_acc_text -> acc_text_M5
12_recctext_info_text -> inf_text_M1
13_recctext_info_text -> inf_text_M2
14_recctext_info_text -> inf_text_M3
15_recctext_info_text -> inf_text_M4
16_recctext_info_text -> inf_text_M5
12_recctext_per_text -> per_text_M1
13_recctext_per_text -> per_text_M2
14_recctext_per_text -> per_text_M3
15_recctext_per_text -> per_text_M4
16_recctext_per_text -> per_text_M5
12_recctext_int_text -> int_text_M1
13_recctext_int_text -> int_text_M2
14_recctext_int_text -> int_text_M3
15_recctext_int_text -> int_text_M4
16_recctext_int_text -> int_text_M5


## Normalize rankings for seen-before status

In [15]:
def l_normalizeunseenbefore(r):
    # print(r[['rank5_M1', 'prevwatchedbin_M1']])
    mx_rankings = list(r[[f'rank5_M{i+1}' for i in range(5)]])
    mx_seenbefore = list(r[[f'prevwatchedbin_M{i+1}' for i in range(5)]])
    mx_zipped = list(zip(mx_rankings, mx_seenbefore))
    mx_filtered = [(rank if not seen else None) for (rank, seen) in mx_zipped]
    if sum(mx_seenbefore)<4:
        mx_filtered_min = min([e for e in mx_filtered if e is not None])
        mx_normfiltered = [((e-mx_filtered_min) if e is not None else None) for e in mx_filtered]
        mx_normfiltered_max = max([e for e in mx_normfiltered if e is not None])
        if mx_normfiltered_max!=0:
            mx_normfiltered = [((e/mx_normfiltered_max) if e is not None else None) for e in mx_normfiltered]
    else:
        mx_normfiltered = [None]*5
    return dict([(f'rankNunseen_M{i+1}', mx_normfiltered[i]) for i in range(5)])

# remove seen-before movies from the ranking set
# then normalize rankings such that 0 is most-preferred, 1 is least-preferred in the remaining set
# seen movies get new ranking value None
# if exactly one movie was unseen, it is also removed from the ranking set
new_cols = df_analysis.apply(
    l_normalizeunseenbefore,
    axis='columns',
    result_type='expand',
)
df_analysis = df_analysis.join(new_cols)
print('added columns:', list(new_cols.columns))

added columns: ['rankNunseen_M1', 'rankNunseen_M2', 'rankNunseen_M3', 'rankNunseen_M4', 'rankNunseen_M5']


In [16]:
def l_normalizeseenbefore(r):
    # print(r[['rank5_M1', 'prevwatchedbin_M1']])
    mx_rankings = list(r[[f'rank5_M{i+1}' for i in range(5)]])
    mx_seenbefore = list(r[[f'prevwatchedbin_M{i+1}' for i in range(5)]])
    mx_zipped = list(zip(mx_rankings, mx_seenbefore))
    mx_filtered = [(rank if seen else None) for (rank, seen) in mx_zipped]
    if sum(mx_seenbefore)>1:
        mx_filtered_min = min([e for e in mx_filtered if e is not None])
        mx_normfiltered = [((e-mx_filtered_min) if e is not None else None) for e in mx_filtered]
        mx_normfiltered_max = max([e for e in mx_normfiltered if e is not None])
        if mx_normfiltered_max!=0:
            mx_normfiltered = [((e/mx_normfiltered_max) if e is not None else None) for e in mx_normfiltered]
    else:
        mx_normfiltered = [None]*5
    return dict([(f'rankNseen_M{i+1}', mx_normfiltered[i]) for i in range(5)])

# remove seen-before movies from the ranking set
# then normalize rankings such that 0 is most-preferred, 1 is least-preferred in the remaining set
# seen movies get new ranking value None
# if exactly one movie was unseen, it is also removed from the ranking set
new_cols = df_analysis.apply(
    l_normalizeseenbefore,
    axis='columns',
    result_type='expand',
)
df_analysis = df_analysis.join(new_cols)
print('added columns:', list(new_cols.columns))

added columns: ['rankNseen_M1', 'rankNseen_M2', 'rankNseen_M3', 'rankNseen_M4', 'rankNseen_M5']


## Verify dataset

In [17]:
df_analysis['Progress'].value_counts()

100    120
Name: Progress, dtype: int64

In [18]:
df_analysis['showingauthor'].value_counts()

False    60
True     60
Name: showingauthor, dtype: int64

In [19]:
df_analysis['textsrc_M1'].value_counts()
df_analysis['textsrc_M1_byhuman'].value_counts()

A0    60
A4    12
A5    12
A1    12
A2    12
A3    12
Name: textsrc_M1, dtype: int64

False    60
True     60
Name: textsrc_M1_byhuman, dtype: int64

In [20]:
df_analysis['textsrc_M2'].value_counts()
df_analysis['textsrc_M2_byhuman'].value_counts()

A0    60
A5    12
A4    12
A3    12
A2    12
A1    12
Name: textsrc_M2, dtype: int64

False    60
True     60
Name: textsrc_M2_byhuman, dtype: int64

In [21]:
df_analysis['textsrc_M3'].value_counts()
df_analysis['textsrc_M3_byhuman'].value_counts()

A0    60
A3    12
A1    12
A4    12
A5    12
A2    12
Name: textsrc_M3, dtype: int64

False    60
True     60
Name: textsrc_M3_byhuman, dtype: int64

In [22]:
df_analysis['textsrc_M4'].value_counts()
df_analysis['textsrc_M4_byhuman'].value_counts()

A0    60
A2    12
A3    12
A4    12
A5    12
A1    12
Name: textsrc_M4, dtype: int64

False    60
True     60
Name: textsrc_M4_byhuman, dtype: int64

In [23]:
df_analysis['textsrc_M5'].value_counts()
df_analysis['textsrc_M5_byhuman'].value_counts()

A0    60
A2    12
A1    12
A5    12
A3    12
A4    12
Name: textsrc_M5, dtype: int64

True     60
False    60
Name: textsrc_M5_byhuman, dtype: int64

In [24]:
# none of these are allowed to be 0 (that would be an indicator of at least one row marking normalized rankings for a 'saw all but one' responder)
(
    df_analysis['rankNunseen_M1'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNunseen_M2'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNunseen_M3'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNunseen_M4'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNunseen_M5'].apply(lambda x: 0 if pd.isna(x) else x)
).value_counts()

2.000000    49
2.250000    17
1.750000    13
1.000000    10
0.000000    10
2.000000     8
1.500000     5
2.500000     5
1.333333     2
1.666667     1
dtype: int64

In [25]:
df_analysis['rankNunseen_M1'].value_counts()
df_analysis['rankNunseen_M2'].value_counts()
df_analysis['rankNunseen_M3'].value_counts()
df_analysis['rankNunseen_M4'].value_counts()
df_analysis['rankNunseen_M5'].value_counts()

1.000000    21
0.000000    20
0.750000    12
0.333333     9
0.250000     7
0.500000     6
0.666667     4
Name: rankNunseen_M1, dtype: int64

0.000000    24
1.000000    24
0.500000    12
0.250000     9
0.333333     7
0.666667     7
0.750000     2
Name: rankNunseen_M2, dtype: int64

1.000000    24
0.000000    20
0.333333    14
0.666667     9
0.250000     6
0.500000     5
0.750000     4
Name: rankNunseen_M3, dtype: int64

0.000000    27
1.000000    13
0.750000    13
0.333333    12
0.666667    11
0.250000     6
0.500000     4
Name: rankNunseen_M4, dtype: int64

1.000000    28
0.000000    19
0.666667    13
0.500000    12
0.750000     6
0.250000     3
0.333333     3
Name: rankNunseen_M5, dtype: int64

In [26]:
# none of these are allowed to be 0 (that would be an indicator of at least one row marking normalized rankings for a 'saw all but one' responder)
(
    df_analysis['rankNseen_M1'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNseen_M2'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNseen_M3'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNseen_M4'].apply(lambda x: 0 if pd.isna(x) else x)
    +df_analysis['rankNseen_M5'].apply(lambda x: 0 if pd.isna(x) else x)
).value_counts()

0.000000    91
1.000000     9
2.500000     7
1.666667     3
1.333333     3
1.250000     2
1.500000     2
2.000000     1
1.750000     1
2.250000     1
dtype: int64

In [27]:
df_analysis['rankNseen_M1'].value_counts()
df_analysis['rankNseen_M2'].value_counts()
df_analysis['rankNseen_M3'].value_counts()
df_analysis['rankNseen_M4'].value_counts()
df_analysis['rankNseen_M5'].value_counts()

1.000000    8
0.000000    7
0.250000    3
0.750000    1
0.333333    1
0.500000    1
Name: rankNseen_M1, dtype: int64

0.500000    5
0.000000    4
1.000000    3
0.666667    1
0.333333    1
0.250000    1
0.750000    1
Name: rankNseen_M2, dtype: int64

1.000000    7
0.000000    6
0.250000    4
0.666667    2
0.500000    2
0.750000    1
Name: rankNseen_M3, dtype: int64

0.000000    7
1.000000    4
0.500000    3
0.333333    1
0.666667    1
0.250000    1
0.750000    1
Name: rankNseen_M4, dtype: int64

1.000000    7
0.000000    5
0.750000    4
0.250000    1
0.333333    1
Name: rankNseen_M5, dtype: int64

# Dataset shape

In [28]:
df_analysis[:5]
list(df_analysis.columns)

Unnamed: 0,StartDate,EndDate,Status,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,DistributionChannel,UserLanguage,...,rankNunseen_M1,rankNunseen_M2,rankNunseen_M3,rankNunseen_M4,rankNunseen_M5,rankNseen_M1,rankNseen_M2,rankNseen_M3,rankNseen_M4,rankNseen_M5
31,2023-06-07 11:32:25,2023-06-07 12:16:43,IP Address,100,2657,True,2023-06-07 12:16:44,R_20UIlVy1cvOmECC,anonymous,EN,...,1.0,0.333333,0.0,,0.666667,,,,,
50,2023-06-07 12:28:20,2023-06-07 12:36:09,IP Address,100,469,True,2023-06-07 12:36:11,R_1nOygUc6Qe1tu60,anonymous,EN,...,0.0,0.25,,1.0,0.5,,,,,
20,2023-06-07 11:50:45,2023-06-07 12:05:27,IP Address,100,881,True,2023-06-07 12:05:27,R_1kTdcN8ApYyAlQR,anonymous,EN,...,,0.0,0.333333,1.0,0.666667,,,,,
10,2023-06-07 11:30:50,2023-06-07 11:53:51,IP Address,100,1380,True,2023-06-07 11:53:52,R_1LAv6K8zSBJ73P7,anonymous,EN,...,,0.666667,0.0,0.333333,1.0,,,,,
26,2023-06-07 11:30:39,2023-06-07 12:10:55,IP Address,100,2415,True,2023-06-07 12:10:56,R_1F2Lp0EjSEM9Aln,anonymous,EN,...,0.666667,0.333333,0.0,,1.0,,,,,


['StartDate',
 'EndDate',
 'Status',
 'Progress',
 'Duration (in seconds)',
 'Finished',
 'RecordedDate',
 'ResponseId',
 'DistributionChannel',
 'UserLanguage',
 'consent_verify',
 'time_workerid_First Click',
 'time_workerid_Last Click',
 'time_workerid_Page Submit',
 'time_workerid_Click Count',
 'workerid',
 'time_genrepick_First Click',
 'time_genrepick_Last Click',
 'time_genrepick_Page Submit',
 'time_genrepick_Click Count',
 'genrepick',
 'time_ranking_First Click',
 'time_ranking_Last Click',
 'time_ranking_Page Submit',
 'time_ranking_Click Count',
 'ranking_tests_12',
 'ranking_tests_13',
 'ranking_tests_14',
 'ranking_tests_15',
 'ranking_tests_16',
 'ranking_seenbefore',
 '12_time_recctext_First Click',
 '12_time_recctext_Last Click',
 '12_time_recctext_Page Submit',
 '12_time_recctext_Click Count',
 '12_recctext_acc_lik',
 '12_recctext_acc_text',
 '12_recctext_info_lik',
 '12_recctext_info_text',
 '12_recctext_per_lik',
 '12_recctext_per_text',
 '12_recctext_int_lik',
 '1

In [None]:
with pd.option_context('display.max_columns', None):
    df_analysis[:2]

In [30]:
df_analysis[[
    # distinct ID
    'ResponseId',
    # Genre group / source movie that respondent picked
    'genrepick',
    # True iff showing whether the text was bot-generated, False otherwise
    'showingauthor',
    # For M1, M2, M3, M4, M5, 0 iff text was bot-generated, 1-5 otherwise for distinct human review sources
    'textsrc_M1', 'textsrc_M2', 'textsrc_M3', 'textsrc_M4', 'textsrc_M5',
    # For M1, M2, M3, M4, M5, False iff text was bot-generated, True otherwise for distinct human review sources
    'textsrc_M1_byhuman', 'textsrc_M2_byhuman', 'textsrc_M3_byhuman', 'textsrc_M4_byhuman', 'textsrc_M5_byhuman',
    # Names of M1, M2, M3, M4, M5
    'm1_title', 'm2_title', 'm3_title', 'm4_title', 'm5_title',
    # ranking movies (1 is best, 5 is worst)
    'rank5_M1', 'rank5_M2', 'rank5_M3', 'rank5_M4', 'rank5_M5',
    # ranking movies, NORMALIZED and with SEENBEFORE REMOVED (0 is best, 1 is worst, None is seen-before. If only one was seen, it is given 0)
    'rankNunseen_M1', 'rankNunseen_M2', 'rankNunseen_M3', 'rankNunseen_M4', 'rankNunseen_M5',
    # seen-before status of M1, M2, M3, M4, M5 (True if watched before, False otherwise)
    'prevwatchedbin_M1', 'prevwatchedbin_M2', 'prevwatchedbin_M3', 'prevwatchedbin_M4', 'prevwatchedbin_M5',
    # M1, M2, M3, M4, M5 Accuracy Likert scale responses (1 is least accurate, 5 is most accurate). Sometimes is NaN value
    'acc_lik_M1', 'acc_lik_M2', 'acc_lik_M3', 'acc_lik_M4', 'acc_lik_M5',
    # M1, M2, M3, M4, M5 Accuracy text responses. Sometimes empty
    'acc_text_M1', 'acc_text_M2', 'acc_text_M3', 'acc_text_M4', 'acc_text_M5',
    # M1, M2, M3, M4, M5 Informational Likert scale responses (1 is least informative, 5 is most informative)
    'inf_lik_M1', 'inf_lik_M2', 'inf_lik_M3', 'inf_lik_M4', 'inf_lik_M5',
    # M1, M2, M3, M4, M5 Informational text responses
    'inf_text_M1', 'inf_text_M2', 'inf_text_M3', 'inf_text_M4', 'inf_text_M5',
    # M1, M2, M3, M4, M5 Persuasive Likert scale responses (1 is least persuasive, 5 is most persuasive)
    'per_lik_M1', 'per_lik_M2', 'per_lik_M3', 'per_lik_M4', 'per_lik_M5',
    # M1, M2, M3, M4, M5 Persuasive text responses
    'per_text_M1', 'per_text_M2', 'per_text_M3', 'per_text_M4', 'per_text_M5',
    # M1, M2, M3, M4, M5 Interesting Likert scale responses (1 is least interesting, 5 is most interesting)
    'int_lik_M1', 'int_lik_M2', 'int_lik_M3', 'int_lik_M4', 'int_lik_M5',
    # M1, M2, M3, M4, M5 Interesting text responses
    'int_text_M1', 'int_text_M2', 'int_text_M3', 'int_text_M4', 'int_text_M5',
    # M1, M2, M3, M4, M5 WantToSeeMoreFromAuthor responses (True is yes, False otherwise)
    'M1_moreauthor', 'M2_moreauthor', 'M3_moreauthor', 'M4_moreauthor', 'M5_moreauthor',
    # final freetext box
    'freetext_final',
]]

# Refactor dataset into two export files

Unnamed: 0,ResponseId,genrepick,showingauthor,textsrc_M1,textsrc_M2,textsrc_M3,textsrc_M4,textsrc_M5,textsrc_M1_byhuman,textsrc_M2_byhuman,...,int_text_M2,int_text_M3,int_text_M4,int_text_M5,M1_moreauthor,M2_moreauthor,M3_moreauthor,M4_moreauthor,M5_moreauthor,freetext_final
31,R_20UIlVy1cvOmECC,Titanic,False,A0,A0,A0,A0,A2,False,False,...,The text was persuasive in that it defended S...,The text was interesting to read due to its hu...,The text was interesting to read due to its d...,The text was interesting to read due to its d...,False,False,False,False,False,NO
50,R_1nOygUc6Qe1tu60,Titanic,False,A0,A0,A0,A2,A0,False,False,...,The text was interesting to read due to its de...,The text was interesting to read due to its hu...,The text was interesting to read due to its de...,The text was interesting to read due to its ro...,False,False,False,False,False,NO
20,R_1kTdcN8ApYyAlQR,Jurassic Park,False,A0,A0,A0,A3,A1,False,False,...,The movie is somewhat interesting to watch,The movie is somewhat interesting to watch,The movie is somewhat interesting to watch,The text is somewhat interesting,False,False,False,False,False,none
10,R_1LAv6K8zSBJ73P7,The Dark Knight,False,A0,A0,A3,A0,A0,False,False,...,It's not too interesting and didn't deals abou...,"Takers is a repackaging of heat, for the young...",Bullock gives an admirable performance seems i...,It's not very interesting about stories,False,False,False,False,False,no
26,R_1F2Lp0EjSEM9Aln,Inception,False,A0,A0,A1,A0,A1,False,False,...,The film tells the true story of Forrest Tucker,It is not up to the level by not including any...,It is a interesting concept of reincarnation a...,It doesn't shows interesting content about story,False,False,False,False,False,nothing
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,R_2c5SrT6P2gVIAjf,The Lord of the Rings: The Fellowship of the Ring,True,A5,A4,A1,A0,A0,True,True,...,It serves as a basic comprehension check rathe...,It serves solely as a basic comprehension che...,"However, beyond this particular context, the t...",It serves solely as a basic comprehension chec...,False,True,False,False,False,Thank you.
97,R_2B5HOU9saR1NQvP,Toy Story,True,A4,A3,A5,A0,A3,True,True,...,then Reading to makes you attractive,was interesting to read to be then good,Reading makes to we in you attractive.,The mice fly atop Orville the to be Albatross...,True,False,False,False,False,none
90,R_1rvk1iqCQSW4isB,The Sixth Sense,True,A2,A5,A1,A2,A0,True,True,...,The text was interesting to read as it had a g...,The text was interesting to read as it gave a ...,"The text was somewhat interesting to read, par...",No,True,True,True,True,True,No
135,R_1nVheUi08dzJB0V,Titanic,False,A0,A0,A0,A4,A4,False,False,...,both realistic,holly hunter,titanic overlook,romance,True,True,True,True,True,good


## Per-respondent export (focuses on one respondent per row)

In [31]:
df_perresponse = df_analysis.copy()
df_perresponse = df_perresponse[[
    # distinct response ID
    'ResponseId',
    # True iff showing whether the text was bot-generated, False otherwise
    'showingauthor',
    # Genre group / seed movie that respondent picked
    'genrepick',
    # Names of M1, M2, M3, M4, M5
    'm1_title', 'm2_title', 'm3_title', 'm4_title', 'm5_title',
    # response time across the entire survey
    'Duration (in seconds)',
    # response times across different segments
    'time_workerid_Page Submit', 'time_genrepick_Page Submit', 'time_ranking_Page Submit', '12_time_recctext_Page Submit', '13_time_recctext_Page Submit', '14_time_recctext_Page Submit', '15_time_recctext_Page Submit', '16_time_recctext_Page Submit',
    # For M1, M2, M3, M4, M5, 0 iff text was bot-generated, 1-5 otherwise for distinct human review sources
    'textsrc_M1', 'textsrc_M2', 'textsrc_M3', 'textsrc_M4', 'textsrc_M5',
    # For M1, M2, M3, M4, M5, False iff text was bot-generated, True otherwise for distinct human review sources
    'textsrc_M1_byhuman', 'textsrc_M2_byhuman', 'textsrc_M3_byhuman', 'textsrc_M4_byhuman', 'textsrc_M5_byhuman',
    # seen-before status of M1, M2, M3, M4, M5 (True if watched before, False otherwise)
    'prevwatchedbin_M1', 'prevwatchedbin_M2', 'prevwatchedbin_M3', 'prevwatchedbin_M4', 'prevwatchedbin_M5',
    # ranking movies (1 is best, 5 is worst)
    'rank5_M1', 'rank5_M2', 'rank5_M3', 'rank5_M4', 'rank5_M5',
    # ranking movies, NORMALIZED and with SEENBEFORE REMOVED (0 is best, 1 is worst, None is seen-before. If only one was seen, it is given 0)
    'rankNunseen_M1', 'rankNunseen_M2', 'rankNunseen_M3', 'rankNunseen_M4', 'rankNunseen_M5',
    # final freetext box
    'freetext_final',
]]
df_perresponse = df_perresponse.reset_index(drop=True)

df_perresponse[:5]
df_perresponse.to_csv('data/aggregated_perresponse.csv', sep=',', index=False)

Unnamed: 0,ResponseId,showingauthor,genrepick,m1_title,m2_title,m3_title,m4_title,m5_title,Duration (in seconds),time_workerid_Page Submit,...,rank5_M2,rank5_M3,rank5_M4,rank5_M5,rankNunseen_M1,rankNunseen_M2,rankNunseen_M3,rankNunseen_M4,rankNunseen_M5,freetext_final
0,R_20UIlVy1cvOmECC,False,Titanic,Anna and the King,Legend No. 17,Always,Far and Away,Love Story,2657,15.905,...,3,2,1,4,1.0,0.333333,0.0,,0.666667,NO
1,R_1nOygUc6Qe1tu60,False,Titanic,Anna and the King,Legend No. 17,Always,Far and Away,Love Story,469,15.197,...,2,4,5,3,0.0,0.25,,1.0,0.5,NO
2,R_1kTdcN8ApYyAlQR,False,Jurassic Park,The Poseidon Adventure,The Towering Inferno,Jason and the Argonauts,"20,000 Leagues Under the Sea",The 7th Voyage of Sinbad,881,2.949,...,1,2,4,3,,0.0,0.333333,1.0,0.666667,none
3,R_1LAv6K8zSBJ73P7,False,The Dark Knight,Batman: Mask of the Phantasm,Streets of Fire,Takers,The Net,Miss Bala,1380,7.616,...,4,2,3,5,,0.666667,0.0,0.333333,1.0,no
4,R_1F2Lp0EjSEM9Aln,False,Inception,When Nietzsche Wept,The Old Man & the Gun,Reminiscence,Infinite,The Forgotten,2415,6.834,...,2,1,5,4,0.666667,0.333333,0.0,,1.0,nothing


## Per-movie/respondent export (focuses on one respondent+movie pairing per row)

In [32]:
df_permovie = df_analysis.copy()
df_permovie = df_permovie[[
    # distinct response ID
    'ResponseId',
    # True iff showing whether the text was bot-generated, False otherwise
    'showingauthor',
    # Genre group / seed movie that respondent picked
    'genrepick',
    # Names of M1, M2, M3, M4, M5
    'm1_title', 'm2_title', 'm3_title', 'm4_title', 'm5_title',
    # For M1, M2, M3, M4, M5, 0 iff text was bot-generated, 1-5 otherwise for distinct human review sources
    'textsrc_M1', 'textsrc_M2', 'textsrc_M3', 'textsrc_M4', 'textsrc_M5',
    # For M1, M2, M3, M4, M5, False iff text was bot-generated, True otherwise for distinct human review sources
    'textsrc_M1_byhuman', 'textsrc_M2_byhuman', 'textsrc_M3_byhuman', 'textsrc_M4_byhuman', 'textsrc_M5_byhuman',
    # seen-before status of M1, M2, M3, M4, M5 (True if watched before, False otherwise)
    'prevwatchedbin_M1', 'prevwatchedbin_M2', 'prevwatchedbin_M3', 'prevwatchedbin_M4', 'prevwatchedbin_M5',
    # ranking movies (1 is best, 5 is worst)
    'rank5_M1', 'rank5_M2', 'rank5_M3', 'rank5_M4', 'rank5_M5',
    # ranking movies, NORMALIZED and with SEENBEFORE REMOVED (0 is best, 1 is worst, None is seen-before. If only one was seen, it is given 0)
    'rankNunseen_M1', 'rankNunseen_M2', 'rankNunseen_M3', 'rankNunseen_M4', 'rankNunseen_M5',
    # response times across different segments
    '12_time_recctext_Page Submit', '13_time_recctext_Page Submit', '14_time_recctext_Page Submit', '15_time_recctext_Page Submit', '16_time_recctext_Page Submit',
    # M1, M2, M3, M4, M5 Accuracy Likert scale responses (1 is least accurate, 5 is most accurate). Sometimes is NaN value
    'acc_lik_M1', 'acc_lik_M2', 'acc_lik_M3', 'acc_lik_M4', 'acc_lik_M5',
    # M1, M2, M3, M4, M5 Accuracy text responses. Sometimes empty
    'acc_text_M1', 'acc_text_M2', 'acc_text_M3', 'acc_text_M4', 'acc_text_M5',
    # M1, M2, M3, M4, M5 Informational Likert scale responses (1 is least informative, 5 is most informative)
    'inf_lik_M1', 'inf_lik_M2', 'inf_lik_M3', 'inf_lik_M4', 'inf_lik_M5',
    # M1, M2, M3, M4, M5 Informational text responses
    'inf_text_M1', 'inf_text_M2', 'inf_text_M3', 'inf_text_M4', 'inf_text_M5',
    # M1, M2, M3, M4, M5 Persuasive Likert scale responses (1 is least persuasive, 5 is most persuasive)
    'per_lik_M1', 'per_lik_M2', 'per_lik_M3', 'per_lik_M4', 'per_lik_M5',
    # M1, M2, M3, M4, M5 Persuasive text responses
    'per_text_M1', 'per_text_M2', 'per_text_M3', 'per_text_M4', 'per_text_M5',
    # M1, M2, M3, M4, M5 Interesting Likert scale responses (1 is least interesting, 5 is most interesting)
    'int_lik_M1', 'int_lik_M2', 'int_lik_M3', 'int_lik_M4', 'int_lik_M5',
    # M1, M2, M3, M4, M5 Interesting text responses
    'int_text_M1', 'int_text_M2', 'int_text_M3', 'int_text_M4', 'int_text_M5',
    # M1, M2, M3, M4, M5 WantToSeeMoreFromAuthor responses (True is yes, False otherwise)
    'M1_moreauthor', 'M2_moreauthor', 'M3_moreauthor', 'M4_moreauthor', 'M5_moreauthor',
]]
# split 5-movie rows into 1-movie rows
df_permovie_new = pd.DataFrame()
for _, r in df_permovie.iterrows():
    df_permovie_new = pd.concat([
        df_permovie_new,
        pd.DataFrame({
            'ResponseId': [r['ResponseId']]*5,
            'showingauthor': [r['showingauthor']]*5,
            'genrepick': [r['genrepick']]*5,
            'mx_slotnum': [f'S{i+1}' for i in range(5)],
            'mx_title': [r[f'm{i+1}_title'] for i in range(5)],
            'textsrc_Mx': [r[f'textsrc_M{i+1}'] for i in range(5)],
            'textsrc_Mx_byhuman': [r[f'textsrc_M{i+1}_byhuman'] for i in range(5)],
            'prevwatchedbin_Mx': [r[f'prevwatchedbin_M{i+1}'] for i in range(5)],
            'rank5_Mx': [r[f'rank5_M{i+1}'] for i in range(5)],
            'rankNunseen_Mx': [r[f'rankNunseen_M{i+1}'] for i in range(5)],
            'time_page_submit_mx': [r[f'1{i+2}_time_recctext_Page Submit'] for i in range(5)],
            'acc_lik_Mx': [r[f'acc_lik_M{i+1}'] for i in range(5)],
            'acc_text_Mx': [r[f'acc_text_M{i+1}'] for i in range(5)],
            'inf_lik_Mx': [r[f'inf_lik_M{i+1}'] for i in range(5)],
            'inf_text_Mx': [r[f'inf_text_M{i+1}'] for i in range(5)],
            'per_lik_Mx': [r[f'per_lik_M{i+1}'] for i in range(5)],
            'per_text_Mx': [r[f'per_text_M{i+1}'] for i in range(5)],
            'int_lik_Mx': [r[f'int_lik_M{i+1}'] for i in range(5)],
            'int_text_Mx': [r[f'int_text_M{i+1}'] for i in range(5)],
            'Mx_moreauthor': [r[f'M{i+1}_moreauthor'] for i in range(5)],
        })
    ])
df_permovie = df_permovie_new.reset_index(drop=True)

df_permovie[:10]
df_permovie.to_csv('data/aggregated_permovie.csv', sep=',', index=False)

Unnamed: 0,ResponseId,showingauthor,genrepick,mx_slotnum,mx_title,textsrc_Mx,textsrc_Mx_byhuman,prevwatchedbin_Mx,rank5_Mx,rankNunseen_Mx,time_page_submit_mx,acc_lik_Mx,acc_text_Mx,inf_lik_Mx,inf_text_Mx,per_lik_Mx,per_text_Mx,int_lik_Mx,int_text_Mx,Mx_moreauthor
0,R_20UIlVy1cvOmECC,False,Titanic,S1,Anna and the King,A0,False,False,5,1.0,42.549,,,4,The text was informative in that it provided c...,5,The text was persuasive in that it encouraged...,3,The text was interesting to read due to its d...,False
1,R_20UIlVy1cvOmECC,False,Titanic,S2,Legend No. 17,A0,False,False,3,0.333333,78.817,,,3,The text was factually accurate in that it re...,3,The text was informative in that it provided a...,3,The text was persuasive in that it defended S...,False
2,R_20UIlVy1cvOmECC,False,Titanic,S3,Always,A0,False,False,2,0.0,65.541,,,3,The text was factually accurate in that it ref...,3,The text was persuasive in that it defended Sp...,3,The text was interesting to read due to its hu...,False
3,R_20UIlVy1cvOmECC,False,Titanic,S4,Far and Away,A0,False,True,1,,169.534,4.0,The text was informative in that it provided d...,4,The text was persuasive in that it encouraged ...,3,The text was persuasive in that it encouraged ...,3,The text was interesting to read due to its d...,False
4,R_20UIlVy1cvOmECC,False,Titanic,S5,Love Story,A2,True,False,4,0.666667,56.301,,,4,The text was informative in that it provided d...,3,The text was persuasive in that it encouraged ...,3,The text was interesting to read due to its d...,False
5,R_1nOygUc6Qe1tu60,False,Titanic,S1,Anna and the King,A0,False,False,1,0.0,73.299,,,3,The text was informative in that it provided c...,4,The text was persuasive in that it encouraged...,3,The text was interesting to read due to its de...,False
6,R_1nOygUc6Qe1tu60,False,Titanic,S2,Legend No. 17,A0,False,False,2,0.25,54.848,,,3,The text was informative in that it provided ...,3,The text was persuasive in that it encouraged ...,3,The text was interesting to read due to its de...,False
7,R_1nOygUc6Qe1tu60,False,Titanic,S3,Always,A0,False,True,4,,116.643,4.0,The text was factually accurate in that it ref...,3,The text was informative in that it provided a...,4,The text was persuasive in that it defended Sp...,5,The text was interesting to read due to its hu...,False
8,R_1nOygUc6Qe1tu60,False,Titanic,S4,Far and Away,A2,True,False,5,1.0,51.976,,,3,The text was informative in that it provided d...,4,The text was informative in that it provided d...,3,The text was interesting to read due to its de...,False
9,R_1nOygUc6Qe1tu60,False,Titanic,S5,Love Story,A0,False,False,3,0.5,64.898,,,3,The text was factually accurate in that it not...,4,The text was persuasive in its description of...,3,The text was interesting to read due to its ro...,False


# Vis: Genre

In [33]:
df_analysis['genrepick'].value_counts()

Titanic                                              41
Jurassic Park                                        24
The Dark Knight                                      15
The Lord of the Rings: The Fellowship of the Ring    12
The Sixth Sense                                      11
Toy Story                                             6
Mission: Impossible                                   5
Inception                                             3
Back to the Future                                    3
Name: genrepick, dtype: int64

# Vis: Rankings

## Top ranking was bot v human?

In [34]:
def l_toprankwashuman(r):
    mx_rankings = list(r[['rank5_M1', 'rank5_M2', 'rank5_M3', 'rank5_M4', 'rank5_M5']])
    mx_rank1 = mx_rankings.index(min(mx_rankings))+1
    return r[f'textsrc_M{mx_rank1}_byhuman']

print('True is by human, False is by bot')
print()
# regardless of starting movie, look at top-ranked-1 movie, count how often it was bot vs human review
toprank_counts = df_analysis.apply(
    l_toprankwashuman, 
    axis='columns'
).value_counts()
toprank_counts
sample_human, sample_bot = toprank_counts[True], toprank_counts[False]
stats.binomtest(sample_bot, sample_human+sample_bot, p=0.5)

# split by starting movie, look at top-ranked-1 movie review source
for genre, genre_group in df_analysis.groupby(by='genrepick'):
    print('---')
    print(genre)
    print(genre_group.apply(l_toprankwashuman, axis='columns').value_counts())

True is by human, False is by bot



False    63
True     57
dtype: int64

BinomTestResult(k=63, n=120, alternative='two-sided', proportion_estimate=0.525, pvalue=0.6482600518210052)

---
Back to the Future
True     2
False    1
dtype: int64
---
Inception
True     2
False    1
dtype: int64
---
Jurassic Park
False    12
True     12
dtype: int64
---
Mission: Impossible
True     4
False    1
dtype: int64
---
The Dark Knight
True     9
False    6
dtype: int64
---
The Lord of the Rings: The Fellowship of the Ring
False    7
True     5
dtype: int64
---
The Sixth Sense
False    7
True     4
dtype: int64
---
Titanic
False    25
True     16
dtype: int64
---
Toy Story
True     3
False    3
dtype: int64


## Top ranking was bot v human, +unseen?

In [35]:
def l_topunseenrankwashuman(r):
    mx_rankings = list(r[['rank5_M1', 'rank5_M2', 'rank5_M3', 'rank5_M4', 'rank5_M5']])
    for i in range(5):
        if r[f'prevwatchedbin_M{i+1}']:
            mx_rankings[i] = 9999
    if sum(mx_rankings)>=9999*4:
        # if 4 or 5 of them were marked as seen before:
        return 'AllSeenBefore'
    mx_rank1 = mx_rankings.index(min(mx_rankings))+1
    return r[f'textsrc_M{mx_rank1}_byhuman']

print('True is by human, False is by bot')
print()
# regardless of starting movie, look at top-ranked-1 movie, count how often it was bot vs human review
toprank_counts = df_analysis.apply(
    l_topunseenrankwashuman, 
    axis='columns'
).value_counts()
toprank_counts
sample_human, sample_bot = toprank_counts[True], toprank_counts[False]
stats.binomtest(sample_bot, sample_human+sample_bot, p=0.5)

# split by starting movie, look at top-ranked-1 movie review source
for genre, genre_group in df_analysis.groupby(by='genrepick'):
    print('---')
    print(genre)
    print(genre_group.apply(l_toprankwashuman, axis='columns').value_counts())

True is by human, False is by bot



True             57
False            53
AllSeenBefore    10
dtype: int64

BinomTestResult(k=53, n=110, alternative='two-sided', proportion_estimate=0.4818181818181818, pvalue=0.7750029828306254)

---
Back to the Future
True     2
False    1
dtype: int64
---
Inception
True     2
False    1
dtype: int64
---
Jurassic Park
False    12
True     12
dtype: int64
---
Mission: Impossible
True     4
False    1
dtype: int64
---
The Dark Knight
True     9
False    6
dtype: int64
---
The Lord of the Rings: The Fellowship of the Ring
False    7
True     5
dtype: int64
---
The Sixth Sense
False    7
True     4
dtype: int64
---
Titanic
False    25
True     16
dtype: int64
---
Toy Story
True     3
False    3
dtype: int64


In [36]:
def l_topseenrankwashuman(r):
    mx_rankings = list(r[['rank5_M1', 'rank5_M2', 'rank5_M3', 'rank5_M4', 'rank5_M5']])
    for i in range(5):
        if not r[f'prevwatchedbin_M{i+1}']:
            mx_rankings[i] = 9999
    if sum(mx_rankings)>=9999*4:
        # if 4 or 5 of them were marked as unseen before:
        return 'AllUnseenBefore'
    mx_rank1 = mx_rankings.index(min(mx_rankings))+1
    return r[f'textsrc_M{mx_rank1}_byhuman']

print('True is by human, False is by bot')
print()
# regardless of starting movie, look at top-ranked-1 movie, count how often it was bot vs human review
toprank_counts = df_analysis.apply(
    l_topseenrankwashuman, 
    axis='columns'
).value_counts()
toprank_counts
sample_human, sample_bot = toprank_counts[True], toprank_counts[False]
stats.binomtest(sample_bot, sample_human+sample_bot, p=0.5)

# split by starting movie, look at top-ranked-1 movie review source
for genre, genre_group in df_analysis.groupby(by='genrepick'):
    print('---')
    print(genre)
    print(genre_group.apply(l_toprankwashuman, axis='columns').value_counts())

True is by human, False is by bot



AllUnseenBefore    91
False              17
True               12
dtype: int64

BinomTestResult(k=17, n=29, alternative='two-sided', proportion_estimate=0.5862068965517241, pvalue=0.45825831964612007)

---
Back to the Future
True     2
False    1
dtype: int64
---
Inception
True     2
False    1
dtype: int64
---
Jurassic Park
False    12
True     12
dtype: int64
---
Mission: Impossible
True     4
False    1
dtype: int64
---
The Dark Knight
True     9
False    6
dtype: int64
---
The Lord of the Rings: The Fellowship of the Ring
False    7
True     5
dtype: int64
---
The Sixth Sense
False    7
True     4
dtype: int64
---
Titanic
False    25
True     16
dtype: int64
---
Toy Story
True     3
False    3
dtype: int64


## Average ranking aggregated when showing bot v human?

In [37]:
# across all movies, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
movie_rankings = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            rankings = list(textsrc_group[f'rank5_M{movie_number}'])
            movie_rankings[textsrc_fromhuman] = movie_rankings[textsrc_fromhuman] + rankings
for k in movie_rankings:
    movie_rankings[k] = [e for e in movie_rankings[k] if not pd.isna(e)]
movie_ranking_avgs = {True:ref_tools.l_avgifnonnone(movie_rankings[True]), False:ref_tools.l_avgifnonnone(movie_rankings[False])}

print('True is by human, False is by bot')
print()
print('Lower = better preference in ranking')
print()
print(movie_ranking_avgs)
print('  sample sizes :', len(movie_rankings[True]), len(movie_rankings[False]))
print(' ', stats.ttest_ind(movie_rankings[True], movie_rankings[False]))
print(' ', stats.ranksums(movie_rankings[True], movie_rankings[False]))

True is by human, False is by bot

Lower = better preference in ranking

{True: 3.0533333333333332, False: 2.9466666666666668}
  sample sizes : 300 300
  Ttest_indResult(statistic=0.9228760457580811, pvalue=0.3564440058828735)
  RanksumsResult(statistic=0.9043433741870066, pvalue=0.36581335415118665)


In [38]:
# across all movies, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
movie_rankings = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            rankings = list(textsrc_group[f'rank5_M{movie_number}'])
            rankings_norm = [e-1 for e in rankings]
            rankings_norm = [e/4 for e in rankings_norm]
            movie_rankings[textsrc_fromhuman] = movie_rankings[textsrc_fromhuman] + rankings_norm
for k in movie_rankings:
    movie_rankings[k] = [e for e in movie_rankings[k] if not pd.isna(e)]
movie_ranking_avgs = {True:ref_tools.l_avgifnonnone(movie_rankings[True]), False:ref_tools.l_avgifnonnone(movie_rankings[False])}

print('True is by human, False is by bot')
print()
print('Lower = better preference in ranking')
print()
print(movie_ranking_avgs)
print('  sample sizes :', len(movie_rankings[True]), len(movie_rankings[False]))
print(' ', stats.ttest_ind(movie_rankings[True], movie_rankings[False]))
print(' ', stats.ranksums(movie_rankings[True], movie_rankings[False]))

True is by human, False is by bot

Lower = better preference in ranking

{True: 0.5133333333333333, False: 0.4866666666666667}
  sample sizes : 300 300
  Ttest_indResult(statistic=0.9228760457580811, pvalue=0.3564440058828735)
  RanksumsResult(statistic=0.9043433741870066, pvalue=0.36581335415118665)


## Average ranking aggregated when showing bot v human, +unseen?

In [39]:
# across all UNSEEN movies, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
movie_rankings = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            rankings = list(textsrc_group[f'rankNunseen_M{movie_number}'])
            movie_rankings[textsrc_fromhuman] = movie_rankings[textsrc_fromhuman] + rankings
for k in movie_rankings:
    movie_rankings[k] = [e for e in movie_rankings[k] if not pd.isna(e)]
movie_ranking_avgs = {True:ref_tools.l_avgifnonnone(movie_rankings[True]), False:ref_tools.l_avgifnonnone(movie_rankings[False])}

print('True is by human, False is by bot. Including ONLY UNSEEN')
print()
print('Lower = better preference in ranking')
print()
print(movie_ranking_avgs)
print('  sample sizes :', len(movie_rankings[True]), len(movie_rankings[False]))
print(' ', stats.ttest_ind(movie_rankings[True], movie_rankings[False]))
print(' ', stats.ranksums(movie_rankings[True], movie_rankings[False]))

True is by human, False is by bot. Including ONLY UNSEEN

Lower = better preference in ranking

{True: 0.515909090909091, False: 0.4889455782312926}
  sample sizes : 220 196
  Ttest_indResult(statistic=0.7114246595350147, pvalue=0.47722177183746317)
  RanksumsResult(statistic=0.6690631658877046, pvalue=0.5034551852181075)


In [40]:
# across all UNSEEN movies, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
movie_rankings = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            rankings = list(textsrc_group[f'rankNseen_M{movie_number}'])
            movie_rankings[textsrc_fromhuman] = movie_rankings[textsrc_fromhuman] + rankings
for k in movie_rankings:
    movie_rankings[k] = [e for e in movie_rankings[k] if not pd.isna(e)]
movie_ranking_avgs = {True:ref_tools.l_avgifnonnone(movie_rankings[True]), False:ref_tools.l_avgifnonnone(movie_rankings[False])}

print('True is by human, False is by bot. Including ONLY SEEN')
print()
print('Lower = better preference in ranking')
print()
print(movie_ranking_avgs)
print('  sample sizes :', len(movie_rankings[True]), len(movie_rankings[False]))
print(' ', stats.ttest_ind(movie_rankings[True], movie_rankings[False]))
print(' ', stats.ranksums(movie_rankings[True], movie_rankings[False]))

True is by human, False is by bot. Including ONLY SEEN

Lower = better preference in ranking

{True: 0.5444444444444444, False: 0.44999999999999996}
  sample sizes : 45 50
  Ttest_indResult(statistic=1.1210411460058196, pvalue=0.26515676749058875)
  RanksumsResult(statistic=1.050951949424901, pvalue=0.29328065913876233)


## Average ranking aggregated when showing bot v human, +author?

In [41]:
# across all movies, when author was shown vs not, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
movie_rankings = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in textsrc_group.groupby(by='showingauthor'):
                rankings = list(authormovie_group[f'rank5_M{movie_number}'])
                movie_rankings[authorusednum][textsrc_fromhuman] = movie_rankings[authorusednum][textsrc_fromhuman] + rankings
movie_ranking_avgs = {
    False:{True:ref_tools.l_avgifnonnone(movie_rankings[False][True]), False:ref_tools.l_avgifnonnone(movie_rankings[False][False])},
    True:{True:ref_tools.l_avgifnonnone(movie_rankings[True][True]), False:ref_tools.l_avgifnonnone(movie_rankings[True][False])},
}

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Lower = better preference in ranking')
print()
for showing_author in movie_ranking_avgs:
    print(showing_author, ':', movie_ranking_avgs[showing_author])
    sample_true = movie_rankings[showing_author][True]
    sample_false = movie_rankings[showing_author][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
print()
# stats for whether knowing the authorship did impact this difference in numbers
print(stats.ttest_ind(movie_rankings[False][False], movie_rankings[True][False]))

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Lower = better preference in ranking

False : {True: 3.0, False: 3.0}
  sample sizes : 150 150
  Ttest_indResult(statistic=0.0, pvalue=1.0)
  RanksumsResult(statistic=0.0, pvalue=1.0)
True : {True: 3.1066666666666665, False: 2.8933333333333335}
  sample sizes : 150 150
  Ttest_indResult(statistic=1.305752035545053, pvalue=0.19264399992368497)
  RanksumsResult(statistic=1.2778719852282658, pvalue=0.20129456741160834)

Ttest_indResult(statistic=0.6476287065830449, pvalue=0.517723738374281)


## Average ranking aggregated when showing bot v human, +unseen, +author?

In [42]:
# across all movies, when author was shown vs not, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
movie_rankings = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in textsrc_group.groupby(by='showingauthor'):
                rankings = list(authormovie_group[f'rankNunseen_M{movie_number}'])
                movie_rankings[authorusednum][textsrc_fromhuman] = movie_rankings[authorusednum][textsrc_fromhuman] + rankings
for a in movie_rankings:
    for k in movie_rankings[a]:
        movie_rankings[a][k] = [e for e in movie_rankings[a][k] if not pd.isna(e)]
movie_ranking_avgs = {
    False:{True:ref_tools.l_avgifnonnone(movie_rankings[False][True]), False:ref_tools.l_avgifnonnone(movie_rankings[False][False])},
    True:{True:ref_tools.l_avgifnonnone(movie_rankings[True][True]), False:ref_tools.l_avgifnonnone(movie_rankings[True][False])},
}

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Lower = better preference in ranking')
print()
for showing_author in movie_ranking_avgs:
    print(showing_author, ':', movie_ranking_avgs[showing_author])
    sample_true = movie_rankings[showing_author][True]
    sample_false = movie_rankings[showing_author][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
print()
# TODO stats for whether knowing the authorship did impact this difference in numbers

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Lower = better preference in ranking

False : {True: 0.501543209876543, False: 0.4984276729559748}
  sample sizes : 108 106
  Ttest_indResult(statistic=0.05952448432659296, pvalue=0.9525904366624548)
  RanksumsResult(statistic=0.032016553906542, pvalue=0.9744588495564898)
True : {True: 0.5297619047619048, False: 0.4777777777777777}
  sample sizes : 112 90
  Ttest_indResult(statistic=0.9402112508381228, pvalue=0.34824338101424224)
  RanksumsResult(statistic=0.9141749022030567, pvalue=0.36062494968316583)



## Average ranking per movie when showing bot v human?

In [43]:
# for each movie, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
wins = {'bot':0, 'human':0, 'tie':0}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        movie_rankings = {True:None, False:None}
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            rankings = textsrc_group[f'rank5_M{movie_number}']
            movie_rankings[textsrc_fromhuman] = rankings
        movie_ranking_avgs = {True:ref_tools.l_avgifnonnone(movie_rankings[True]), False:ref_tools.l_avgifnonnone(movie_rankings[False])}
        if movie_ranking_avgs[True] is not None and movie_ranking_avgs[False] is not None:
            print(moviename)
            print('    bot  : ', movie_ranking_avgs[False])
            print('    human: ', movie_ranking_avgs[True])
            if movie_ranking_avgs[False]==movie_ranking_avgs[True]:
                print('  Tied')
                wins['tie'] += 1
            elif movie_ranking_avgs[False]<movie_ranking_avgs[True]:
                print('  Bot wins')
                wins['bot'] += 1
            else:
                print('  Human wins')
                wins['human'] += 1

print('---')
print(wins)

Anna and the King
    bot  :  2.411764705882353
    human:  2.8333333333333335
  Bot wins
Batman: Mask of the Phantasm
    bot  :  2.090909090909091
    human:  1.75
  Human wins
Miniscule: Valley of the Lost Ants
    bot  :  3.5
    human:  1.5
  Human wins
Real Genius
    bot  :  2.0
    human:  5.0
  Bot wins
Sleuth
    bot  :  3.0
    human:  3.4
  Bot wins
The Poseidon Adventure
    bot  :  4.0
    human:  2.769230769230769
  Human wins
The Saint
    bot  :  5.0
    human:  2.75
  Human wins
The Three Musketeers
    bot  :  3.125
    human:  4.75
  Bot wins
When Nietzsche Wept
    bot  :  2.5
    human:  4.0
  Bot wins
Legend No. 17
    bot  :  3.12
    human:  2.8125
  Human wins
Silverado
    bot  :  4.0
    human:  2.0
  Human wins
Streets of Fire
    bot  :  4.0
    human:  2.727272727272727
  Human wins
The 7th Voyage of Sinbad
    bot  :  3.142857142857143
    human:  2.6
  Human wins
The Sentinel
    bot  :  3.6666666666666665
    human:  3.5
  Human wins
The Towering Infer

## Average ranking per movie when showing bot v human, +unseen?

In [44]:
# for each movie, did the human review(s) get higher average rating, vs bot reviews getting higher average rating within the ranking?
wins = {'bot':0, 'human':0, 'tie':0}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        movie_rankings = {True:None, False:None}
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            rankings = textsrc_group[f'rankNunseen_M{movie_number}']
            movie_rankings[textsrc_fromhuman] = rankings
        movie_ranking_avgs = {True:ref_tools.l_avgifnonnone(movie_rankings[True]), False:ref_tools.l_avgifnonnone(movie_rankings[False])}
        if movie_ranking_avgs[True] is not None and movie_ranking_avgs[False] is not None:
            print(moviename)
            print('    bot  : ', movie_ranking_avgs[False])
            print('    human: ', movie_ranking_avgs[True])
            if movie_ranking_avgs[False]==movie_ranking_avgs[True]:
                print('  Tied')
                wins['tie'] += 1
            elif movie_ranking_avgs[False]<movie_ranking_avgs[True]:
                print('  Bot wins')
                wins['bot'] += 1
            else:
                print('  Human wins')
                wins['human'] += 1

print('---')
print(wins)

Anna and the King
    bot  :  0.3703703703703704
    human:  0.47916666666666663
  Bot wins
Batman: Mask of the Phantasm
    bot  :  0.3333333333333333
    human:  0.3333333333333333
  Tied
Miniscule: Valley of the Lost Ants
    bot  :  0.625
    human:  0.1111111111111111
  Human wins
Sleuth
    bot  :  0.38888888888888884
    human:  0.6833333333333333
  Bot wins
The Poseidon Adventure
    bot  :  0.8214285714285714
    human:  0.3854166666666667
  Human wins
The Saint
    bot  :  1.0
    human:  0.5
  Human wins
The Three Musketeers
    bot  :  0.5416666666666666
    human:  1.0
  Bot wins
When Nietzsche Wept
    bot  :  0.4583333333333333
    human:  0.75
  Bot wins
Legend No. 17
    bot  :  0.45312499999999994
    human:  0.40384615384615385
  Human wins
Streets of Fire
    bot  :  0.6666666666666666
    human:  0.4375
  Human wins
The 7th Voyage of Sinbad
    bot  :  0.47222222222222227
    human:  0.3333333333333333
  Human wins
The Towering Inferno
    bot  :  0.40625
    human

# Vis: Likert

In [45]:
# graph setup across all Likert scale responses
df_likert_fullcollection = pd.DataFrame()

## Likert responses aggregated when showing bot v human?

In [46]:
# likert scales across all movies, compare scores and qualitative points raised for human vs bot reviews
movie_scorings = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scorings[likert_type] = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for likert_type in ['acc', 'inf', 'per', 'int']:
                scorings = list(textsrc_group[f'{likert_type}_lik_M{movie_number}'])
                movie_scorings[likert_type][textsrc_fromhuman] = movie_scorings[likert_type][textsrc_fromhuman] + scorings
for lk in movie_scorings:
    for k in movie_scorings[lk]:
        movie_scorings[lk][k] = [int(e) for e in movie_scorings[lk][k] if not pd.isna(e)]
movie_scoring_avgs = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scoring_avgs[likert_type] = {
        True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True]), 
        False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False])
    }
# collect into df form
df_likert_fullcollection = pd.DataFrame() # TODO remove when satisfied
for likert_longtype, likert_type in likert_convert_dict.keys():
    for is_bot in [True, False]:
        df_likert_all = pd.DataFrame()
        reversed_textmap = {}
        for scale_longtext in likert_convert_dict[(likert_longtype, likert_type)]:
            counted = pd.DataFrame(movie_scorings[likert_type][not is_bot]).value_counts()
            # collect raw counts and percentages
            scale_value = likert_convert_dict[(likert_longtype, likert_type)][scale_longtext]
            reversed_textmap[scale_value] = scale_longtext
            present_count = counted[scale_value] if (scale_value in counted) else 0
            df_likert_all = pd.concat([
                df_likert_all,
                pd.DataFrame({
                    'x_filter': ['all'],
                    'x_isbot': [is_bot],
                    'x_likerttype': [likert_type],
                    'question': [('all', 'bot' if (is_bot) else 'human', likert_type)],
                    'type': [scale_longtext],
                    'value': [present_count],
                    'percentage': [present_count/sum(counted)],
                    'percentage_start': [0], # left blank for revision
                    'percentage_end': [0], # left blank for revision
                }),
            ])
        # convert percentage_start and percentage_end
        min_point = 0
        max_point = 0
        # value: 3 (balanced)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[3]]['percentage']
        min_point = round(-1*percent/2, 4)
        max_point = round(percent/2, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[3], 'percentage_start'] = min_point
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[3], 'percentage_end'] = max_point
        # value: 4 (add+1)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[4]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[4], 'percentage_start'] = max_point
        max_point = round(max_point + percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[4], 'percentage_end'] = max_point
        # value: 5 (add+2)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[5]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[5], 'percentage_start'] = max_point
        max_point = round(max_point + percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[5], 'percentage_end'] = max_point
        # value: 2 (add-1)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[2]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[2], 'percentage_end'] = min_point
        min_point = round(min_point - percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[2], 'percentage_start'] = min_point
        # value: 1 (add-2)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[1]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[1], 'percentage_end'] = min_point
        min_point = round(min_point - percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[1], 'percentage_start'] = min_point
        df_likert_fullcollection = pd.concat([
            df_likert_fullcollection,
            df_likert_all,
        ]).reset_index(drop=True)

print('True is by human, False is by bot')
print()
print('Higher = more accurate/informational/persuasive/interesting')
print()
for k in movie_scoring_avgs:
    print(k, ':', movie_scoring_avgs[k])
    sample_true = movie_scorings[k][True]
    sample_false = movie_scorings[k][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
    
# significance: t-test, ranksums
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.htm
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ranksums.html

True is by human, False is by bot

Higher = more accurate/informational/persuasive/interesting

acc : {True: 4.063291139240507, False: 4.284313725490196}
  sample sizes : 79 102
  Ttest_indResult(statistic=-2.041149015754929, pvalue=0.042704116284346956)
  RanksumsResult(statistic=-2.0395334258816535, pvalue=0.04139681871727277)
inf : {True: 3.9433333333333334, False: 4.053333333333334}
  sample sizes : 300 300
  Ttest_indResult(statistic=-1.462872311388874, pvalue=0.14402768826179393)
  RanksumsResult(statistic=-0.6363374471492947, pvalue=0.5245565023861781)
per : {True: 3.85, False: 3.966666666666667}
  sample sizes : 300 300
  Ttest_indResult(statistic=-1.4195038078063897, pvalue=0.15627323994567727)
  RanksumsResult(statistic=-0.7863548245860456, pvalue=0.4316596416978391)
int : {True: 3.9133333333333336, False: 4.05}
  sample sizes : 300 300
  Ttest_indResult(statistic=-1.708457337489385, pvalue=0.0880704390464105)
  RanksumsResult(statistic=-1.5291410230719358, pvalue=0.126229487

## Likert responses aggregated when showing bot v human, +unseen?

In [47]:
# likert scales across all movies, compare scores and qualitative points raised for human vs bot reviews
# Exclude "accuracy" because that was asked only for SEEN movies
movie_scorings = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scorings[likert_type] = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    # only include UNSEEN movie entries
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==False]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for likert_type in ['acc', 'inf', 'per', 'int']:
                scorings = list(textsrc_group[f'{likert_type}_lik_M{movie_number}'])
                movie_scorings[likert_type][textsrc_fromhuman] = movie_scorings[likert_type][textsrc_fromhuman] + scorings
for lk in movie_scorings:
    for k in movie_scorings[lk]:
        movie_scorings[lk][k] = [e for e in movie_scorings[lk][k] if not pd.isna(e)]
movie_scoring_avgs = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scoring_avgs[likert_type] = {
        True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True]), 
        False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False])
    }
# collect into df form
for likert_longtype, likert_type in likert_convert_dict.keys():
    for is_bot in [True, False]:
        df_likert_all = pd.DataFrame()
        reversed_textmap = {}
        for scale_longtext in likert_convert_dict[(likert_longtype, likert_type)]:
            counted = pd.DataFrame(movie_scorings[likert_type][not is_bot]).value_counts()
            counted_sum = sum(counted)
            # collect raw counts and percentages
            scale_value = likert_convert_dict[(likert_longtype, likert_type)][scale_longtext]
            reversed_textmap[scale_value] = scale_longtext
            present_count = counted[scale_value] if (scale_value in counted) else 0
            df_likert_all = pd.concat([
                df_likert_all,
                pd.DataFrame({
                    'x_filter': ['unseen'],
                    'x_isbot': [is_bot],
                    'x_likerttype': [likert_type],
                    'question': [('unseen', 'bot' if (is_bot) else 'human', likert_type)],
                    'type': [scale_longtext],
                    'value': [present_count],
                    'percentage': [present_count/counted_sum if (counted_sum!=0) else 0],
                    'percentage_start': [0], # left blank for revision
                    'percentage_end': [0], # left blank for revision
                }),
            ])
        # convert percentage_start and percentage_end
        min_point = 0
        max_point = 0
        # value: 3 (balanced)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[3]]['percentage']
        min_point = round(-1*percent/2, 4)
        max_point = round(percent/2, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[3], 'percentage_start'] = min_point
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[3], 'percentage_end'] = max_point
        # value: 4 (add+1)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[4]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[4], 'percentage_start'] = max_point
        max_point = round(max_point + percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[4], 'percentage_end'] = max_point
        # value: 5 (add+2)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[5]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[5], 'percentage_start'] = max_point
        max_point = round(max_point + percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[5], 'percentage_end'] = max_point
        # value: 2 (add-1)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[2]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[2], 'percentage_end'] = min_point
        min_point = round(min_point - percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[2], 'percentage_start'] = min_point
        # value: 1 (add-2)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[1]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[1], 'percentage_end'] = min_point
        min_point = round(min_point - percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[1], 'percentage_start'] = min_point
        df_likert_fullcollection = pd.concat([
            df_likert_fullcollection,
            df_likert_all,
        ]).reset_index(drop=True)

print('True is by human, False is by bot')
print()
print('Higher = more accurate/informational/persuasive/interesting')
print()
for k in movie_scoring_avgs:
    print(k, ':', movie_scoring_avgs[k])
    sample_true = movie_scorings[k][True]
    sample_false = movie_scorings[k][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
    
# significance: t-test, ranksums
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.htm
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ranksums.html

True is by human, False is by bot

Higher = more accurate/informational/persuasive/interesting

acc : {True: None, False: None}
  sample sizes : 0 0
  Ttest_indResult(statistic=nan, pvalue=nan)
  RanksumsResult(statistic=nan, pvalue=nan)
inf : {True: 3.9728506787330318, False: 3.984848484848485}
  sample sizes : 221 198
  Ttest_indResult(statistic=-0.13255137791093904, pvalue=0.8946121237365097)
  RanksumsResult(statistic=0.578966506876321, pvalue=0.5626117729053852)
per : {True: 3.8778280542986425, False: 3.8636363636363638}
  sample sizes : 221 198
  Ttest_indResult(statistic=0.13883816371414057, pvalue=0.8896450591617542)
  RanksumsResult(statistic=0.8682477482743991, pvalue=0.3852587200391988)
int : {True: 3.8778280542986425, False: 3.904040404040404}
  sample sizes : 221 198
  Ttest_indResult(statistic=-0.26663522599858436, pvalue=0.7898818302269953)
  RanksumsResult(statistic=0.15595329494365662, pvalue=0.8760698359028715)


  z = (s - expected) / np.sqrt(n1*n2*(n1+n2+1)/12.0)


In [48]:
# likert scales across all movies, compare scores and qualitative points raised for human vs bot reviews
movie_scorings = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scorings[likert_type] = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    # only include SEEN movie entries
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==True]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for likert_type in ['acc', 'inf', 'per', 'int']:
                scorings = list(textsrc_group[f'{likert_type}_lik_M{movie_number}'])
                movie_scorings[likert_type][textsrc_fromhuman] = movie_scorings[likert_type][textsrc_fromhuman] + scorings
for lk in movie_scorings:
    for k in movie_scorings[lk]:
        movie_scorings[lk][k] = [e for e in movie_scorings[lk][k] if not pd.isna(e)]
movie_scoring_avgs = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scoring_avgs[likert_type] = {
        True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True]), 
        False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False])
    }
# collect into df form
for likert_longtype, likert_type in likert_convert_dict.keys():
    for is_bot in [True, False]:
        df_likert_all = pd.DataFrame()
        reversed_textmap = {}
        for scale_longtext in likert_convert_dict[(likert_longtype, likert_type)]:
            counted = pd.DataFrame(movie_scorings[likert_type][not is_bot]).value_counts()
            counted_sum = sum(counted)
            # collect raw counts and percentages
            scale_value = likert_convert_dict[(likert_longtype, likert_type)][scale_longtext]
            reversed_textmap[scale_value] = scale_longtext
            present_count = counted[scale_value] if (scale_value in counted) else 0
            df_likert_all = pd.concat([
                df_likert_all,
                pd.DataFrame({
                    'x_filter': ['seen'],
                    'x_isbot': [is_bot],
                    'x_likerttype': [likert_type],
                    'question': [('seen', 'bot' if (is_bot) else 'human', likert_type)],
                    'type': [scale_longtext],
                    'value': [present_count],
                    'percentage': [present_count/counted_sum if (counted_sum!=0) else 0],
                    'percentage_start': [0], # left blank for revision
                    'percentage_end': [0], # left blank for revision
                }),
            ])
        # convert percentage_start and percentage_end
        min_point = 0
        max_point = 0
        # value: 3 (balanced)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[3]]['percentage']
        min_point = round(-1*percent/2, 4)
        max_point = round(percent/2, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[3], 'percentage_start'] = min_point
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[3], 'percentage_end'] = max_point
        # value: 4 (add+1)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[4]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[4], 'percentage_start'] = max_point
        max_point = round(max_point + percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[4], 'percentage_end'] = max_point
        # value: 5 (add+2)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[5]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[5], 'percentage_start'] = max_point
        max_point = round(max_point + percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[5], 'percentage_end'] = max_point
        # value: 2 (add-1)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[2]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[2], 'percentage_end'] = min_point
        min_point = round(min_point - percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[2], 'percentage_start'] = min_point
        # value: 1 (add-2)
        percent = df_likert_all[df_likert_all['type']==reversed_textmap[1]]['percentage']
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[1], 'percentage_end'] = min_point
        min_point = round(min_point - percent, 4)
        df_likert_all.loc[df_likert_all['type']==reversed_textmap[1], 'percentage_start'] = min_point
        df_likert_fullcollection = pd.concat([
            df_likert_fullcollection,
            df_likert_all,
        ]).reset_index(drop=True)

print('True is by human, False is by bot')
print()
print('Higher = more accurate/informational/persuasive/interesting')
print()
for k in movie_scoring_avgs:
    print(k, ':', movie_scoring_avgs[k])
    sample_true = movie_scorings[k][True]
    sample_false = movie_scorings[k][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
    
# significance: t-test, ranksums
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.htm
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ranksums.html

True is by human, False is by bot

Higher = more accurate/informational/persuasive/interesting

acc : {True: 4.063291139240507, False: 4.284313725490196}
  sample sizes : 79 102
  Ttest_indResult(statistic=-2.041149015754929, pvalue=0.042704116284346956)
  RanksumsResult(statistic=-2.0395334258816535, pvalue=0.04139681871727277)
inf : {True: 3.8607594936708862, False: 4.186274509803922}
  sample sizes : 79 102
  Ttest_indResult(statistic=-2.3972753549583308, pvalue=0.017546540009223895)
  RanksumsResult(statistic=-1.8879271543925544, pvalue=0.05903573052743016)
per : {True: 3.7721518987341773, False: 4.166666666666667}
  sample sizes : 79 102
  Ttest_indResult(statistic=-2.934163284879403, pvalue=0.0037826377904969107)
  RanksumsResult(statistic=-2.767529578598176, pvalue=0.005648291971253566)
int : {True: 4.012658227848101, False: 4.333333333333333}
  sample sizes : 79 102
  Ttest_indResult(statistic=-2.4223639969392283, pvalue=0.01641692766520364)
  RanksumsResult(statistic=-2.688865

## (aggregated) Likert graphs

In [49]:
charts_topcenter = {}
charts_side = {}
for ltype, lkey, ltitle in [
    ('acc', ('acc', 'acc'), 'Likert: Accurate'),
    ('inf', ('info', 'inf'), 'Likert: Informational'),
    ('per', ('per', 'per'), 'Likert: Persuasive'),
    ('int', ('int', 'int'), 'Likert: Interesting'),
]:
    charts_topcenter[ltitle] = {}
    charts_side[ltitle] = {}
    for gfilter in ['all', 'unseen', 'seen']:
        source = df_likert_fullcollection[
            df_likert_fullcollection['x_likerttype']==ltype
        ][
            df_likert_fullcollection['x_filter']==gfilter
        ].copy()
        source['question'] = source['x_isbot'].apply(lambda x: 'Bot' if x else 'Human')
        source = alt.pd.DataFrame(
            source
        )

        color_domain = list(likert_convert_dict[lkey].keys())
        color_domain.reverse()
        color_range = ['#1770ab', '#94c6da', '#cccccc', '#f3a583', '#c30d24']
        color_range.reverse()
        color_scale = alt.Scale(
            domain=color_domain,
            range=color_range,
        )

        y_axis = alt.Axis(
            title='',
            offset=5,
            ticks=False,
            minExtent=60,
            domain=False
        )

        charts_topcenter[ltitle][gfilter] =alt.Chart(
            source,
            title=alt.Title(ltitle, subtitle=[gfilter]),
        ).mark_bar().encode(
            alt.X('percentage_start:Q').axis(format='%').title('% of Responses'),
            x2='percentage_end:Q',
            y=alt.Y('question:N').axis(y_axis),
            color=alt.Color(
                'type:N', 
                legend=alt.Legend(
                    orient='none',
                    legendX=-130, legendY=-40,
                    direction='horizontal',
                    titleAnchor='middle',
                ),
            ).title('').scale(color_scale),
        )

        charts_side[ltitle][gfilter] =alt.Chart(
            source,
            title=alt.Title(ltitle, subtitle=[gfilter]),
        ).mark_bar().encode(
            alt.X('percentage_start:Q').axis(format='%').title('% of Responses'),
            x2='percentage_end:Q',
            y=alt.Y('question:N').axis(y_axis),
            color=alt.Color(
                'type:N',
            ).title('').scale(color_scale),
        )

  source = df_likert_fullcollection[


In [50]:
charts_topcenter['Likert: Accurate']['all']
charts_topcenter['Likert: Informational']['all']
charts_topcenter['Likert: Persuasive']['all']
charts_topcenter['Likert: Interesting']['all']

In [51]:
alt.hconcat(
    charts_side['Likert: Accurate']['all'],
    charts_side['Likert: Accurate']['seen'],
    charts_side['Likert: Accurate']['seen'],
)

In [52]:
alt.hconcat(
    charts_side['Likert: Informational']['all'],
    charts_side['Likert: Informational']['unseen'],
    charts_side['Likert: Informational']['seen'],
)

In [53]:
alt.hconcat(
    charts_side['Likert: Persuasive']['all'],
    charts_side['Likert: Persuasive']['unseen'],
    charts_side['Likert: Persuasive']['seen'],
)

In [54]:
alt.hconcat(
    charts_side['Likert: Interesting']['all'],
    charts_side['Likert: Interesting']['unseen'],
    charts_side['Likert: Interesting']['seen'],
)

## Likert responses aggregated when showing bot v human, +author?

In [55]:
# likert scales across all movies, when author was shown vs not, compare scores and qualitative points raised for human vs bot reviews
movie_scorings = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scorings[likert_type] = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in textsrc_group.groupby(by='showingauthor'):
                for likert_type in ['acc', 'inf', 'per', 'int']:
                    scorings = list(authormovie_group[f'{likert_type}_lik_M{movie_number}'])
                    movie_scorings[likert_type][authorusednum][textsrc_fromhuman] = movie_scorings[likert_type][authorusednum][textsrc_fromhuman] + scorings
for lk in movie_scorings:
    for a in movie_scorings[lk]:
        for k in movie_scorings[lk][a]:
            movie_scorings[lk][a][k] = [e for e in movie_scorings[lk][a][k] if not pd.isna(e)]
movie_scoring_avgs = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scoring_avgs[likert_type] = {
        False:{True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False][False])},
        True:{True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True][False])},
    }

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Higher = more accurate/informational/persuasive/interesting')
print()
for k in movie_scoring_avgs:
    for showing_author in movie_scoring_avgs[k]:
        print(k, showing_author, ':', movie_scoring_avgs[k][showing_author])
        sample_true = movie_scorings[k][showing_author][True]
        sample_false = movie_scorings[k][showing_author][False]
        print('  sample sizes :', len(sample_true), len(sample_false))
        print(' ', stats.ttest_ind(sample_true, sample_false))
        print(' ', stats.ranksums(sample_true, sample_false))
    print()
# TODO stats for whether knowing the authorship did impact this difference in numbers

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Higher = more accurate/informational/persuasive/interesting

acc False : {True: 4.073170731707317, False: 4.238095238095238}
  sample sizes : 41 42
  Ttest_indResult(statistic=-1.1527335704505894, pvalue=0.2524103177596397)
  RanksumsResult(statistic=-1.1385298447559613, pvalue=0.2548993040305828)
acc True : {True: 4.052631578947368, False: 4.316666666666666}
  sample sizes : 38 60
  Ttest_indResult(statistic=-1.626203393519276, pvalue=0.10718472815232107)
  RanksumsResult(statistic=-1.600442688625461, pvalue=0.10950041139045419)

inf False : {True: 3.8666666666666667, False: 3.986666666666667}
  sample sizes : 150 150
  Ttest_indResult(statistic=-1.1316653143492517, pvalue=0.2586850025884428)
  RanksumsResult(statistic=-0.3381036294249787, pvalue=0.73528509555647)
inf True : {True: 4.02, False: 4.12}
  sample sizes : 150 150
  Ttest_indResult(statistic=-0.9403021063838409, pvalue

## Likert responses aggregated when showing bot v human, +unseen, +author?

In [56]:
# likert scales across all movies, when author was shown vs not, compare scores and qualitative points raised for human vs bot reviews
movie_scorings = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scorings[likert_type] = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    # only include UNSEEN movie entries
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==False]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in textsrc_group.groupby(by='showingauthor'):
                for likert_type in ['acc', 'inf', 'per', 'int']:
                    scorings = list(authormovie_group[f'{likert_type}_lik_M{movie_number}'])
                    movie_scorings[likert_type][authorusednum][textsrc_fromhuman] = movie_scorings[likert_type][authorusednum][textsrc_fromhuman] + scorings
for lk in movie_scorings:
    for a in movie_scorings[lk]:
        for k in movie_scorings[lk][a]:
            movie_scorings[lk][a][k] = [e for e in movie_scorings[lk][a][k] if not pd.isna(e)]
movie_scoring_avgs = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scoring_avgs[likert_type] = {
        False:{True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False][False])},
        True:{True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True][False])},
    }

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Higher = more accurate/informational/persuasive/interesting')
print()
for k in movie_scoring_avgs:
    for showing_author in movie_scoring_avgs[k]:
        print(k, showing_author, ':', movie_scoring_avgs[k][showing_author])
        sample_true = movie_scorings[k][showing_author][True]
        sample_false = movie_scorings[k][showing_author][False]
        print('  sample sizes :', len(sample_true), len(sample_false))
        print(' ', stats.ttest_ind(sample_true, sample_false))
        print(' ', stats.ranksums(sample_true, sample_false))
    print()
# did knowing the authorship did impact this difference in numbers?

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Higher = more accurate/informational/persuasive/interesting

acc False : {True: None, False: None}
  sample sizes : 0 0
  Ttest_indResult(statistic=nan, pvalue=nan)
  RanksumsResult(statistic=nan, pvalue=nan)
acc True : {True: None, False: None}
  sample sizes : 0 0
  Ttest_indResult(statistic=nan, pvalue=nan)
  RanksumsResult(statistic=nan, pvalue=nan)

inf False : {True: 3.8990825688073394, False: 3.962962962962963}
  sample sizes : 109 108
  Ttest_indResult(statistic=-0.5145855442383108, pvalue=0.6073712384803306)
  RanksumsResult(statistic=0.11028270899239732, pvalue=0.9121851698377145)
inf True : {True: 4.044642857142857, False: 4.011111111111111}
  sample sizes : 112 90
  Ttest_indResult(statistic=0.252672928943342, pvalue=0.8007804684191985)
  RanksumsResult(statistic=0.6284195685342867, pvalue=0.5297291212457006)

per False : {True: 3.889908256880734, False: 3.925925925925

  z = (s - expected) / np.sqrt(n1*n2*(n1+n2+1)/12.0)


In [57]:
# likert scales across all movies, when author was shown vs not, compare scores and qualitative points raised for human vs bot reviews
movie_scorings = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scorings[likert_type] = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    # only include SEEN movie entries
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==True]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in textsrc_group.groupby(by='showingauthor'):
                for likert_type in ['acc', 'inf', 'per', 'int']:
                    scorings = list(authormovie_group[f'{likert_type}_lik_M{movie_number}'])
                    movie_scorings[likert_type][authorusednum][textsrc_fromhuman] = movie_scorings[likert_type][authorusednum][textsrc_fromhuman] + scorings
for lk in movie_scorings:
    for a in movie_scorings[lk]:
        for k in movie_scorings[lk][a]:
            movie_scorings[lk][a][k] = [e for e in movie_scorings[lk][a][k] if not pd.isna(e)]
movie_scoring_avgs = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    movie_scoring_avgs[likert_type] = {
        False:{True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False][False])},
        True:{True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True][False])},
    }

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Higher = more accurate/informational/persuasive/interesting')
print()
for k in movie_scoring_avgs:
    for showing_author in movie_scoring_avgs[k]:
        print(k, showing_author, ':', movie_scoring_avgs[k][showing_author])
        sample_true = movie_scorings[k][showing_author][True]
        sample_false = movie_scorings[k][showing_author][False]
        print('  sample sizes :', len(sample_true), len(sample_false))
        print(' ', stats.ttest_ind(sample_true, sample_false))
        print(' ', stats.ranksums(sample_true, sample_false))
    print()

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Higher = more accurate/informational/persuasive/interesting

acc False : {True: 4.073170731707317, False: 4.238095238095238}
  sample sizes : 41 42
  Ttest_indResult(statistic=-1.1527335704505894, pvalue=0.2524103177596397)
  RanksumsResult(statistic=-1.1385298447559613, pvalue=0.2548993040305828)
acc True : {True: 4.052631578947368, False: 4.316666666666666}
  sample sizes : 38 60
  Ttest_indResult(statistic=-1.626203393519276, pvalue=0.10718472815232107)
  RanksumsResult(statistic=-1.600442688625461, pvalue=0.10950041139045419)

inf False : {True: 3.7804878048780486, False: 4.0476190476190474}
  sample sizes : 41 42
  Ttest_indResult(statistic=-1.3002663908746652, pvalue=0.1971973232448903)
  RanksumsResult(statistic=-0.8151873688452682, pvalue=0.41496506691002755)
inf True : {True: 3.9473684210526314, False: 4.283333333333333}
  sample sizes : 38 60
  Ttest_indResult(statistic=

## Likert responses per movie when showing bot v human?

In [58]:
# likert scales for each review/movie, compare scores and qualitative points raised for human vs bot reviews
wins = {}
for likert_type in ['acc', 'inf', 'per', 'int']:
    wins[likert_type] = {'bot':0, 'human':0, 'tie':0}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        movie_scorings = {}
        for likert_type in ['acc', 'inf', 'per', 'int']:
            movie_scorings[likert_type] = {True:None, False:None}
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for likert_type in ['acc', 'inf', 'per', 'int']:
                scorings = textsrc_group[f'{likert_type}_lik_M{movie_number}']
                movie_scorings[likert_type][textsrc_fromhuman] = scorings
        movie_scoring_avgs = {}
        for likert_type in ['acc', 'inf', 'per', 'int']:
            movie_scoring_avgs[likert_type] = {True:ref_tools.l_avgifnonnone(movie_scorings[likert_type][True]), False:ref_tools.l_avgifnonnone(movie_scorings[likert_type][False])}
        for likert_type in ['acc', 'inf', 'per', 'int']:
            if movie_scoring_avgs[likert_type][True] is not None and movie_scoring_avgs[likert_type][False] is not None:
                print(likert_type, '::', moviename)
                print('    bot  : ', movie_scoring_avgs[likert_type][False])
                print('    human: ', movie_scoring_avgs[likert_type][True])
                if movie_scoring_avgs[likert_type][False]==movie_scoring_avgs[likert_type][True]:
                    print('  Tied')
                    wins[likert_type]['tie'] += 1
                elif movie_scoring_avgs[likert_type][False]>movie_scoring_avgs[likert_type][True]:
                    print('  Bot wins')
                    wins[likert_type]['bot'] += 1
                else:
                    print('  Human wins')
                    wins[likert_type]['human'] += 1

print('---')
print(wins)

acc :: Anna and the King
    bot  :  4.25
    human:  4.25
  Tied
inf :: Anna and the King
    bot  :  3.9411764705882355
    human:  4.0
  Human wins
per :: Anna and the King
    bot  :  3.8823529411764706
    human:  4.0
  Human wins
int :: Anna and the King
    bot  :  3.8823529411764706
    human:  3.75
  Bot wins
acc :: Batman: Mask of the Phantasm
    bot  :  5.0
    human:  4.333333333333333
  Bot wins
inf :: Batman: Mask of the Phantasm
    bot  :  4.090909090909091
    human:  4.25
  Human wins
per :: Batman: Mask of the Phantasm
    bot  :  4.545454545454546
    human:  4.75
  Human wins
int :: Batman: Mask of the Phantasm
    bot  :  4.090909090909091
    human:  4.5
  Human wins
inf :: Miniscule: Valley of the Lost Ants
    bot  :  4.0
    human:  4.25
  Human wins
per :: Miniscule: Valley of the Lost Ants
    bot  :  4.5
    human:  4.0
  Bot wins
int :: Miniscule: Valley of the Lost Ants
    bot  :  4.0
    human:  4.0
  Tied
inf :: Real Genius
    bot  :  4.0
    human: 

# Vis: MoreAuthor

## MoreAuthor when showing bot v human?

In [59]:
# across all movies, how many 'want to see more = yes' answers did human vs bot reviews get?
movie_seemore = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            seemores = list(textsrc_group[f'M{movie_number}_moreauthor'])
            movie_seemore[textsrc_fromhuman] = movie_seemore[textsrc_fromhuman] + seemores
for k in movie_seemore:
    movie_seemore[k] = [1 if e else 0 for e in movie_seemore[k]]
movie_seemore_avgs = {True:ref_tools.l_avgifnonnone(movie_seemore[True]), False:ref_tools.l_avgifnonnone(movie_seemore[False])}

print('True is by human, False is by bot')
print()
print('Higher = wanting to see more from the same author')
print()
print(movie_seemore_avgs)
print('  sample sizes :', len(movie_seemore[True]), len(movie_seemore[False]))
print(' ', stats.ttest_ind(movie_seemore[True], movie_seemore[False]))
print(' ', stats.ranksums(movie_seemore[True], movie_seemore[False]))

True is by human, False is by bot

Higher = wanting to see more from the same author

{True: 0.59, False: 0.57}
  sample sizes : 300 300
  Ttest_indResult(statistic=0.4955655512702128, pvalue=0.6203830220600057)
  RanksumsResult(statistic=0.4239109566501593, pvalue=0.6716307532296977)


## MoreAuthor when showing bot v human, +unseen?

In [60]:
# across all movies, how many 'want to see more = yes' answers did human vs bot reviews get?
# for UNSEEN movies only
movie_seemore = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==False]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            seemores = list(textsrc_group[f'M{movie_number}_moreauthor'])
            movie_seemore[textsrc_fromhuman] = movie_seemore[textsrc_fromhuman] + seemores
for k in movie_seemore:
    movie_seemore[k] = [1 if e else 0 for e in movie_seemore[k]]
movie_seemore_avgs = {True:ref_tools.l_avgifnonnone(movie_seemore[True]), False:ref_tools.l_avgifnonnone(movie_seemore[False])}

print('True is by human, False is by bot')
print()
print('Higher = wanting to see more from the same author')
print()
print(movie_seemore_avgs)
print('  sample sizes :', len(movie_seemore[True]), len(movie_seemore[False]))
print(' ', stats.ttest_ind(movie_seemore[True], movie_seemore[False]))
print(' ', stats.ranksums(movie_seemore[True], movie_seemore[False]))

True is by human, False is by bot

Higher = wanting to see more from the same author

{True: 0.5927601809954751, False: 0.5454545454545454}
  sample sizes : 221 198
  Ttest_indResult(statistic=0.9753697327319979, pvalue=0.3299423613612985)
  RanksumsResult(statistic=0.8363298459413709, pvalue=0.4029693671443967)


In [61]:
# across all movies, how many 'want to see more = yes' answers did human vs bot reviews get?
# for SEEN movies only
movie_seemore = {True:[], False:[]}
for movie_number in [1, 2, 3, 4, 5]:
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==True]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            seemores = list(textsrc_group[f'M{movie_number}_moreauthor'])
            movie_seemore[textsrc_fromhuman] = movie_seemore[textsrc_fromhuman] + seemores
for k in movie_seemore:
    movie_seemore[k] = [1 if e else 0 for e in movie_seemore[k]]
movie_seemore_avgs = {True:ref_tools.l_avgifnonnone(movie_seemore[True]), False:ref_tools.l_avgifnonnone(movie_seemore[False])}

print('True is by human, False is by bot')
print()
print('Higher = wanting to see more from the same author')
print()
print(movie_seemore_avgs)
print('  sample sizes :', len(movie_seemore[True]), len(movie_seemore[False]))
print(' ', stats.ttest_ind(movie_seemore[True], movie_seemore[False]))
print(' ', stats.ranksums(movie_seemore[True], movie_seemore[False]))

True is by human, False is by bot

Higher = wanting to see more from the same author

{True: 0.5822784810126582, False: 0.6176470588235294}
  sample sizes : 79 102
  Ttest_indResult(statistic=-0.4797967833014937, pvalue=0.6319576378060059)
  RanksumsResult(statistic=-0.40762063560748335, pvalue=0.6835522119885951)


## MoreAuthor when showing bot v human, +author?

In [62]:
# across all movies, how many 'want to see more = yes' answers did human vs bot reviews get?
movie_seemore = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in movie_group.groupby(by='showingauthor'):
                seemores = list(textsrc_group[f'M{movie_number}_moreauthor'])
                movie_seemore[authorusednum][textsrc_fromhuman] = movie_seemore[authorusednum][textsrc_fromhuman] + seemores
for a in movie_seemore:
    for k in movie_seemore[a]:
        movie_seemore[a][k] = [1 if e else 0 for e in movie_seemore[a][k]]
movie_seemore_avgs = {
    False:{True:ref_tools.l_avgifnonnone(movie_seemore[False][True]), False:ref_tools.l_avgifnonnone(movie_seemore[False][False])},
    True:{True:ref_tools.l_avgifnonnone(movie_seemore[True][True]), False:ref_tools.l_avgifnonnone(movie_seemore[True][False])},
}

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Higher = wanting to see more from the same author')
print()
for showing_author in movie_seemore_avgs:
    print(showing_author, ':', movie_seemore_avgs[showing_author])
    sample_true = movie_seemore[showing_author][True]
    sample_false = movie_seemore[showing_author][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
print()
# did knowing the authorship did impact this difference in numbers?

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Higher = wanting to see more from the same author

False : {True: 0.7012987012987013, False: 0.6626506024096386}
  sample sizes : 77 83
  Ttest_indResult(statistic=0.5213338965541524, pvalue=0.6028641529276315)
  RanksumsResult(statistic=0.421754994055971, pvalue=0.6732038591039793)
True : {True: 0.6756756756756757, False: 0.6627906976744186}
  sample sizes : 74 86
  Ttest_indResult(statistic=0.17158840898621522, pvalue=0.8639806870881573)
  RanksumsResult(statistic=0.14031253022651508, pvalue=0.8884130645741746)



## MoreAuthor when showing bot v human, +unseen, +author?

In [63]:
# across all movies, how many 'want to see more = yes' answers did human vs bot reviews get?
# for UNSEEN movies only
movie_seemore = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==False]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in movie_group.groupby(by='showingauthor'):
                seemores = list(textsrc_group[f'M{movie_number}_moreauthor'])
                movie_seemore[authorusednum][textsrc_fromhuman] = movie_seemore[authorusednum][textsrc_fromhuman] + seemores
for a in movie_seemore:
    for k in movie_seemore[a]:
        movie_seemore[a][k] = [1 if e else 0 for e in movie_seemore[a][k]]
movie_seemore_avgs = {
    False:{True:ref_tools.l_avgifnonnone(movie_seemore[False][True]), False:ref_tools.l_avgifnonnone(movie_seemore[False][False])},
    True:{True:ref_tools.l_avgifnonnone(movie_seemore[True][True]), False:ref_tools.l_avgifnonnone(movie_seemore[True][False])},
}

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Higher = wanting to see more from the same author')
print()
for showing_author in movie_seemore_avgs:
    print(showing_author, ':', movie_seemore_avgs[showing_author])
    sample_true = movie_seemore[showing_author][True]
    sample_false = movie_seemore[showing_author][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
print()
# did knowing the authorship did impact this difference in numbers?

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Higher = wanting to see more from the same author

False : {True: 0.591743119266055, False: 0.5431472081218274}
  sample sizes : 218 197
  Ttest_indResult(statistic=0.9969549943136633, pvalue=0.3193701016981631)
  RanksumsResult(statistic=0.8552144913058041, pvalue=0.39243241517932315)
True : {True: 0.5927601809954751, False: 0.5482233502538071}
  sample sizes : 221 197
  Ttest_indResult(statistic=0.9172939535253465, pvalue=0.359520109863368)
  RanksumsResult(statistic=0.7863251655193771, pvalue=0.43167701285544413)



In [64]:
# across all movies, how many 'want to see more = yes' answers did human vs bot reviews get?
# for SEEN movies only
movie_seemore = {False:{True:[], False:[]}, True:{True:[], False:[]}}
for movie_number in [1, 2, 3, 4, 5]:
    df_analysis_mx_filtered = df_analysis[df_analysis[f'prevwatchedbin_M{movie_number}']==True]
    for moviename, movie_group in df_analysis_mx_filtered.groupby(by=f'm{movie_number}_title'):
        for textsrc_fromhuman, textsrc_group in movie_group.groupby(by=f'textsrc_M{movie_number}_byhuman'):
            for authorusednum, authormovie_group in movie_group.groupby(by='showingauthor'):
                seemores = list(textsrc_group[f'M{movie_number}_moreauthor'])
                movie_seemore[authorusednum][textsrc_fromhuman] = movie_seemore[authorusednum][textsrc_fromhuman] + seemores
for a in movie_seemore:
    for k in movie_seemore[a]:
        movie_seemore[a][k] = [1 if e else 0 for e in movie_seemore[a][k]]
movie_seemore_avgs = {
    False:{True:ref_tools.l_avgifnonnone(movie_seemore[False][True]), False:ref_tools.l_avgifnonnone(movie_seemore[False][False])},
    True:{True:ref_tools.l_avgifnonnone(movie_seemore[True][True]), False:ref_tools.l_avgifnonnone(movie_seemore[True][False])},
}

print('top-level False is not showing author, True is showing author')
print('sub-level True is by human, False is by bot')
print()
print('Higher = wanting to see more from the same author')
print()
for showing_author in movie_seemore_avgs:
    print(showing_author, ':', movie_seemore_avgs[showing_author])
    sample_true = movie_seemore[showing_author][True]
    sample_false = movie_seemore[showing_author][False]
    print('  sample sizes :', len(sample_true), len(sample_false))
    print(' ', stats.ttest_ind(sample_true, sample_false))
    print(' ', stats.ranksums(sample_true, sample_false))
print()

top-level False is not showing author, True is showing author
sub-level True is by human, False is by bot

Higher = wanting to see more from the same author

False : {True: 0.5974025974025974, False: 0.6082474226804123}
  sample sizes : 77 97
  Ttest_indResult(statistic=-0.14441927073928934, pvalue=0.8853384796133268)
  RanksumsResult(statistic=-0.12271436154184673, pvalue=0.9023332909585844)
True : {True: 0.5945945945945946, False: 0.6060606060606061}
  sample sizes : 74 99
  Ttest_indResult(statistic=-0.15150669203238984, pvalue=0.8797546245227955)
  RanksumsResult(statistic=-0.12886406652139457, pvalue=0.8974652103046127)



# Text per Likert question

In [65]:
print('Qualtrics: // Was this recommendation text factually accurate (vs. inaccurate)?')
print('Qualtrics: // Was this recommendation text informative (vs. uninformative)?')
print('Qualtrics: // Was this recommendation text persuasive (vs. unconvincing)?')
print('Qualtrics: // Was this recommendation text interesting to read (vs. uninteresting)?')

Qualtrics: // Was this recommendation text factually accurate (vs. inaccurate)?
Qualtrics: // Was this recommendation text informative (vs. uninformative)?
Qualtrics: // Was this recommendation text persuasive (vs. unconvincing)?
Qualtrics: // Was this recommendation text interesting to read (vs. uninteresting)?


In [66]:
list(df_analysis['per_text_M1'])

[" The text was persuasive in that it encouraged viewers to watch the 1999 version of the movie, as it suggested it was the best adaptation yet. It also highlighted the beauty of the film's visuals and the strength of the story itself.\n",
 " The text was persuasive in that it encouraged viewers to watch the 1999 version of the movie, as it suggested it was the best adaptation yet. It also highlighted the beauty of the film's visuals and the strength of the story itself.",
 'The movie is somewhat persuasive',
 'It is a first animated batman movie',
 'some reviewers mention that disappointing melodramatic',
 'The Poseidon Adventure promises to take you on a thrilling journey and leave you on the edge of your seat.',
 'What sets this disaster film apart is the characters - from married couples to a minister and a hypochondriac, each..."',
 'it has razor-sharp dialogue and unbelievable twists',
 "It provides a general overview and description of the movie, focusing on the director's work,

# Final free text

In [67]:
print('Qualtrics: // Do you have any further comments you would like to share with us?')
list(df_analysis['freetext_final'])

Qualtrics: // Do you have any further comments you would like to share with us?


['NO',
 'NO',
 'none',
 'no',
 'nothing',
 'none',
 'None',
 'no thank you. hope i could help. have a great day :)',
 "No, but it's all my own opinion. thanks.",
 nan,
 'none',
 nan,
 'none',
 'Thank you.',
 'Not that I can think of.',
 'no',
 'none',
 'na',
 'NO',
 'nothing',
 'none',
 'NO',
 'GOOD',
 'No',
 'none',
 'Thank you.',
 'no comments',
 'none',
 'none',
 'Thank you.',
 'No.',
 'good survey',
 'good',
 nan,
 'nothing to share  the stud was good ',
 "it was a good questionnaire about movie and it's reviews",
 'Overall good I saw the Batman film before. And know on this list which I might try one day or those would avoid it.',
 'NOTHING',
 'Thank you.',
 'no',
 'I tried to give my opinion, please respect it. Thanks',
 nan,
 nan,
 'good',
 'good',
 nan,
 'none',
 'None ',
 'no',
 'None',
 'NO',
 'good',
 'Nothing to add really.',
 'nothing',
 'no',
 'none',
 'none',
 'None',
 'NONE',
 nan,
 'Thank you.',
 'no',
 nan,
 'Thank you.',
 'no',
 'no',
 'Very Good Work, Thank You.',
 