# Left, Right, Gender

## Participant Demographics and Background

### Library Imports

In [1]:
import pandas as pd
import numpy as np
# Set pandas' display row limit to 500; presumably so the longer count tables
# further down are shown in full rather than elided.
pd.set_option('display.max_rows', 500)

## Background

In [2]:
# Load the background questionnaire export from Qualtrics/CLEAN and replace
# the verbose question-text headers with the short column names used by the
# cells below.
background_file = "Qualtrics/CLEAN/Background.xlsx"

# Original spreadsheet header -> short column name.
background_column_names = {
    "LRG ID": "ID",
    "ID - Final": "PID",
    "What is your age? - 4 - Text": "Age",
    "With which gender identity do you most identify? - Selected Choice": "Gender",
    "What is the highest level of education you have either completed or are currently pursuing?": "Education",
    "What is your major field of study (if you are a student) or field of occupation?": "Field",
    "What is your level of experience looking at data in a visual form (e.g., scatterplot, bar chart, etc)? - I have no experience:I consider myself an expert": "VIS Literacy",
}

# rename() returns a new frame, so the cell builds df_background in a single
# expression instead of mutating it in place after loading.
df_background = pd.read_excel(background_file).rename(columns=background_column_names)

### Age

In [3]:
# Coerce Age to numbers. Entries that cannot be parsed as a number become NaN
# and are reported separately instead of entering the statistics. Unlike a
# `.str`-based filter, this also works when the column is already numeric or
# when ages were read in as numeric strings.
age_numeric = pd.to_numeric(df_background["Age"], errors="coerce")
age_reported = age_numeric.dropna().astype("int")

# Count of participants without a usable numeric age. Note that blank answers
# are counted here as well as text answers.
print(f"{age_numeric.isna().sum()} = Prefer Not To Say\n")
print("Others (below)")
print("Median: ", age_reported.median())
age_reported.describe()

1 = Prefer Not To Say

Others (below)
Median:  39.0


count    49.000000
mean     40.326531
std      11.565732
min      24.000000
25%      31.000000
50%      39.000000
75%      49.000000
max      69.000000
Name: Age, dtype: float64

### Gender

In [4]:
# Participants per gender identity: group on Gender, count the non-null IDs.
df_background.groupby("Gender")[["ID"]].count()

Unnamed: 0_level_0,ID
Gender,Unnamed: 1_level_1
Female,28
Male,21
agender,1


### VIS Literacy

In [5]:
# Cast the visualization-experience ratings to integers once, then report the
# median followed by the full descriptive summary.
vis_literacy = df_background["VIS Literacy"].astype("int")
print("Median: ", vis_literacy.median())
vis_literacy.describe()

Median:  3.0


count    50.000000
mean      3.080000
std       0.899887
min       1.000000
25%       3.000000
50%       3.000000
75%       4.000000
max       5.000000
Name: VIS Literacy, dtype: float64

### Education

In [6]:
# Participants per education level: group on Education, count the non-null IDs.
df_background.groupby("Education")[["ID"]].count()

Unnamed: 0_level_0,ID
Education,Unnamed: 1_level_1
Associate's Degree,7
Bachelor's Degree,20
High School,6
Master's Degree,3
Post-graduate Degree,2
Some College,12


### Field

In [7]:
# Participants per field of study / occupation. Field values are grouped
# exactly as entered, so different spellings end up in separate groups.
df_background.groupby("Field")[["ID"]].count()

Unnamed: 0_level_0,ID
Field,Unnamed: 1_level_1
Analyst,1
Art,1
Biology,2
Bookkeeper,1
Business,3
Business Admin,1
Business owner and manager,1
Computer Science,3
Computer engineering,1
Criminal Justice,1


## Pre-Survey - Politics

In [8]:
# Load the politics pre-survey export from Qualtrics/CLEAN and replace the
# verbose question-text headers with short column names.
# NOTE: the path is still stored in `background_file`, the name this cell has
# always assigned, even though it now points at the politics pre-survey.
background_file = "Qualtrics/CLEAN/Pre-Survey-Politics.xlsx"

# Original spreadsheet header -> short column name.
politics_column_names = {
    "LRG ID": "ID",
    "ID - Final": "PID",
    "What types of political elections have you voted in? (Select all that apply or None of the above)": "Political Elections",
    "Candidates from which political party are you most likely to vote for in the next US political election?": "Political Affiliation",
    "Where on the left-right political spectrum do your political views most closely fall?": "Political Spectrum",
}

# rename() returns a new frame, so the cell builds df_pre_survey in a single
# expression instead of mutating it in place after loading.
df_pre_survey = pd.read_excel(background_file).rename(columns=politics_column_names)

In [9]:
# Show the column labels of df_pre_survey to confirm the short names are in place.
df_pre_survey.columns

Index(['Recorded Date', 'Duration (in seconds)', 'PID', 'ID',
       'Political Elections', 'Which of the following is NOT a fruit?',
       'Political Affiliation', 'Political Spectrum'],
      dtype='object')

### Types of Political Elections Voted In

In [10]:
# Participants per answer to the elections question. This is a multi-select
# item stored as one comma-joined string, so every distinct combination of
# choices forms its own group.
df_pre_survey.groupby("Political Elections")[["ID"]].count()

Unnamed: 0_level_0,ID
Political Elections,Unnamed: 1_level_1
US Presidential election,7
"US Presidential election,US State election",7
"US Presidential election,US State election,US Local election",35
US State election,1


### Political Affiliation

In [11]:
# Participants per party they are most likely to vote for: count non-null IDs.
df_pre_survey.groupby("Political Affiliation")[["ID"]].count()

Unnamed: 0_level_0,ID
Political Affiliation,Unnamed: 1_level_1
Democratic,35
Republican,15


### Political Spectrum

In [12]:
# Participants per self-placed position on the left-right spectrum.
df_pre_survey.groupby("Political Spectrum")[["ID"]].count()

Unnamed: 0_level_0,ID
Political Spectrum,Unnamed: 1_level_1
Conservative,2
Liberal,17
Moderate,8
Moderate Conservative,9
Moderate Liberal,14


## Pre-Survey - Movies

In [13]:
# Load the movies pre-survey export from Qualtrics/CLEAN and replace the
# verbose question-text headers with short column names.
# NOTE: this rebinds both `background_file` and `df_pre_survey`, so from here
# on `df_pre_survey` holds the movies data rather than the politics data
# loaded earlier in the notebook.
background_file = "Qualtrics/CLEAN/Pre-Survey-Movies.xlsx"

# Original spreadsheet header -> short column name.
movies_column_names = {
    "LRG ID": "ID",
    "ID - Final": "PID",
    "How much importance do you place on movies personally?": "Importance on Movies",
    "How often do you watch movies? (Choose the period that most closely matches your experience)": "Frequency of Watching Movies",
    "What genre(s) of movies do you watch? (Select all that apply)": "Genres Watched",
    "What rating service(s) do you use when determining movie quality? (Select all that apply) - Selected Choice": "Preferred Rating Service",
}

# rename() returns a new frame, so the cell builds df_pre_survey in a single
# expression instead of mutating it in place after loading.
df_pre_survey = pd.read_excel(background_file).rename(columns=movies_column_names)

In [14]:
# Show the column labels of df_pre_survey to confirm the short names are in place.
df_pre_survey.columns

Index(['Recorded Date', 'Duration (in seconds)', 'PID', 'ID',
       'Importance on Movies', 'Frequency of Watching Movies',
       'Which domain did you complete the practice task about?',
       'Genres Watched', 'Preferred Rating Service'],
      dtype='object')

### Importance on Movies

In [15]:
# Participants per stated level of personal importance placed on movies.
df_pre_survey.groupby("Importance on Movies")[["ID"]].count()

Unnamed: 0_level_0,ID
Importance on Movies,Unnamed: 1_level_1
Large importance,12
Little importance,15
Moderate importance,20
No importance whatsoever,2
They are one of the most important parts of my life,1


### Frequency of Watching Movies

In [16]:
# Participants per reported movie-watching frequency.
df_pre_survey.groupby("Frequency of Watching Movies")[["ID"]].count()

Unnamed: 0_level_0,ID
Frequency of Watching Movies,Unnamed: 1_level_1
Daily,4
Monthly,16
Weekly,30


### Preferred Rating Service

In [17]:
# Participants per rating-service answer. This is a multi-select item stored
# as one comma-joined string, so every distinct combination of services forms
# its own group.
df_pre_survey.groupby("Preferred Rating Service")[["ID"]].count()

Unnamed: 0_level_0,ID
Preferred Rating Service,Unnamed: 1_level_1
I don't use rating services,8
Internet Movie Database (IMDB),16
"Internet Movie Database (IMDB),I don't use rating services",1
Rotten Tomatoes,10
"Rotten Tomatoes,Internet Movie Database (IMDB)",13
"Rotten Tomatoes,Internet Movie Database (IMDB),Fandango",1
"Rotten Tomatoes,Internet Movie Database (IMDB),Google Reviews",1


### Genres Watched

In [18]:
# Participants per genre answer. This is a multi-select item stored as one
# comma-joined string, so every distinct combination of genres forms its own
# group.
df_pre_survey.groupby("Genres Watched")[["ID"]].count()

Unnamed: 0_level_0,ID
Genres Watched,Unnamed: 1_level_1
Action,1
"Action,Comedy,Documentary",3
"Action,Comedy,Documentary,Drama",1
"Action,Comedy,Documentary,Drama,Musical,Thriller",8
"Action,Comedy,Documentary,Drama,Thriller",10
"Action,Comedy,Documentary,Thriller",1
"Action,Comedy,Drama,Thriller",2
"Action,Comedy,Thriller",2
"Action,Documentary,Drama,Musical",1
"Action,Documentary,Drama,Musical,Thriller",1
