### Abstract

What are the demographics of the Slate Star Codex readership? How do those demographics break down when respondents are grouped by the candidate they voted for in the 2020 Democratic primary? (The analysis below is restricted to respondents in the United States.)

In [1]:
# Load data
import pandas as pd
import chardet
csv_path = '2020ssc_public.csv'
# Detect encoding with chardet on the first 50000 bytes of the file.
# The result is bound to a name so it can be inspected; as a bare mid-cell
# expression it was computed and then silently thrown away.
with open(csv_path, 'rb') as fraw:
    file_content = fraw.read(50000)
detected_encoding = chardet.detect(file_content)
# The encoding stays pinned so the load is reproducible; compare it against
# `detected_encoding` if the source file is ever replaced.
ssc = pd.read_csv(csv_path, encoding = 'Windows-1252', low_memory = False)
print(ssc.shape)

(7338, 50)


In [2]:
# Only look at American samples
# na = False makes a missing Country count as "no match"; without it the mask
# contains NaN and boolean indexing raises instead of filtering.
ssc = ssc[ssc['Country'].str.match('United States', na = False)]
print(len(ssc))

4361


In [3]:
# Turn empty / whitespace-only cells into missing values (NaN)
import numpy as np
ssc = ssc.replace(
    to_replace = r'^\s*$',
    value = np.nan,
    regex = True
)

In [7]:
# Show the current contents of `ssc`; the rich display truncates long and wide frames with "..."
ssc

Unnamed: 0,PreviousSurveys,Public,Age,Country,Race,Sex,Gender,SexualOrientation,RelationshipStyle,RelationshipStatus,...,AmericanParties,PoliticalChange,GlobalWarming,Immigration,MinimumWage,Feminism,HumanBiodiversity,BasicIncome,DonaldTrump,DemocraticNominee
9,Yes,"Yes, you can release my anonymous answers publ...",,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Uncertain / no preference,Single,...,Democratic Party,No significant change,1,4,2,1,2,5,2,Andrew Yang
10,Yes,"Yes, you can release my anonymous answers publ...",,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Prefer monogamous,Single,...,Democratic Party,Become significantly further left,1,5,4,4,2,4,1,Elizabeth Warren
11,Yes,"Yes, you can release my anonymous answers publ...",,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Prefer monogamous,Married,...,Not registered for a party,Some other change not easily described in thos...,3,5,1,3,2,3,1,Andrew Yang
12,No,"Yes, you can release my anonymous answers publ...",,United States,Black,Male,M (cisgender),Heterosexual,Prefer monogamous,Single,...,Not registered for a party,No significant change,,2,5,2,3,4,4,Andrew Yang
16,No,"Yes, you can release my anonymous answers publ...",80,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Prefer monogamous,Married,...,Democratic Party,Become significantly further left,1,5,3,5,3,2,1,Pete Buttigieg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7331,Yes,"Yes, you can release my anonymous answers publ...",20,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Prefer monogamous,Relationship,...,Not registered for a party,Become significantly further right,2,5,4,5,1,3,1,Tulsi Gabbard
7332,No,"Yes, you can release my anonymous answers publ...",20,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Prefer monogamous,Relationship,...,Not registered for a party,Some other change not easily described in thos...,3,4,4,2,2,3,2,
7333,No,"Yes, you can release my anonymous answers publ...",20,United States,White (non-Hispanic),Male,M (cisgender),Heterosexual,Prefer monogamous,Relationship,...,Democratic Party,No significant change,2,5,3,4,2,3,1,
7334,No,"Yes, you can release my anonymous answers publ...",20,United States,Asian (Indian subcontinent),Male,M (cisgender),Heterosexual,Prefer monogamous,Single,...,Not registered for a party,No significant change,1,5,3,4,3,5,2,Andrew Yang


In [None]:
# Age figure
import seaborn
from matplotlib import pyplot
# Convert to numbers in one vectorised call; unparseable answers become NaN
age = pd.to_numeric(ssc.Age, errors = 'coerce')
# Cap (not remove) ages above 50 to make figure look nicer
def OlderThan50(age):
    """Return `age` capped at 50; NaN compares False and is returned unchanged."""
    if age > 50:
        return 50
    else:
        return age
dim = (2.5, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Pass the data as `y` so the box is vertical regardless of seaborn version
# (with only `x` given, newer releases ignore orient = 'v'), and draw on the
# axes created above instead of relying on the implicit current axes.
seaborn.boxplot(y = age.map(OlderThan50), ax = ax)

In [None]:
# Race figure
dim = (10, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Draw on the explicit axes first ...
seaborn.countplot(x = ssc.Race, order = ssc.Race.value_counts().index, ax = ax) # Sort by value_count
# ... then rotate, so the rotation applies to the category labels rather than
# to the placeholder ticks of a still-empty axes.
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Sex figure
dim = (5, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Bars ordered from most to least frequent answer, drawn on the explicit axes
seaborn.countplot(x = ssc.Sex, order = ssc.Sex.value_counts().index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Gender figure
dim = (10, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Bars ordered from most to least frequent answer, drawn on the explicit axes
seaborn.countplot(x = ssc.Gender, order = ssc.Gender.value_counts().index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Orientation figure
dim = (10, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Bars ordered from most to least frequent answer, drawn on the explicit axes
seaborn.countplot(x = ssc.SexualOrientation, order = ssc.SexualOrientation.value_counts().index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Relationship figure
dim = (5, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Bars ordered from most to least frequent answer, drawn on the explicit axes
seaborn.countplot(x = ssc.RelationshipStatus, order = ssc.RelationshipStatus.value_counts().index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Children figure
# Unparseable answers become NaN
children = pd.to_numeric(ssc.Children, errors = 'coerce')
# Remove nonsense non-integer values: keep whole numbers only. NaN % 1 is NaN,
# so missing answers are dropped here as well. A plain comparison is used
# because between(..., inclusive = False) is rejected by current pandas and
# only ever excluded the open interval (0, 1).
children = children[children % 1 == 0]
# Remove nonsense outliers
children = children[children < 10]

dim = (5, 5)
fig, ax = pyplot.subplots(figsize = dim)
seaborn.countplot(x = children, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Profession figure
dim = (20, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Only look at top ten answers, most frequent first
seaborn.countplot(x = ssc.Profession, order = ssc.Profession.value_counts().iloc[:10].index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Religion figure
dim = (10, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Only look at top ten answers, most frequent first
seaborn.countplot(x = ssc.ReligiousViews, order = ssc.ReligiousViews.value_counts().iloc[:10].index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Religious background figure
dim = (20, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Only look at top ten answers, most frequent first
seaborn.countplot(x = ssc.ReligiousBackground, order = ssc.ReligiousBackground.value_counts().iloc[:10].index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# IQ figure
# Unparseable answers become NaN
iq = pd.to_numeric(ssc.IQ, errors = 'coerce')
# Remove nonsense outliers (the comparison also drops NaN)
iq = iq[iq > 50]

# Number of usable answers out of all respondents in `ssc`
print("n =", len(iq), "/", len(ssc))
dim = (2.5, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Pass the data as `y` so the box is vertical regardless of seaborn version
# (with only `x` given, newer releases ignore orient = 'v').
seaborn.boxplot(y = iq, ax = ax)

In [None]:
# SAT verbal figure
# Unparseable answers become NaN
verbal = pd.to_numeric(ssc.SATscoreverbalreading, errors = 'coerce')
# Remove impossible scores: keep 200..800 inclusive (the comparisons also drop NaN)
verbal = verbal[(verbal >= 200) & (verbal <= 800)]

# Number of usable answers out of all respondents in `ssc`
print("n =", len(verbal), "/", len(ssc))
# Set the figure size here instead of inheriting `dim` from whichever cell ran last
dim = (2.5, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Pass the data as `y` so the box is vertical regardless of seaborn version
seaborn.boxplot(y = verbal, ax = ax)

In [None]:
# SAT math figure
# Unparseable answers become NaN.
# NOTE: the name `math` would shadow the standard-library module if that were
# imported; it is kept because a later cell reads this variable.
math = pd.to_numeric(ssc.SATscoremath, errors = 'coerce')
# Remove impossible scores: keep 200..800 inclusive (the comparisons also drop NaN)
math = math[(math >= 200) & (math <= 800)]

# Number of usable answers out of all respondents in `ssc`
print("n =", len(math), "/", len(ssc))
# Set the figure size here instead of inheriting `dim` from whichever cell ran last
dim = (2.5, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Pass the data as `y` so the box is vertical regardless of seaborn version
seaborn.boxplot(y = math, ax = ax)

In [None]:
# Democratic primary figure
dim = (20, 5)
fig, ax = pyplot.subplots(figsize = dim)
# Only look at top ten answers, most frequent first
seaborn.countplot(x = ssc.DemocraticNominee, order = ssc.DemocraticNominee.value_counts().iloc[:10].index, ax = ax)
# Rotate after drawing so the category labels (not empty-axes ticks) are rotated
ax.tick_params(axis = 'x', labelrotation = 22.5)

In [None]:
# Means per quantitative variable per candidate
# Swap in the cleaned numeric series built by the figure cells. Assignment
# aligns on the index, so rows filtered out of a series end up as NaN.
ssc['SATscoremath'] = math
ssc['SATscoreverbalreading'] = verbal
ssc['IQ'] = iq
ssc['Age'] = age
ssc['Children'] = children
# Every answer ranked below the six most frequent ones counts as "non-top"
nonTop = ssc['DemocraticNominee'].value_counts().iloc[6:].index
# Drop the rows that gave a non-top answer
top = ssc.loc[~ssc['DemocraticNominee'].isin(nonTop)]
# Mean of each numeric column within each remaining candidate group
grouped = top.groupby(['DemocraticNominee']).agg(
    {
        column: ['mean']
        for column in ('SATscoremath', 'SATscoreverbalreading', 'IQ', 'Age', 'Children')
    }
)

grouped.round(1)

In [None]:
# Most frequent response per qualitative variable per candidate
def mostFrequent(x):
    """Return the most common non-missing value of `x`, or NaN when there is none.

    value_counts() ignores NaN, so a group whose answers are all missing
    gives an empty result; indexing it with [0] would raise IndexError.
    """
    counts = x.value_counts()
    if len(counts) == 0:
        return np.nan
    return counts.index[0]
# Get most frequent responses
groupedQual = top.groupby(['DemocraticNominee']).agg(mostFrequent)
groupedQual[['ReligiousBackground', 'ReligiousViews', 'Profession', 'RelationshipStatus',
      'SexualOrientation', 'Gender', 'Sex', 'Race']]