In [1]:
import pandas as pd
from scipy import stats
from scipy import special
import numpy as np
import altair as alt

# Seed NumPy's global (legacy) random state so the draws below repeat on a fresh Run All.
np.random.seed(23)

# Simulation

## Predictors

- `work_from_home` is a Boolean variable indicating whether the cat can work from home, which is 1 if they can work from home and 0 if not.
- `whisker_length` is the length of the cat's whiskers as a positive number.
- `trust_in_government` is a value from 0 to 100 indicating the level of trust put in the government.
- `fifth_generation` is a Boolean variable indicating whether the cat thinks 5G is a government conspiracy, which is 1 if they think 5G is a conspiracy and 0 if not.

## Responses

- `support_lockdown` is a Likert scale response about whether the cat supports the lockdown measures. The five levels "strongly against", "against", "neutral", "support", "strongly support" are coded as 0--4.
- `will_vaccinate` is a `Maybe Bool` indicating if the cat will accept a vaccination, this is coded as follows: "yes" as 1, "no answer" as 0 and "no" as -1.

In [2]:
def random_cat_record(random_state=None):
    """Simulate the survey record of a single cat.

    The predictors are drawn first, then the two responses are generated
    from logistic models of those predictors.

    Parameters
    ----------
    random_state : None, int, numpy.random.RandomState or numpy.random.Generator, optional
        Source of randomness for every draw. ``None`` (the default) keeps the
        original behaviour of drawing from NumPy's global random state, so
        ``np.random.seed(...)`` still controls the result. An int (or other
        seed-like value) is converted once into a ``numpy.random.RandomState``;
        an existing ``RandomState``/``Generator`` is used as-is and advanced.

    Returns
    -------
    dict
        One record with the keys

        - ``work_from_home``: 0/1, Bernoulli with probability 0.4.
        - ``whisker_length``: ``0.3 * loudness + 0.3 * height`` plus Gaussian
          noise, where ``height`` and ``loudness`` share a latent factor.
        - ``trust_in_government``: between 50.5 and 75.75, decreasing in the
          latent ``conspiracy_level``.
        - ``fifth_generation``: 0/1, Bernoulli with probability
          ``0.2 * conspiracy_level``.
        - ``will_vaccinate``: 1 ("yes"), -1 ("no") or 0 (no answer given).
        - ``support_lockdown``: Likert level 0..4.
    """
    # Passing a bare seed to each ``rvs`` call would build a fresh, identically
    # seeded generator per draw, so normalise seed-like values to one shared
    # generator up front.
    if random_state is not None and not isinstance(
        random_state, (np.random.RandomState, np.random.Generator)
    ):
        random_state = np.random.RandomState(random_state)
    rng = random_state

    # NOTE: the order of the draws below is part of the behaviour -- changing it
    # changes the data produced for a given seed.
    work_from_home = stats.bernoulli.rvs(0.4, random_state=rng)

    # ``hidden`` is a latent factor shared by height and loudness; it never
    # appears in the returned record.
    hidden = stats.norm.rvs(random_state=rng)
    height = stats.norm.rvs(loc=24 + hidden, scale=0.5, random_state=rng)
    loudness = np.log(
        stats.expon.rvs(scale=10 + 5 * (4 + max(hidden, 0)), random_state=rng) + 5
    )
    whisker_length = (
        0.3 * loudness + 0.3 * height + 0.1 * stats.norm.rvs(scale=2, random_state=rng)
    )

    # ``conspiracy_level`` is a latent factor that lowers trust, raises the
    # chance of the 5G belief and raises the chance of not answering.
    conspiracy_level = stats.uniform.rvs(random_state=rng)
    trust_in_government = (
        0.5 + 50 + 0.25 * 100 * (1 - conspiracy_level)
        + 0.25 * stats.uniform.rvs(random_state=rng)
    )
    fifth_generation = stats.bernoulli.rvs(0.2 * conspiracy_level, random_state=rng)

    prob_vaccinate = special.expit(-0.1 +
                                   work_from_home +
                                   0.2 * whisker_length +
                                   0.1 * (trust_in_government - 50) +
                                   (-3) * fifth_generation)

    # The vaccination draw only happens when the cat answers at all.
    will_answer = stats.bernoulli.rvs(1 - 0.2 * conspiracy_level, random_state=rng)
    if bool(will_answer):
        will_vaccinate_given_answer = stats.bernoulli.rvs(prob_vaccinate, random_state=rng)
        will_vaccinate = 1 if bool(will_vaccinate_given_answer) else (-1)
    else:
        will_vaccinate = 0

    # Same linear predictor shape as above, but with heavy Gaussian noise added
    # on the logit scale before squashing to (0, 1).
    prob_likert = special.expit(-1 +
                                work_from_home +
                                0.2 * whisker_length +
                                0.2 * (trust_in_government - 50) +
                                (-3) * fifth_generation +
                                stats.norm.rvs(scale=5, random_state=rng))

    # Bin the score into five Likert levels: the level is the index of the
    # first upper bound the score falls strictly below, or 4 if none.
    support_lockdown = 4
    for level, upper_bound in enumerate((0.3, 0.45, 0.55, 0.70)):
        if prob_likert < upper_bound:
            support_lockdown = level
            break

    return {
        "work_from_home": work_from_home,
        "whisker_length": whisker_length,
        "trust_in_government": trust_in_government,
        "fifth_generation": fifth_generation,
        "will_vaccinate": will_vaccinate,
        "support_lockdown": support_lockdown
    }

In [3]:
# Simulate 1000 cats, one record per cat, and collect them into a single table.
cat_df = pd.DataFrame(
    data=[random_cat_record() for _cat in range(1000)]
)

# Column names in the order they appear in each record.
col_names = list(cat_df.columns)

In [4]:
# Persist the simulated survey to the working directory, without the row index.
cat_df.to_csv(path_or_buf="cat-opinions.csv", index=False)

In [5]:
# Pairwise Pearson correlations between every pair of columns.
cat_df.corr(method="pearson")

Unnamed: 0,work_from_home,whisker_length,trust_in_government,fifth_generation,will_vaccinate,support_lockdown
work_from_home,1.0,-0.022976,0.006148,0.025118,0.060566,0.064481
whisker_length,-0.022976,1.0,0.044197,-0.05009,0.049822,-0.003602
trust_in_government,0.006148,0.044197,1.0,-0.175242,0.293528,0.31642
fifth_generation,0.025118,-0.05009,-0.175242,1.0,-0.384903,-0.180478
will_vaccinate,0.060566,0.049822,0.293528,-0.384903,1.0,0.143476
support_lockdown,0.064481,-0.003602,0.31642,-0.180478,0.143476,1.0


In [6]:
# Bar chart of the vaccination response; titles spell out the -1/0/1 coding
# so the figure can be read without the surrounding text.
alt.Chart(cat_df).mark_bar().encode(
    x=alt.X("will_vaccinate:O", title="will_vaccinate (-1 = no, 0 = no answer, 1 = yes)"),
    y=alt.Y("count()", title="Number of cats"),
).properties(title="Simulated vaccination responses")

In [7]:
# Bar chart of the lockdown Likert response; titles spell out the 0..4 coding
# so the figure can be read without the surrounding text.
alt.Chart(cat_df).mark_bar().encode(
    x=alt.X("support_lockdown:O", title="support_lockdown (0 = strongly against ... 4 = strongly support)"),
    y=alt.Y("count()", title="Number of cats"),
).properties(title="Simulated support for lockdown measures")