In [44]:
import numpy as np
import pandas as pd
import random

# User Input Generator

The purpose of this notebook is to create functions that generate (fake) user inputs based on a given `agreement` value. The `agreement` value is a probability (between 0 and 1, inclusive).

## Functions

### A single question with multiple (pseudo) user answers -- aka multiple users


Inputs
- `choices`: the list of possible answer choices as strings
- `consensus`: the consensus answer(s) as a list. Must be included in `choices`
- `agreement`: the list of probabilities that each individual user agrees with the consensus (between 0 and 1, a double)
- `numAnswers`: the number of possible answers to the question. A tuple of 2 integers giving the (minimum, maximum) range of the number of possible answers. The maximum cannot be greater than the total number of choices, and the minimum must be at least 1 (0 answer choices is not allowed). Example: (1, 1) means that there is exactly 1 answer; (2, 5) means that there are 2 to 5 answers.

Output
- the user answers as a list of lists

Note: `SingleQuestionSingleUser` has been condensed into this one function since `agreement` of length 1 is equivalent to 1 user.

In [45]:
def SingleQuestionMultUsers(choices, consensus, agreement, numAnswers):
    """Generate simulated answers to a single question for several users.

    Each entry of `agreement` stands for one user. For every user a uniform
    random number is drawn; if it is less than or equal to that user's
    agreement value the user answers with the consensus, otherwise the user
    gets a random selection of choices that can never contain all consensus
    answers (one randomly picked consensus answer is removed from the pool
    before sampling).

    Parameters
    ----------
    choices : list
        The possible answer choices.
    consensus : list
        The consensus answer(s). Every value must be in `choices`.
    agreement : list of float
        One probability per user of agreeing with the consensus.
    numAnswers : tuple of (int, int)
        Inclusive (minimum, maximum) number of answers drawn for a user who
        disagrees with the consensus.

    Returns
    -------
    list of list
        One answer list per entry of `agreement`, in the same order.

    Raises
    ------
    ValueError
        If the bounds in `numAnswers` are not integers, if the range is
        empty, starts below 1 or exceeds `len(choices)`, or if `consensus`
        contains a value that is not in `choices`.
    """
    # None of the checks depend on the individual user, so they run once up
    # front instead of once per entry of `agreement`.
    low, high = numAnswers[0], numAnswers[1]
    for bound in (low, high):
        # bool is a subclass of int; keep rejecting it, but accept numpy ints.
        if isinstance(bound, bool) or not isinstance(bound, (int, np.integer)):
            raise ValueError("Number of answers must be integers.")
    if low > high or high > len(choices) or low <= 0:
        raise ValueError("Please check the range of number of possible answers.")

    for c in consensus:
        if c not in choices:
            raise ValueError("Value(s) in `consensus` must be in `choices`.")

    answers = []
    for a in agreement:
        if np.random.random() <= a:
            # Hand out a copy: appending `consensus` itself would make every
            # agreeing user (and the caller's list) share one mutable object.
            answer = list(consensus)
        else:
            numAns = np.random.randint(low, high + 1)
            # Work on a fresh pool so the caller's `choices` is never mutated.
            pool = list(choices)
            # Dropping one consensus answer guarantees the sampled answer
            # cannot be the full consensus.
            pool.remove(random.sample(consensus, 1)[0])
            # The pool is one element shorter than `choices`, so clamp.
            numAns = min(numAns, len(pool))
            answer = random.sample(pool, numAns)

        answers.append(answer)

    return answers

### Multiple Questions & Multiple Users

Loop over `SingleQuestionMultUsers`.

Inputs
- `choices`: the list of lists of possible answer choices as strings
- `consensus`: the consensus answer(s) as a list of lists. Must be included in `choices`
- `agreement`: the list of probabilities that each individual user agrees with the consensus (between 0 and 1, a double)
- `numQuestions`: the number of questions, should be equivalent to `len(choices)` and `len(consensus)`
- `numAnswers`: the number of possible answers to each question. A list of tuples of 2 integers (one tuple per question), each giving the (minimum, maximum) range of the number of possible answers. The maximum cannot be greater than the total number of choices for that question, and the minimum must be at least 1 (0 answer choices is not allowed). Example: (1, 1) means that there is exactly 1 answer; (2, 5) means that there are 2 to 5 answers.

Output
- the user answers as a list of lists of lists

In [46]:
def MultQuestionMultUsers(choices, consensus, agreement, numQuestions, numAnswers):
    """Generate simulated answers for several questions and several users.

    Calls `SingleQuestionMultUsers` once per question, passing the same
    `agreement` list every time.

    Parameters
    ----------
    choices : list of list
        Possible answer choices, one list per question.
    consensus : list of list
        Consensus answer(s), one list per question.
    agreement : list of float
        One probability per user of agreeing with the consensus.
    numQuestions : int
        Number of questions; must equal the length of `choices`,
        `consensus` and `numAnswers`.
    numAnswers : list of tuple
        One (minimum, maximum) tuple per question.

    Returns
    -------
    list of list of list
        Indexed as result[question][user] -> list of chosen answers.

    Raises
    ------
    ValueError
        If the per-question inputs do not all have length `numQuestions`.
    """
    sizes_agree = (
        numQuestions == len(choices)
        and len(choices) == len(consensus)
        and len(consensus) == len(numAnswers)
    )
    if not sizes_agree:
        raise ValueError("Number of questions is not reflected in either `choices` or `consensus` or `numAnswers` or some combination.")

    return [
        SingleQuestionMultUsers(choices[q], consensus[q], agreement, numAnswers[q])
        for q in range(numQuestions)
    ]

### ToDataFrame

Function to convert the result of `MultQuestionMultUsers` into a Pandas DataFrame for easier reading & analysis.

Input:
- `questions`: names of the questions, aka the question labels
- `choices`: the list of lists of possible answer choices as strings, one list per question
- `user`: list of user id's
- `data`: the output of `MultQuestionMultUsers`

Output: 
- a pandas DataFrame

In [47]:
def toDataFrame(questions, choices, user, data):
    """Convert the output of `MultQuestionMultUsers` into a 0/1 indicator table.

    The result has one row per user and one column per (question, choice)
    pair. Columns are labelled "<question>.<n>", where n is the 1-based
    position of the choice within that question's choice list. A cell is 1
    when the user selected that choice and 0 otherwise.

    Parameters
    ----------
    questions : list of str
        Question labels, one per question.
    choices : list of list
        Possible answer choices, one list per question.
    user : list
        User ids, used as the row index.
    data : list of list of list
        Answers indexed as data[question][user] -> list of chosen answers.

    Returns
    -------
    pandas.DataFrame
        Integer indicator table indexed by `user`.

    Raises
    ------
    ValueError
        If `questions` and `choices` differ in length, if `data` does not
        hold one answer list per question and per user, or if an answer is
        not one of the question's choices.
    """
    if len(questions) != len(choices):
        raise ValueError("Number of questions do not match for `questions` and `choices`")
    if len(data) != len(questions):
        raise ValueError("`data` must contain one list of user answers per question.")

    # Build the column labels and remember where each question's block of
    # columns starts, so answers can be placed by position.
    questionLabels = []
    offsets = []
    for q, options in zip(questions, choices):
        offsets.append(len(questionLabels))
        questionLabels.extend(q + "." + str(n) for n in range(1, len(options) + 1))

    # Start from an all-zero integer frame; no NaN filling is needed later.
    dfClean = pd.DataFrame(0, index = user, columns = questionLabels)

    for qPos, perUser in enumerate(data):
        if len(perUser) != len(user):
            raise ValueError("`data` must contain one answer list per user for every question.")
        options = choices[qPos]
        for uPos, chosen in enumerate(perUser):
            for ch in chosen:
                # Positional scalar assignment writes to the frame itself;
                # chained `df[col][row] = 1` is not guaranteed to.
                dfClean.iat[uPos, offsets[qPos] + options.index(ch)] = 1

    return dfClean

### Notes

- How to redesign functions so that the checks are only done once. 
- Consider the output of MultQMultU: a list of lists of lists is . . . very confusing
- Look up how to write a pandas dataframe as a csv
- Does not consider questions that are left unanswered.

## Example Pseudo Data

In [50]:
# Seed both generators used by SingleQuestionMultUsers (numpy's global RNG and
# the stdlib `random` module) so that Restart & Run All reproduces the table.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# One list of answer choices and one consensus list per question.
choices = [["1", "2", "3", "4", "5"], ["yes", "no", "maybe"], ["of course", "no way"], ["a fallacy", "possible fallacy", "no fallacy"]]
consensus = [["1", "2"], ["yes"], ["of course"], ["a fallacy", "possible fallacy"]]

# One agreement probability per simulated user.
agreement = [0.1, 0.5, 0.4, 0.8, 0.88, 0.09, 0.35, 0.4, 0.77, 0.8, 0.5, 0.5, 0.4, 0.8, 0.88, 0.89, 0.35, 0.4, 0.77, 0.8]

# Derived from the inputs so the counts cannot drift apart.
numQuestions = len(choices)
numAnswers = [(1, 3), (1, 1), (1, 1), (1, 1)]
data = MultQuestionMultUsers(choices, consensus, agreement, numQuestions, numAnswers)

questions = ["Q{}".format(i) for i in range(1, numQuestions + 1)]
# "uuid01" ... "uuid20": one id per entry of `agreement`.
user = ["uuid{:02d}".format(i) for i in range(1, len(agreement) + 1)]
toDataFrame(questions, choices, user, data)

Unnamed: 0,Q1.1,Q1.2,Q1.3,Q1.4,Q1.5,Q2.1,Q2.2,Q2.3,Q3.1,Q3.2,Q4.1,Q4.2,Q4.3
uuid01,1,0,0,1,0,0,1,0,0,1,0,1,0
uuid02,1,1,0,0,0,0,0,1,1,0,0,0,1
uuid03,0,0,0,1,0,0,0,1,0,1,0,0,1
uuid04,1,0,1,1,0,1,0,0,1,0,1,1,0
uuid05,1,1,0,0,0,1,0,0,1,0,1,1,0
uuid06,0,0,1,0,0,0,1,0,1,0,0,1,0
uuid07,0,0,1,0,1,0,1,0,0,1,1,1,0
uuid08,1,1,0,0,0,0,1,0,1,0,0,0,1
uuid09,1,1,0,0,0,1,0,0,1,0,0,1,0
uuid10,1,1,0,0,0,1,0,0,1,0,1,1,0
