## Functions

In [49]:
def make_multiple_choice(question, choices, which=1, randomize=True, aota=False,
    nota=False, none_prob=0.2, number=None):
    """
    Format a multiple choice question and work out the letter of its answer.

    Parameters:
        question : string, the question text
        choices : list of string, the candidate answers. The list is copied,
            so the caller's list is never shuffled or otherwise modified.
        which : 1-based position of the correct choice (defaults to first in list)
            if -1 then "None of the above" is correct and a correct answer
                wasn't provided
            if 0 then "All of the above" is correct
        randomize : shuffle the order of the options, making questions unique
        aota : include "All of the above" as an option
        nota : include "None of the above" as an option
        none_prob : probability that "None of the above" is right (If a correct
            answer is supplied and `nota` is set, the correct answer is removed
            with this probability.)
        number : question number on test (for formatting); when given, the
            question is prefixed with "<number>. " and the options are indented
            to line up under the question text

    Returns:
        (question, answer) : the formatted question including its lettered
            options, and the lowercase letter of the correct option

    Note:
       - "All of the above" always appears before "None of the above" in
         options, and both are placed after the (optionally shuffled) choices
       - If a correct answer is removed and the answer made "None of the above",
         and at most 4 options are left (counting "None of the above"), then
         "All of the above" is inserted as well, unless `aota` was requested

    """
    import random
    import string

    AOTA = "All of the above"
    NOTA = "None of the above"

    # Work on a private copy: the shuffle/append/remove steps below would
    # otherwise corrupt the list owned by the caller.
    choices = list(choices)

    # Select right answer if given (must happen before shuffling, since
    # `which` refers to the original order)
    if which > 0:
        correct = choices[which-1]
    elif which == 0:
        correct = AOTA
    else: # which == -1:
        correct = NOTA

    # Randomize order of options before appending 'All ...' or 'None ...'
    if randomize:
        random.shuffle(choices)

    # Append 'All of the above'
    if aota or which == 0:
        choices.append(AOTA)

    # Append 'None of the above'
    if nota or which == -1:
        choices.append(NOTA)

    # Remove the correct answer with probability none_prob
    if which > 0 and nota and none_prob > 0:
        if random.random() <= none_prob:
            choices.remove(correct)
            correct = NOTA
            # if not enough options after removing the correct answer, add ALL
            # just before the trailing 'None of the above'
            if (not aota) and len(choices) <= 4:
                choices.insert(len(choices)-1, AOTA)

    # Letter of the correct answer (a, b, c, ...) from its final position
    answer = string.ascii_lowercase[choices.index(correct)]

    # Option prefix: plain newline without a number, otherwise indent the
    # options by the width of the "<number>. " prefix
    if number is None:
        blank = "\n"
    else:
        if number > 9:
            blank = "\n    "
        else:
            blank = "\n   "

        question = str(number) + ". " + question

    # Format the question with options
    for i, choice in enumerate(choices):
        question += blank + string.ascii_lowercase[i] + ") " + choice

    return question, answer


In [54]:
def make_true_false(question, answer, number=None):
    """
    Format a True/False question

    Parameters:
        question : string, the statement to be judged
        answer : the correct answer. 't' or 'true' (any case, surrounding
            whitespace ignored) or the boolean True mean "True"; any other
            value is treated as "False".
        number : question number on test (for formatting); when given, the
            question is prefixed with "<number>. " and the options are indented
            to line up under the question text

    Returns:
        (question, answer) : the formatted question with its two options, and
            "a" when the statement is true or "b" otherwise
    """

    # Option prefix: plain newline without a number, otherwise indent the
    # options by the width of the "<number>. " prefix
    if number is None:
        blank = "\n"
    else:
        if number > 9:
            blank = "\n    "
        else:
            blank = "\n   "

        question = str(number) + ". " + question

    question += blank + "a) True"
    question += blank + "b) False"

    # str() lets real booleans through (str(True) == "True"); strip() keeps
    # padded values such as " true " from being misread as false.
    normalized = str(answer).strip().lower()
    if normalized in ['t', 'true']:
        answer = "a"
    else:
        answer = "b"

    return question, answer

## Query MySQL Database

In [3]:
import os

import pandas as pd
import sqlalchemy as db

# The connection URL (which embeds the credentials) is read from the
# CERTIFICATION_DB_URL environment variable so real credentials never have to
# be committed with the notebook. The fallback is the local development
# database this notebook was originally written against.
DB_URL = os.environ.get(
    'CERTIFICATION_DB_URL',
    'mysql://root:root@127.0.0.1:8306/certification',
)

engine = db.create_engine(DB_URL)
conn = engine.connect()

In [64]:
query = "select * from question_templates"
#query = "select * from question_templates where id in (1,3)"
#query = "select * from question_templates where type = 'Multiple Choice'"


# db.text() marks the string as textual SQL; SQLAlchemy 2.x no longer accepts
# a bare string in Connection.execute(), and older releases accept text() too.
cursor = conn.execute(db.text(query))

# Take the column names from the cursor rather than from the first row, so an
# empty result set yields an empty, correctly-labelled frame instead of an
# IndexError on result[0].
columns = list(cursor.keys())
result = cursor.fetchall()
df = pd.DataFrame(result, columns=columns)

In [65]:
# Display the question templates loaded by the query above.
df

Unnamed: 0,id,question,answer,correct,randomize,type,tags,aota,nota,epsilon,enabled,created_at,last_modified
0,1,"For regression problems, which type of samplin...","[""Stratified"", ""Systematic"", ""Cluster"", ""Random""]",4,1,Multiple Choice,"Medium,Experiments",0,0,0.0001,0,2020-04-19 11:01:48,2020-04-19 11:01:48
1,2,Recipes need to be added to Driverless AI each...,False,1,1,True/False,Easy,0,0,0.0001,0,2020-04-19 11:05:43,2020-04-19 11:05:43
2,3,Which graph will show your variables which cou...,"[""Skewed Histograms"", ""Outliers"", ""Spikey Hist...",1,1,Multiple Choice,,0,0,0.0001,0,2020-04-19 11:05:43,2020-04-19 11:05:43


### Example formatting a true/false question

In [66]:
# Render row `w` of the templates as question number `qnum`,
# then show the formatted text followed by the answer letter.
qnum = 1
w = 1
q, a = make_true_false(df['question'][w], df['answer'][w], number=qnum)
print(q, a, sep="\n")

1. Recipes need to be added to Driverless AI each time you want to use it for an experiment.
   a) True
   b) False
b


In [67]:
# Same row as before, but without a question number: no "N. " prefix
# and the options are not indented.
qnum = 1
w = 1
q, a = make_true_false(df['question'][w], df['answer'][w])
print(q, a, sep="\n")

Recipes need to be added to Driverless AI each time you want to use it for an experiment.
a) True
b) False
b


### Example formatting a multiple choice question

In [68]:
import ast

w=0
qnum = 5
q, a = make_multiple_choice(
        question = df['question'][w],
        # The answer column holds a list literal stored as text. literal_eval
        # parses it without executing arbitrary code, unlike eval() on
        # database content.
        choices = ast.literal_eval(df['answer'][w]),
        which = df['correct'][w],
        randomize = df['randomize'][w],
        aota = df['aota'][w], 
        nota = df['nota'][w])

print(q)
print(a)

For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
a) Random
b) Stratified
c) Systematic
d) Cluster
a


In [69]:
import ast

w = 2 
qnum = 5
q, a = make_multiple_choice(
        number = qnum, 
        question = df['question'][w],
        # The answer column holds a list literal stored as text. literal_eval
        # parses it without executing arbitrary code, unlike eval() on
        # database content.
        choices = ast.literal_eval(df['answer'][w]),
        which = df['correct'][w],
        randomize = df['randomize'][w],
        aota = df['aota'][w]==1, 
        nota = df['nota'][w]==1)

print(q)
print(a)

5. Which graph will show your variables which could be good candidates for transformation before being used in modeling?
   a) Correlation Graph
   b) Outliers
   c) Skewed Histograms
   d) Spikey Histograms
c


## Looping over multiple rows

In [70]:
import ast

# Format every template row as a numbered question (1-based, in row order).
# Iterating over the 'type' column avoids shadowing the builtin `id`.
for w, question_type in enumerate(df['type']): 

    if question_type == 'Multiple Choice':
        q, a = make_multiple_choice(
            number = w + 1, 
            question = df['question'][w],
            # The answer column holds a list literal stored as text;
            # literal_eval parses it without executing arbitrary code.
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w], 
            nota = df['nota'][w]
        )
    elif question_type == 'True/False':      
        q, a = make_true_false(
            number = w + 1,
            question = df['question'][w], 
            answer = df['answer'][w]
        )
    else:
        # Without this branch an unknown type would re-print the previous
        # row's question (or fail with a NameError on the first row).
        print("Skipping row " + str(w) + ": unsupported question type " + repr(question_type))
        continue
        
    print(q)
    print(a)
    print("\n")

1. For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
   a) Stratified
   b) Cluster
   c) Systematic
   d) Random
d


2. Recipes need to be added to Driverless AI each time you want to use it for an experiment.
   a) True
   b) False
b


3. Which graph will show your variables which could be good candidates for transformation before being used in modeling?
   a) Skewed Histograms
   b) Spikey Histograms
   c) Outliers
   d) Correlation Graph
a




In [73]:
import ast

# Format every template row without question numbers.
# Iterating over the 'type' column avoids shadowing the builtin `id`.
for w, question_type in enumerate(df['type']): 

    if question_type == 'Multiple Choice':
        q, a = make_multiple_choice(
            question = df['question'][w],
            # The answer column holds a list literal stored as text;
            # literal_eval parses it without executing arbitrary code.
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w], 
            nota = df['nota'][w]
        )
    elif question_type == 'True/False':      
        q, a = make_true_false(
            question = df['question'][w], 
            answer = df['answer'][w]
        )
    else:
        # Without this branch an unknown type would re-print the previous
        # row's question (or fail with a NameError on the first row).
        print("Skipping row " + str(w) + ": unsupported question type " + repr(question_type))
        continue
        
    print(q)
    print(a)
    print("\n")

For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
a) Stratified
b) Systematic
c) Cluster
d) Random
d


Recipes need to be added to Driverless AI each time you want to use it for an experiment.
a) True
b) False
b


Which graph will show your variables which could be good candidates for transformation before being used in modeling?
a) Outliers
b) Correlation Graph
c) Skewed Histograms
d) Spikey Histograms
c




In [None]:
# TODO: unfinished cell - the loop body was never written. The `pass` keeps
# "Restart & Run All" from stopping here with a SyntaxError, and the loop
# variable no longer shadows the builtin `id`.
for i, question_id in enumerate(df['id']):
    pass

In [63]:
# Hand-written sample: the question text first, then its four options
# (the first option is the correct one).
raw = [
    "For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?",
    "Random",
    "Stratified",
    "Cluster",
    "Systematic",
]


In [30]:
# Display the hand-written sample question and its options.
raw

['For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?',
 'Random',
 'Stratified',
 'Cluster',
 'Systematic']

In [35]:
# The keyword names are `aota` / `nota`; the former `all=` / `none=` spellings
# are not parameters of make_multiple_choice and raise a TypeError.
# none_prob=.99 makes it almost certain that the correct option is removed and
# "None of the above" becomes the answer.
q, a = make_multiple_choice(raw[0], raw[1:5], which=1, aota=True, nota=True, none_prob=.99)

In [36]:
# Show the formatted question, then the answer letter after a blank line.
print(q)
print("\nThe correct answer is " + a)

For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
a) Stratified
b) Cluster
c) Systematic
d) All of the above
e) None of the above

The correct answer is e
