## Functions

In [7]:
def make_multiple_choice(question, choices, which=1, randomize=True, aota=False,
    nota=False, none_prob=0.2, number=None):
    """
    Build the text of a multiple choice question and the letter of its answer.

    Parameters
        question : string, the question text
        choices : sequence of string, the candidate answers. It is never
            modified; the function works on its own copy.
        which : 1-based position in `choices` of the correct answer
            (defaults to first in list)
            if -1 then "None of the above" is correct and a correct answer
                wasn't provided
            if 0 then "All of the above" is correct
        randomize : randomize order of options, making questions unique
        aota : include "All of the above" as an option (it is always included
            when which == 0)
        nota : include "None of the above" as an option (it is always
            included when which == -1)
        none_prob : probability that "None of the above" is right. Only used
            when which > 0 and nota is set: with this probability the correct
            answer is removed and "None of the above" becomes the answer.
        number : question number on test (for formatting). None leaves the
            question unnumbered and the options unindented.

    Returns
        (question, answer) : the question followed by one lettered option per
            line ("a) ...", "b) ..."), and the lowercase letter of the
            correct option.

    Raises
        IndexError : if `which` is larger than len(choices), or if there are
            more options than letters in the alphabet.

    Note:
       - "All of the above" always appears before "None of the above" in
         options and after random shuffle
       - If a correct answer is removed and the answer made "None of the
         above", `aota` is not set, and at most four options are left
         (counting "None of the above"), then "All of the above" is inserted
         just before "None of the above"
       - Shuffling uses the module-level `random` generator, so calling
         random.seed() beforehand makes the output reproducible
    """
    import random
    import string

    AOTA = "All of the above"
    NOTA = "None of the above"

    # Work on a private copy: the shuffle/append/remove calls below must not
    # leak into the caller's list (re-running a cell with the same list would
    # otherwise keep stacking "All/None of the above" onto it).
    choices = list(choices)

    # Select right answer if given (must happen before the shuffle, while
    # `which` still refers to the caller's ordering)
    if which > 0:
        correct = choices[which-1]
    elif which == 0:
        correct = AOTA
    else: # which == -1:
        correct = NOTA

    # Randomize order of options before appending 'All ...' or 'None ...'
    if randomize:
        random.shuffle(choices)

    # Append 'All of the above'
    if aota or which == 0:
        choices.append(AOTA)

    # Append 'None of the above'
    if nota or which == -1:
        choices.append(NOTA)

    # Remove the correct answer with probability none_prob
    if which > 0 and nota and none_prob > 0:
        if random.random() <= none_prob:
            choices.remove(correct)
            correct = NOTA
            # if not enough options after removing the correct answer, add ALL
            if (not aota) and len(choices) <= 4:
                choices.insert(len(choices)-1, AOTA)

    # Letter of the correct option in the final ordering
    answer = string.ascii_lowercase[choices.index(correct)]

    # format if number present
    if number is None:
        blank = "\n"
    else:
        # Indent the options so they line up under the question text, i.e.
        # by the width of the "<number>. " prefix (works for any digit count)
        blank = "\n" + " " * (len(str(number)) + 2)
        question = str(number) + ". " + question

    # Format the question with options
    for i, choice in enumerate(choices):
        question += blank + string.ascii_lowercase[i] + ") " + choice

    return question, answer


In [8]:
def make_true_false(question, answer, number=None):
    """
    Format a True/False question

    Parameters
        question : string, the question text
        answer : the correct answer. 't' / 'true' in any letter case means
            True; surrounding whitespace and quote characters are ignored
            (so '"True"' works) and a bool is accepted as well.
            Anything else is treated as False.
        number : question number on test (for formatting). None leaves the
            question unnumbered and the options unindented.

    Returns
        (question, answer) : the question followed by the lines "a) True" and
            "b) False", and the letter of the correct option ("a" or "b").
    """

    # format if number present
    if number is None:
        blank = "\n"
    else:
        # Indent the options so they line up under the question text, i.e.
        # by the width of the "<number>. " prefix (works for any digit count)
        blank = "\n" + " " * (len(str(number)) + 2)
        question = str(number) + ". " + question

    question += blank + "a) True"
    question += blank + "b) False"

    # Normalize before comparing: str() copes with bool input, the strips
    # drop padding and literal quote characters around the stored value
    normalized = str(answer).strip().strip("\"'").strip().lower()
    if normalized in ['t', 'true']:
        answer = "a"
    else:
        answer = "b"

    return question, answer

## Query MySQL Database

In [1]:
import ast
import os

import pandas as pd
import sqlalchemy as db

In [2]:
# Connection URL: taken from the CERTIFICATION_DB_URL environment variable
# when it is set, so credentials need not live in the notebook; the fallback
# is the local development database this notebook was written against.
#engine = db.create_engine('mysql://root:root@127.0.0.1:8306/certification')
engine = db.create_engine(
    os.environ.get('CERTIFICATION_DB_URL', 'mysql://root:root@127.0.0.1:8889/certification')
)
conn = engine.connect()

In [3]:
# Create a MetaData instance (container for the table definitions)
metadata = db.MetaData()

# Reflect the database schema into MetaData so tables can be looked up by
# name through metadata.tables[...] in later cells
metadata.reflect(bind=engine)

In [3]:
# NOTE(review): this cell repeats the imports, connection and reflection of
# the three cells before it; running both opens a second connection.
import ast
import os

import pandas as pd
import sqlalchemy as db

# Connection URL: taken from the CERTIFICATION_DB_URL environment variable
# when it is set, so credentials need not live in the notebook; the fallback
# is the local development database this notebook was written against.
#engine = db.create_engine('mysql://root:root@127.0.0.1:8306/certification')
engine = db.create_engine(
    os.environ.get('CERTIFICATION_DB_URL', 'mysql://root:root@127.0.0.1:8889/certification')
)
conn = engine.connect()

# Create a MetaData instance
metadata = db.MetaData()

# Reflect the database schema into MetaData
metadata.reflect(bind=engine)

In [4]:
query = "select * from question_templates"
#query = "select * from question_templates where id in (1,3)"
#query = "select * from question_templates where type = 'Multiple Choice'"

# read_sql runs the query and labels the frame with the result's column
# names itself, so an empty result gives an empty frame instead of failing
# on result[0]
df = pd.read_sql(query, conn)

In [5]:
# Preview the first rows only; the full table may be long
df.head()

Unnamed: 0,id,question,answer,correct,randomize,type,tags,aota,nota,epsilon,enabled,created_at,last_modified
0,1,Recipes need to be added to Driverless AI each...,False,1,1,True/False,Easy,0,0,0.0001,0,2020-05-06 05:31:34,2020-05-06 05:31:34
1,2,Which graph will show your variables which cou...,"[""Skewed Histograms"", ""Outliers"", ""Spikey Hist...",1,1,Multiple Choice,,0,0,0.0001,0,2020-05-06 05:31:34,2020-05-06 05:31:34


### Example formatting a true/false question
#### With numbering

In [9]:
# Format the template in row `w` as question number `qnum`
qnum = 1
w = 0
q, a = make_true_false(df['question'][w], df['answer'][w], number=qnum)
# question text first, then the letter of the correct option
print(q, a, sep="\n")

1. Recipes need to be added to Driverless AI each time you want to use it for an experiment.
   a) True
   b) False
b


#### Without numbering

In [10]:
# Same template, but without a question number (qnum is not passed on)
qnum = 1
w = 0
q, a = make_true_false(df['question'][w], df['answer'][w])
# question text first, then the letter of the correct option
print(q, a, sep="\n")

Recipes need to be added to Driverless AI each time you want to use it for an experiment.
a) True
b) False
b


### Example formatting a multiple choice question
#### With numbering

In [11]:
# Format the template in row `w` as question number `qnum`
w = 1
qnum = 5
q, a = make_multiple_choice(
        number = qnum,
        question = df['question'][w],
        # The stored answer is the text of a list literal; literal_eval
        # parses it without executing arbitrary code the way eval() would
        choices = ast.literal_eval(df['answer'][w]),
        which = df['correct'][w],
        randomize = df['randomize'][w],
        aota = df['aota'][w]==1,
        nota = df['nota'][w]==1)

print(q)
print(a)

5. Which graph will show your variables which could be good candidates for transformation before being used in modeling?
   a) Spikey Histograms
   b) Skewed Histograms
   c) Outliers
   d) Correlation Graph
b


#### Without numbering

In [12]:
# Same template, but without a question number (qnum is not passed on)
w=1
qnum = 5
q, a = make_multiple_choice(
        question = df['question'][w],
        # The stored answer is the text of a list literal; literal_eval
        # parses it without executing arbitrary code the way eval() would
        choices = ast.literal_eval(df['answer'][w]),
        which = df['correct'][w],
        randomize = df['randomize'][w],
        aota = df['aota'][w],
        nota = df['nota'][w])

print(q)
print(a)

Which graph will show your variables which could be good candidates for transformation before being used in modeling?
a) Correlation Graph
b) Spikey Histograms
c) Skewed Histograms
d) Outliers
c


## Looping over multiple rows
### With numbering

In [10]:
# Format every template, numbering the questions 1, 2, ... in row order.
# (Iterating over positions avoids shadowing the builtin `id`.)
for w in range(len(df)):

    if df['type'][w] == 'Multiple Choice':
        q, a = make_multiple_choice(
            number = w + 1,
            question = df['question'][w],
            # The stored answer is the text of a list literal; literal_eval
            # parses it without executing arbitrary code the way eval() would
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w],
            nota = df['nota'][w]
        )
    elif df['type'][w] == 'True/False':
        q, a = make_true_false(
            number = w + 1,
            question = df['question'][w],
            answer = df['answer'][w]
        )
    else:
        # Unknown question type: skip the row rather than printing the
        # previous question again (or failing on the very first row)
        continue

    print(q)
    print(a)
    print("\n")

1. For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
   a) Cluster
   b) Random
   c) Systematic
   d) Stratified
b


2. Recipes need to be added to Driverless AI each time you want to use it for an experiment.
   a) True
   b) False
b


3. Which graph will show your variables which could be good candidates for transformation before being used in modeling?
   a) Correlation Graph
   b) Spikey Histograms
   c) Skewed Histograms
   d) Outliers
c




In [11]:
# Format every template without question numbers.
# (Iterating over positions avoids shadowing the builtin `id`.)
for w in range(len(df)):

    if df['type'][w] == 'Multiple Choice':
        q, a = make_multiple_choice(
            question = df['question'][w],
            # The stored answer is the text of a list literal; literal_eval
            # parses it without executing arbitrary code the way eval() would
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w],
            nota = df['nota'][w]
        )
    elif df['type'][w] == 'True/False':
        q, a = make_true_false(
            question = df['question'][w],
            answer = df['answer'][w]
        )
    else:
        # Unknown question type: skip the row rather than printing the
        # previous question again (or failing on the very first row)
        continue

    print(q)
    print(a)
    print("\n")

For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
a) Cluster
b) Systematic
c) Random
d) Stratified
c


Recipes need to be added to Driverless AI each time you want to use it for an experiment.
a) True
b) False
b


Which graph will show your variables which could be good candidates for transformation before being used in modeling?
a) Outliers
b) Correlation Graph
c) Skewed Histograms
d) Spikey Histograms
c




## Write to database table

In [12]:
# Engine.table_names() is deprecated in newer SQLAlchemy releases; the
# inspector is the supported way to list the tables of the database
db.inspect(engine).get_table_names()

['question_templates', 'questions']

In [13]:
# Look up the reflected 'questions' table and list its column names
questions = metadata.tables['questions']
print(questions.columns.keys())

['id', 'question', 'answer', 'item', 'test_id', 'template_id']


In [14]:
# Store one formatted question in the `questions` table.
# NOTE(review): q, a and w are whatever the most recently executed
# formatting cell left behind, so the row written here depends on the
# execution order of the notebook.
test_id = 5
item = 1

ins = questions.insert().values(
    question = q,
    answer = a, 
    item = item, 
    test_id = test_id, 
    template_id = df['id'][w]
)

# Executes the INSERT on the open connection (writes to the database)
conn.execute(ins)

<sqlalchemy.engine.result.ResultProxy at 0x12a05da90>

In [19]:
import random

## Save the seed in tests
# Integer bounds: randint() rejects float arguments such as 1e10 on
# newer Python versions
seed = random.randint(1, 10**10)
seed

1563586409

In [13]:

# Format the template in row 0 (test_id is set here but not used by this cell)
test_id = 99
w = 0
q, a = make_multiple_choice(
            question = df['question'][w],
            # The stored answer is the text of a list literal; literal_eval
            # parses it without executing arbitrary code the way eval() would
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w],
            nota = df['nota'][w]
)

NameError: name 'df' is not defined

In [15]:
# The random module has no state() function; getstate() returns the
# generator's internal state (which setstate() can restore later)
random.getstate()


NameError: name 'random' is not defined

In [21]:
# Format every template, always offering "None of the above" on the
# multiple choice questions.
# (Iterating over positions avoids shadowing the builtin `id`.)
for w in range(len(df)):

    if df['type'][w] == 'Multiple Choice':
        q, a = make_multiple_choice(
            question = df['question'][w],
            # The stored answer is the text of a list literal; literal_eval
            # parses it without executing arbitrary code the way eval() would
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w],
            nota = True
        )
    elif df['type'][w] == 'True/False':
        q, a = make_true_false(
            question = df['question'][w],
            answer = df['answer'][w]
        )
    else:
        # Unknown question type: skip the row rather than printing the
        # previous question again (or failing on the very first row)
        continue

    print(q)
    print(a)
    print("\n")

For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
a) Cluster
b) Stratified
c) Random
d) Systematic
e) None of the above
c


Recipes need to be added to Driverless AI each time you want to use it for an experiment.
a) True
b) False
b


Which graph will show your variables which could be good candidates for transformation before being used in modeling?
a) Skewed Histograms
b) Spikey Histograms
c) Outliers
d) Correlation Graph
e) None of the above
a




In [23]:
# Re-seed the module-level generator with the saved seed, so the shuffling
# done through `random` in the next cell can be reproduced
random.seed(seed)

In [24]:
# Same loop as before the re-seed: format every template, always offering
# "None of the above" on the multiple choice questions.
# (Iterating over positions avoids shadowing the builtin `id`.)
for w in range(len(df)):

    if df['type'][w] == 'Multiple Choice':
        q, a = make_multiple_choice(
            question = df['question'][w],
            # The stored answer is the text of a list literal; literal_eval
            # parses it without executing arbitrary code the way eval() would
            choices = ast.literal_eval(df['answer'][w]),
            which = df['correct'][w],
            randomize = df['randomize'][w],
            aota = df['aota'][w],
            nota = True
        )
    elif df['type'][w] == 'True/False':
        q, a = make_true_false(
            question = df['question'][w],
            answer = df['answer'][w]
        )
    else:
        # Unknown question type: skip the row rather than printing the
        # previous question again (or failing on the very first row)
        continue

    print(q)
    print(a)
    print("\n")

For regression problems, which type of sampling will Driverless AI perform at the start of an experiment?
a) Cluster
b) Stratified
c) Random
d) Systematic
e) None of the above
c


Recipes need to be added to Driverless AI each time you want to use it for an experiment.
a) True
b) False
b


Which graph will show your variables which could be good candidates for transformation before being used in modeling?
a) Skewed Histograms
b) Spikey Histograms
c) Outliers
d) Correlation Graph
e) None of the above
a




## Quick import from Excel File

In [16]:
# Load the question workbook with pylightxl.
# NOTE(review): this rebinds `db`, which earlier cells use as the alias for
# sqlalchemy; any cell calling db.create_engine / db.MetaData must not be
# re-run after this one without re-importing sqlalchemy.
import pylightxl as xl
db = xl.readxl('Questions.xlsx')

In [18]:
# Names of the worksheets in the loaded workbook
db.ws_names

['Sheet1']

In [22]:
# Rebind `db` from the workbook to its 'Sheet1' worksheet.
# NOTE(review): not idempotent - running this cell a second time calls
# .ws() on the worksheet returned by the first run.
db = db.ws('Sheet1')

In [27]:
# Size of the worksheet - presumably [row count, column count]; confirm
# against the pylightxl documentation
db.size

[1037, 8]

In [23]:
# Read a single cell by its spreadsheet address
db.address('A3')

'The graphs shown on the AutoVisualization page are the same for all datasets'

In [24]:
# Row 2 of the sheet; in this workbook it holds the column headings
db.row(2)

['Question Text',
 'Answers set (multiple choice / checkbox / matching / true-false)',
 'Correct',
 'Randomize',
 'Type',
 'Tags',
 'Difficulty (optional)',
 'Contributor']

In [25]:
# Row number (as used by the worksheet accessors) of the question to inspect
r = 3
# Question
db.index(row=r, col=1)

'The graphs shown on the AutoVisualization page are the same for all datasets'

In [26]:
# All cells of the selected row, in column order
db.row(r)

['The graphs shown on the AutoVisualization page are the same for all datasets',
 '"False"',
 '',
 '',
 'True/False',
 '"AutoViz"',
 '',
 'Chemere Davis']

In [None]:
# `db` was rebound to the 'Sheet1' worksheet in an earlier cell, so the
# worksheet accessors are called on it directly (db.ws(...) only exists on
# the workbook object)
db.index(row=1,col=1)

db.row(1)
db.col(1)

for row in db.rows:
    print(row)

In [28]:
# Engine.table_names() is deprecated in newer SQLAlchemy releases; use the
# inspector instead. Imported by name because `db` no longer refers to
# sqlalchemy at this point in the notebook.
from sqlalchemy import inspect
inspect(engine).get_table_names()

['datasets',
 'exp_definitions',
 'experiments',
 'question_templates',
 'questions',
 'results',
 'student_answers',
 'students',
 'tag_list',
 'tags',
 'test_definitions',
 'tests']

In [29]:
# Look up the reflected 'question_templates' table and list its column names
templates = metadata.tables['question_templates']
print(templates.columns.keys())

['id', 'question', 'answer', 'correct', 'randomize', 'type', 'tags', 'aota', 'nota', 'epsilon', 'enabled', 'created_at', 'last_modified']


In [30]:
# Keep the cells of spreadsheet row `r` as a list for the insert below
raw = db.row(r)

In [35]:
# Fifth cell of the row: the 'Type' column of the sheet
raw[4]

'True/False'

In [None]:
# Insert the selected spreadsheet row as a new question template.
# `template_id` is not a column of question_templates (see the column list
# printed for `templates`), so it is not passed: SQLAlchemy rejects values
# for columns the table does not have.
# NOTE(review): for this row raw[1] still carries literal quote characters
# and raw[2] / raw[3] are empty strings - confirm the table accepts them.
ins = templates.insert().values(
    question = raw[0],
    answer = raw[1],
    correct = raw[2],
    randomize = raw[3],
    type = raw[4]
)

conn.execute(ins)