# Generating Image-Based Questions

This notebook explains how to generate image-based questions that tests students understanding of phonemes. The first part shows how to construct a simple question structure with solutions and answer choices using Chat GPT. The second part shows how to web-scrape the images for the answer choices 

### Import the necessary libraries 

In [1]:
import pandas as pd
import regex as re
import random
import numpy as np

### Prompt Chat GPT

Paste the following prompts into Chat GPT: 

- "Generate a table with two columns, the first corresponding to all the English phonemes and the second is a long list of one-syllable nouns that contain that phoneme"

### Load in the Chat GPT output 

In [2]:
phoneme_list = pd.read_csv("/work/CHATGPT/phonemes-list.csv")

### Clean up the loaded dataframe  

In [3]:
phoneme_list = phoneme_list.rename(columns={"Example Nouns": "Examples"})

#Drop the rows that don't contain enough examples
phoneme_list= phoneme_list[phoneme_list.Examples.str.contains("Few common nouns") == False]

# turn the examples into an array to make the future code easier 
phoneme_list["Word List"] = phoneme_list["Examples"].str.split(",")
phoneme_list["Word List"]

phoneme_list

Unnamed: 0,English Phoneme,Examples,Word List
0,/p/,"pen, pig, pot, pan, pad","[pen, pig, pot, pan, pad]"
1,/b/,"bed, bat, bag, box, bus","[bed, bat, bag, box, bus]"
2,/t/,"tap, tent, tag, toy, tin","[tap, tent, tag, toy, tin]"
3,/d/,"dog, desk, doll, drum, duck","[dog, desk, doll, drum, duck]"
4,/k/,"cat, cup, coat, kite, key","[cat, cup, coat, kite, key]"
5,/g/,"gate, gum, goat, girl, gap","[gate, gum, goat, girl, gap]"
6,/f/,"fan, fish, fork, fox, feet","[fan, fish, fork, fox, feet]"
7,/v/,"van, vase, veil, vine, vest","[van, vase, veil, vine, vest]"
8,/s/,"sun, sock, seed, soap, sink","[sun, sock, seed, soap, sink]"
9,/z/,"zoo, zest, zeal, zinc, zone","[zoo, zest, zeal, zinc, zone]"


### Create an empty dataframe to populate with our questions  

In [4]:
#create an empty dataframe to populate with our questions  
generated_english_questions = pd.DataFrame(columns=['Generated Question', 'Answer Choices', 
                                 'Answer', 'Phoneme', 'Image'])
generated_english_questions


Unnamed: 0,Generated Question,Answer Choices,Answer,Phoneme,Image


### Generate questions for each word 

In [5]:
# For each word in the phenomes list, generate a question in which that word is the solution and other possible answer choices 
q_index = 0
for index, row in phoneme_list.iterrows():
    phoneme = row['English Phoneme']
    for word_index in range(len(row['Word List'])):
        generated_english_questions.loc[q_index, "Generated Question"] = "Select the correct " + phoneme + " sound word in the picture"
        generated_english_questions.loc[q_index, "Answer"] = row['Word List'][word_index]
        answer_choices = random.sample(row['Word List'][:word_index] + row['Word List'][word_index+1:], min(3, len(row['Word List']))) + [row['Word List'][word_index]]
        random.shuffle(answer_choices) 
        generated_english_questions.loc[q_index, "Answer Choices"] = answer_choices
        generated_english_questions.loc[q_index, "Phoneme"] = phoneme
        q_index += 1

In [6]:
generated_english_questions

Unnamed: 0,Generated Question,Answer Choices,Answer,Phoneme,Image
0,Select the correct /p/ sound word in the picture,"[ pad, pan, pig, pen]",pen,/p/,
1,Select the correct /p/ sound word in the picture,"[ pot, pan, pen, pig]",pig,/p/,
2,Select the correct /p/ sound word in the picture,"[ pig, pan, pen, pot]",pot,/p/,
3,Select the correct /p/ sound word in the picture,"[ pad, pen, pot, pan]",pan,/p/,
4,Select the correct /p/ sound word in the picture,"[ pig, pad, pen, pot]",pad,/p/,
...,...,...,...,...,...
180,Select the correct /ʊə/ sound word in the picture,"[boor, tour, poor, cure]",boor,/ʊə/,
181,Select the correct /ʊə/ sound word in the picture,"[boor, sure, poor, cure]",cure,/ʊə/,
182,Select the correct /ʊə/ sound word in the picture,"[ poor, boor, cure, sure]",poor,/ʊə/,
183,Select the correct /ʊə/ sound word in the picture,"[ cure, sure, tour, poor]",tour,/ʊə/,


### Function to complete everything above

In [None]:
def image_based_qs(phenome_df):

    """
    Create a table of questions from ChatGPT word problems with the specified question type.

    Args:
    phenome_df (Dataframe): Dataframe of the phenome list output that we downloaded from Chat GPT 

    Returns (dataframe): 
        the final question table with the columns: 'Generated Question', 'Answer Choices', 
                                 'Answer', 'Phoneme', 'Image' 
    """

    generated_english_questions = pd.DataFrame(columns=['Generated Question', 'Answer Choices', 
                                 'Answer', 'Phoneme', 'Image'])
    generated_english_questions

    q_index = 0
    for index, row in phoneme_list.iterrows():
        phoneme = row['English Phoneme']
        for word_index in range(len(row['Word List'])):
            generated_english_questions.loc[q_index, "Generated Question"] = "Select the correct " + phoneme + " sound word in the picture"
            generated_english_questions.loc[q_index, "Answer"] = row['Word List'][word_index]
            answer_choices = random.sample(row['Word List'][:word_index] + row['Word List'][word_index+1:], min(3, len(row['Word List']))) + [row['Word List'][word_index]]
            random.shuffle(answer_choices) 
            generated_english_questions.loc[q_index, "Answer Choices"] = answer_choices
            generated_english_questions.loc[q_index, "Phoneme"] = phoneme
            q_index += 1
    
    return dataframe

### Get a list of images to web scrape

In [7]:
images_to_webscrap = generated_english_questions["Answer"].unique()
images_to_webscrap

array(['pen', ' pig', ' pot', ' pan', ' pad', 'bed', ' bat', ' bag',
       ' box', ' bus', 'tap', ' tent', ' tag', ' toy', ' tin', 'dog',
       ' desk', ' doll', ' drum', ' duck', 'cat', ' cup', ' coat',
       ' kite', ' key', 'gate', ' gum', ' goat', ' girl', ' gap', 'fan',
       ' fish', ' fork', ' fox', ' feet', 'van', ' vase', ' veil',
       ' vine', ' vest', 'sun', ' sock', ' seed', ' soap', ' sink', 'zoo',
       ' zest', ' zeal', ' zinc', ' zone', 'shoe', ' ship', ' shell',
       ' shed', ' shark', 'thumb', ' thief', ' thorn', ' thread', ' bath',
       'chair', ' cheese', ' child', ' church', ' chest', 'gym', ' judge',
       ' gem', ' jet', ' jam', 'map', ' milk', ' man', ' moon', ' mat',
       'nut', ' nest', ' nose', ' net', ' nail', 'ring', ' lung', ' fang',
       ' king', ' wing', 'hat', ' hand', ' hen', ' house', ' hill', 'leg',
       ' leaf', ' lid', ' lock', ' lamp', 'rat', ' rose', ' rock',
       ' ring', ' rake', 'yam', ' yarn', ' yard', ' yawn', ' yeti',
  

### Webscraping images

Please note that this code works best on terminal. not Deepnote 

In [8]:
from bs4 import BeautifulSoup
import requests

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import urllib
import os

# Create a subfolder for the images if it doesn't exist
if not os.path.exists("ques_img"):
    os.mkdir("ques_img")

options = webdriver.ChromeOptions() # opens browser
# options.add_argument("--headless=new") # makes it faster, but you won't see what's happening
options.add_experimental_option("detach", True) 
service = Service(ChromeDriverManager().install()) # installs the chrome that this webdriver will use
driver = webdriver.Chrome(options=options, service=service)

objects = ['pen', ' pig', ' pot', ' pan', ' pad', 'bed', ' bat', ' bag',
       ' box', ' bus', 'tap', ' tent', ' tag', ' toy', ' tin', 'dog',
       ' desk', ' doll', ' drum', ' duck', 'cat', ' cup', ' coat',
       ' kite', ' key', 'gate', ' gum', ' goat', ' girl', ' gap', 'fan',
       ' fish', ' fork', ' fox', ' feet', 'van', ' vase', ' veil',
       ' vine', ' vest', 'sun', ' sock', ' seed', ' soap', ' sink', 'zoo',
       ' zest', ' zeal', ' zinc', ' zone', 'shoe', ' ship', ' shell',
       ' shed', ' shark', 'thumb', ' thief', ' thorn', ' thread', ' bath',
       'chair', ' cheese', ' child', ' church', ' chest', 'gym', ' judge',
       ' gem', ' jet', ' jam', 'map', ' milk', ' man', ' moon', ' mat',
       'nut', ' nest', ' nose', ' net', ' nail', 'ring', ' lung', ' fang',
       ' king', ' wing', 'hat', ' hand', ' hen', ' house', ' hill', 'leg',
       ' leaf', ' lid', ' lock', ' lamp', 'rat', ' rose', ' rock',
       ' ring', ' rake', 'yam', ' yarn', ' yard', ' yawn', ' yeti',
       'well', ' wolf', ' worm', ' wall', 'bee', ' pea', ' sea', ' ski',
       'bit', ' pin', ' kit', ' leg', ' pen', ' hat', ' rat', 'cup',
       ' bun', ' hut', ' bug', 'sofa', ' comma', ' doctor', ' zebra',
       ' pizza', 'car', ' star', ' bar', ' jar', ' farm', 'fork',
       ' horse', ' storm', ' cord', ' torch', 'book', ' foot', ' hood',
       ' bush', 'moon', ' spoon', ' tube', ' flute', ' prune', 'toy',
       ' boy', ' coil', ' foil', ' oyster', 'bike', ' pie', ' hive',
       ' prize', 'cow', ' mouse', ' gown', ' pouch', 'boat', ' loaf',
       ' cone', 'boor', ' cure', ' poor', ' tour', ' sure']

session_object = requests.Session()
pics_wbpage = session_object.get("https://www.freepik.com/search?format=search&query=clipart&selection=1") 
pics_soup = BeautifulSoup(pics_wbpage.content, "html.parser") 

driver.get("https://www.freepik.com/search?format=search&query=clipart&selection=1")

search_bar = driver.find_element(
     By.XPATH,"/html/body/header/div/form/div[1]/input")

for object in objects:
    print(object)
    driver.get("https://www.freepik.com/search?format=search&query=clipart&selection=1")

    search_bar = driver.find_element(
     By.XPATH,"/html/body/header/div/form/div[1]/input")
    
    clipart = driver.find_element(
     By.XPATH,"/html/body/main/div[3]/div/div[2]/h1")
    search_bar.send_keys(" ", object, Keys.RETURN)
    driver.execute_script("arguments[0].scrollIntoView();", clipart)
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "/html/body/main/div[3]/div/div[2]/section"))
    )
    
    # Scroll to the first image element
    driver.execute_script("window.scrollTo(0, document.querySelector('.showcase__content.tags-links img').getBoundingClientRect().top + window.pageYOffset - 100)")
    
    # Wait for the first image to be present
    img = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "/html/body/main/div[3]/div/div[2]/section/figure[1]/div/a/img"))
    )

    # download the image as object name so it's easy to access
    src = img.get_attribute('src')
    response = urllib.request.urlopen(src)
    with open(f"{object}.png", "wb") as f: #f"ques_img/{object}.png" can be used for storing images in a subfolder
        f.write(response.read()) 


[WDM] - Downloading: 100%|██████████| 6.75M/6.75M [00:00<00:00, 204MB/s]


WebDriverException: Message: unknown error: Chrome failed to start: exited abnormally.
  (unknown error: DevToolsActivePort file doesn't exist)
  (The process started from chrome location /usr/bin/chromium is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
#0 0x561b3ce5ffe3 <unknown>
#1 0x561b3cb9ed36 <unknown>
#2 0x561b3cbc7b20 <unknown>
#3 0x561b3cbc3a9b <unknown>
#4 0x561b3cc05af7 <unknown>
#5 0x561b3cc0511f <unknown>
#6 0x561b3cbfc693 <unknown>
#7 0x561b3cbcf03a <unknown>
#8 0x561b3cbd017e <unknown>
#9 0x561b3ce21dbd <unknown>
#10 0x561b3ce25c6c <unknown>
#11 0x561b3ce2f4b0 <unknown>
#12 0x561b3ce26d63 <unknown>
#13 0x561b3cdf9c35 <unknown>
#14 0x561b3ce4a138 <unknown>
#15 0x561b3ce4a2c7 <unknown>
#16 0x561b3ce58093 <unknown>
#17 0x7f196c13efa3 start_thread


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=08b91cc6-deda-4616-92c9-e073a9a8b5c8' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>