In [None]:
import base64
import os
import time

import pandas as pd
import requests
from dotenv import load_dotenv
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Load settings via python-dotenv before reading them from the environment.
load_dotenv()

# Credentials typed into the login form later in the notebook.
# Either value is None when its environment variable is not set.
email, password = os.environ.get('EMAIL'), os.environ.get('PASSWORD')

# Read from the CAPTCHA variable; not referenced by any other visible cell.
captcha_str = os.environ.get('CAPTCHA')

In [None]:
# Start a Firefox WebDriver session. Every later cell drives the browser
# through this module-level `driver` object.
driver = webdriver.Firefox()

In [None]:
# Shared explicit-wait helper bound to `driver`; gives up after 10 seconds.
wait = WebDriverWait(driver, timeout=10)

In [None]:
# Fail early with a clear message: os.getenv returns None for an unset
# variable, and passing None to send_keys fails with an unhelpful error.
if not email or not password:
    raise RuntimeError("EMAIL and PASSWORD must be set in the environment or .env file")

driver.get("https://www.coursera.org/?authMode=login")

# Wait until the form fields are actually interactable instead of sleeping a
# fixed 5 seconds: a slow page load would otherwise make the lookup fail,
# and a fast one no longer wastes time.
email_field = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[name="email"]'))
)
password_field = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[name="password"]'))
)

email_field.send_keys(email)
password_field.send_keys(password)
# Submit the form by pressing Enter in the password field.
password_field.send_keys(Keys.RETURN)

In [None]:
# Strip surrounding whitespace: a pasted URL often carries a trailing space or
# newline, which would otherwise be handed to driver.get() verbatim.
url = input("Enter the URL of the course you want to download: ").strip()

In [None]:
# Navigate the automated browser to the course URL entered in the previous cell.
driver.get(url)

In [None]:
# Fixed 5-second pause between loading the course page and querying it for
# week links in the next cell.
# NOTE(review): presumably this gives the page time to render; an explicit
# wait on the week navigation items would be more reliable - confirm.
time.sleep(5)

In [None]:
# Anchors of the week navigation on the currently loaded course page.
weeks = driver.find_elements(By.XPATH, '//a[@data-test="rc-WeekNavigationItem"]')

# One URL per week. get_attribute returns None when an anchor has no href;
# drop those (and empty strings) so the crawl loop never calls driver.get(None).
links = [
    href
    for href in (week.get_attribute('href') for week in weeks)
    if href
]

In [None]:
def get_quizzes():
    """Return the assignment-submission links found on the current page.

    Reads the module-level ``driver`` and inspects every anchor tagged
    ``data-track-component="item_link"``.

    Returns:
        list[str]: hrefs containing ``'assignment-submission'``, in page order.
        Empty when the page has no matching anchors.
    """
    items = driver.find_elements(By.XPATH, '//a[@data-track-component="item_link"]')
    hrefs = (item.get_attribute('href') for item in items)
    # get_attribute returns None for an anchor without an href; skip those
    # rather than raising TypeError on the substring test.
    return [href for href in hrefs if href and 'assignment-submission' in href]

In [None]:
def continue_button():
    """Click the page's "Continue" button if there is one (best effort).

    Uses the module-level ``driver``. Any ordinary failure, whether the
    button is missing or the click is rejected, is reported with a printed
    message and otherwise ignored so the crawl can carry on.

    Returns:
        None
    """
    print("Looking for continue button")
    try:
        button = driver.find_element(By.XPATH, "//button[span[text()='Continue']]")
        button.click()
        print("Clicking continue button")
    except Exception:
        # Deliberately broad, but no longer a bare `except:`: KeyboardInterrupt
        # and SystemExit now propagate, so the notebook cell can be interrupted.
        print("No continue button")

In [None]:
def _image_to_data_uri(image_src):
    """Return ``image_src`` as a ``data:image/...`` URI, downloading it if needed.

    Raises:
        Exception: whatever ``requests`` raises on a network or HTTP error,
            or ``AttributeError`` when ``image_src`` is None. The caller
            turns any failure into a placeholder string.
    """
    if image_src.startswith('data:image'):
        # Already embedded in the page; keep it verbatim.
        return image_src

    # A timeout keeps one stalled download from hanging the whole crawl.
    response = requests.get(image_src, timeout=30)
    # Do not base64-encode an HTTP error page as if it were image bytes.
    response.raise_for_status()
    encoded = base64.b64encode(response.content).decode('utf-8')
    # Drop the query string BEFORE taking the extension, so a dot inside the
    # query (e.g. "img.png?v=1.2") is not mistaken for the file type.
    file_type = image_src.split('?')[0].split('.')[-1]
    return f"data:image/{file_type};base64,{encoded}"


def get_answers():
    """Scrape questions, checked answers and images from the current page.

    Uses the module-level ``driver``. Every ``div[@role="group"]`` element is
    treated as one question.

    Returns:
        pandas.DataFrame: one row per question with the columns

        * ``Question`` - the group's ``innerText``.
        * ``Answer`` - ``innerText`` of every label whose class contains
          ``cui-isChecked``, joined with ``'; '``; ``"Not found"`` when there
          is none or the lookup fails.
        * ``Image`` - data URIs of the group's images joined with ``'; '``
          (``"Unable to fetch image"`` stands in for each failure), or None
          when the group has no images.

        The frame is empty, with the same columns, when the page has no
        question groups.
    """
    columns = ['Question', 'Answer', 'Image']
    # Collect plain records and build the frame once, instead of growing a
    # DataFrame with pd.concat on every iteration.
    records = []

    questions = driver.find_elements(By.XPATH, '//div[@role="group"]')

    for q in questions:
        question_text = q.get_attribute('innerText')
        answer_text = "Not found"

        try:
            checked = q.find_elements(By.XPATH, './/label[contains(@class, "cui-isChecked")]')
            if checked:
                answer_text = '; '.join([c.get_attribute('innerText') for c in checked])
        except Exception:
            # Best effort: the answer simply stays "Not found".
            pass

        image_base64_list = []
        for img in q.find_elements(By.XPATH, './/img'):
            image_src = img.get_attribute('src')
            try:
                image_base64_list.append(_image_to_data_uri(image_src))
            except Exception:
                # One broken image must not abort the whole question.
                image_base64_list.append("Unable to fetch image")

        records.append({
            'Question': question_text,
            'Answer': answer_text,
            'Image': '; '.join(image_base64_list) if image_base64_list else None
        })

    return pd.DataFrame(records, columns=columns)

In [None]:
# Empty accumulator for the scraped rows; the crawl loop below appends each
# quiz's results to it. Re-running this cell resets the collected data.
df = pd.DataFrame(columns=["Question", "Answer", "Image"])

In [None]:
# Visit every week page, open each quiz on it, reveal the submission and
# append the scraped answers to `df`.
for link in links:
    driver.get(link)
    time.sleep(10)
    quizzes = get_quizzes()
    for quiz in quizzes:
        driver.get(quiz)
        time.sleep(10)
        continue_button()

        # Wait for submission button to appear and click it. If it never
        # becomes clickable, skip this quiz instead of letting the
        # TimeoutException abort the whole crawl.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '//button[@data-testid="view-submission-button"]'))
            ).click()
        except TimeoutException:
            print(f"No view-submission button for {quiz}, skipping")
            continue

        continue_button()
        time.sleep(10)
        # Concatenate per quiz so `df` holds partial results if the loop is
        # interrupted part-way through.
        df = pd.concat([df, get_answers()], ignore_index=True)

In [None]:
# Write everything collected in `df` to answers.csv (relative path), leaving
# out the DataFrame index column.
df.to_csv('answers.csv', index=False)