<a href="https://colab.research.google.com/github/brendanpshea/database_sql/blob/main/SQL_Select_Quiz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [24]:
import sqlite3
import random
from datetime import datetime, timedelta
import os
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
import pandas as pd
import json

pd.set_option('display.max_colwidth', None)

class DatabaseManager:
    def __init__(self, db_path):
        self.db_path = db_path

    def execute_query(self, query, params=None):
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)
            conn.commit()
            return cursor.fetchall()

    def execute_many(self, query, params):
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.cursor()
            cursor.executemany(query, params)
            conn.commit()

    def get_table_schemas(self):
        tables = self.execute_query("SELECT name FROM sqlite_master WHERE type='table';")
        schemas = []
        for table in tables:
            table_name = table[0]
            columns = self.execute_query(f"PRAGMA table_info({table_name})")
            schemas.append((table_name, columns))
        return schemas

class SpaceAcademyDatabase(DatabaseManager):
    def __init__(self, db_path='space_academy.db'):
        super().__init__(db_path)

    def create_default_database(self):
        self.execute_query('''
        CREATE TABLE IF NOT EXISTS students (
            student_id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            species TEXT NOT NULL,
            home_planet TEXT NOT NULL,
            admission_date DATE NOT NULL,
            age INTEGER NOT NULL,
            gpa FLOAT NOT NULL
        )
        ''')

        self.execute_query('''
        CREATE TABLE IF NOT EXISTS classes (
            class_id INTEGER PRIMARY KEY,
            class_name TEXT NOT NULL,
            instructor TEXT NOT NULL,
            max_capacity INTEGER NOT NULL,
            credits INTEGER NOT NULL,
            department TEXT NOT NULL
        )
        ''')

        self.execute_query('''
        CREATE TABLE IF NOT EXISTS enrollments (
            enrollment_id INTEGER PRIMARY KEY,
            student_id INTEGER,
            class_id INTEGER,
            enrollment_date DATE NOT NULL,
            grade FLOAT,
            FOREIGN KEY (student_id) REFERENCES students (student_id),
            FOREIGN KEY (class_id) REFERENCES classes (class_id)
        )
        ''')

        self.execute_query('''
        CREATE TABLE IF NOT EXISTS extracurriculars (
            student_id INTEGER PRIMARY KEY,
            activities JSON NOT NULL,
            FOREIGN KEY (student_id) REFERENCES students (student_id)
        )
        ''')

        self.populate_database()

    def populate_database(self):
        students_data = self.generate_students(100)
        self.execute_many('INSERT INTO students (name, species, home_planet, admission_date, age, gpa) VALUES (?, ?, ?, ?, ?, ?)', students_data)

        classes_data = self.generate_classes(10)
        self.execute_many('INSERT INTO classes (class_name, instructor, max_capacity, credits, department) VALUES (?, ?, ?, ?, ?)', classes_data)

        enrollments_data = self.generate_enrollments(100, 10)
        self.execute_many('INSERT INTO enrollments (student_id, class_id, enrollment_date, grade) VALUES (?, ?, ?, ?)', enrollments_data)

        extracurriculars_data = self.generate_extracurriculars(100)
        self.execute_many('INSERT INTO extracurriculars (student_id, activities) VALUES (?, ?)', extracurriculars_data)

    def generate_extracurriculars(self, num_students):
        activities = [
            "Intergalactic Chess Club", "Xenobotany Society", "Zero-G Sports Team",
            "Holographic Art Collective", "Time Travelers Association",
            "Alien Languages Club", "Quantum Computing Group", "Teleportation Ethics Board",
            "Cosmic Cuisine Cooking Club", "Extraterrestrial Music Ensemble"
        ]

        roles = ["President", "Vice President", "Secretary", "Member", "Member", "Member"]

        extracurriculars = []
        for student_id in range(1, num_students + 1):
            num_activities = random.randint(0, 3)
            student_activities = random.sample(activities, num_activities)
            activity_data = {
                "activities": [
                    {
                        "name": activity,
                        "role": random.choice(roles),
                        "hours_per_week": random.randint(1, 10),
                    } for activity in student_activities
                ]
            }
            extracurriculars.append((student_id, json.dumps(activity_data)))

        return extracurriculars

    def generate_students(self, num_students):
        first_names = ["Zorp", "Lira", "Blip", "Galax", "Nebula", "Quasar", "Zenith", "Vortex", "Aurora", "Neutron", "Cosmic", "Pulsar", "Gravity", "Quantum", "Stellar"]
        last_names = ["Xylax", "Starwhisper", "Neutron", "Stormrider", "Moonshadow", "Flux", "Stardust", "Cosmic", "Lightweave", "Starburst"]
        species_list = ["Zorlack", "Elf", "Robot", "Human", "Lunarian", "Energy Being", "Celestial", "Vortexian", "Photonic", "Chronovore"]
        planets = ["Xenon-7", "Eldoria", "Mechanica", "Earth", "Luna", "Novaria", "Astralis", "Whirlpool-9", "Lumina", "Temporia"]

        students = []
        for _ in range(num_students):
            name = f"{random.choice(first_names)} {random.choice(last_names)}"
            species = random.choice(species_list)
            planet = random.choice(planets)
            admission_date = (datetime(2340, 1, 1) + timedelta(days=random.randint(0, 365*10))).strftime('%Y-%m-%d')

            # age should be pareto -- most are near 18
            age = int(random.paretovariate(2) * 5) + 1

            # age = random.randint(9, 100)
            gpa = round(random.uniform(1.5, 4.0), 2)
            students.append((name, species, planet, admission_date, age, gpa))

        return students

    def generate_classes(self, num_classes):
        class_names = ["Astro-Navigation", "Xenobiology", "Quantum Mechanics", "Telepathy", "Lightsaber Combat",
                       "Wormhole Engineering", "Alien Linguistics", "Cosmic History", "Zero-G Athletics", "Interstellar Diplomacy"]
        instructors = ["Dr.", "Professor", "Master", "Captain", "Archivist", "Ambassador"]
        departments = ["Navigation", "Biology", "Physics", "Psionics", "Combat", "Engineering", "Languages", "History", "Physical Education", "Politics"]

        classes = []
        for i in range(num_classes):
            name = random.choice(class_names)
            instructor = f"{random.choice(instructors)} {random.choice(['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon'])}"
            max_capacity = random.randint(15, 50)
            credits = random.randint(1, 5)
            department = departments[class_names.index(name)]

            # Add number to class (after adding department)
            name = f"{name} {random.choice([101,102,201,202,301,302,401,402])}"

            classes.append((name, instructor, max_capacity, credits, department))


        return classes

    def generate_enrollments(self, num_students, num_classes):
        enrollments = []
        for student_id in range(1, num_students + 1):
            num_enrollments = random.randint(0, 6)
            for _ in range(num_enrollments):
                class_id = random.randint(1, num_classes)
                enrollment_date = (datetime(2340, 1, 1) + timedelta(days=random.randint(0, 365*10))).strftime('%Y-%m-%d')
                grade = round(random.uniform(2.0, 4.0), 1) if random.random() > 0.1 else None
                enrollments.append((student_id, class_id, enrollment_date, grade))

        return enrollments

class QuizDatabase(DatabaseManager):
    def __init__(self, db_path='quiz_questions.db'):
        super().__init__(db_path)

    def create_quiz_database(self):
        self.execute_query('''
        CREATE TABLE IF NOT EXISTS quiz_questions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            question TEXT NOT NULL,
            answer TEXT NOT NULL,
            problem_set TEXT NOT NULL
        )
        ''')

    def add_quiz_question(self, question, answer, problem_set):
        self.execute_query('INSERT INTO quiz_questions (question, answer, problem_set) VALUES (?, ?, ?)',
                           (question, answer, problem_set))

    def load_questions(self, problem_set=None):
        if problem_set:
            return self.execute_query('SELECT question, answer FROM quiz_questions WHERE problem_set = ?', (problem_set,))
        else:
            return self.execute_query('SELECT question, answer FROM quiz_questions')

    def add_default_questions(self):
        default_questions = [
            ("List all student names and their species.", "SELECT name, species FROM students", "basic"),
            ("How many students are enrolled in each class?", "SELECT c.class_name, COUNT(e.student_id) as enrolled_students FROM classes c LEFT JOIN enrollments e ON c.class_id = e.class_id GROUP BY c.class_id", "intermediate"),
            ("What is the average class size?", "SELECT AVG(enrolled_students) as average_class_size FROM (SELECT c.class_id, COUNT(e.student_id) as enrolled_students FROM classes c LEFT JOIN enrollments e ON c.class_id = e.class_id GROUP BY c.class_id)", "intermediate"),
            ("List all classes with their instructors, ordered by class name.", "SELECT class_name, instructor FROM classes ORDER BY class_name", "basic"),
            ("Find the student(s) enrolled in the most classes.", "SELECT s.name, COUNT(e.class_id) as num_classes FROM students s JOIN enrollments e ON s.student_id = e.student_id GROUP BY s.student_id ORDER BY num_classes DESC LIMIT 1", "advanced")
        ]
        for question, answer, problem_set in default_questions:
            self.add_quiz_question(question, answer, problem_set)

class QuizUI:
    def __init__(self, quiz):
        self.quiz = quiz
        self.text_area = widgets.Textarea(value='', placeholder='Type your SQL query here...', description='Query:', layout=widgets.Layout(width='60%', height='100px'))
        self.submit_button = widgets.Button(description="Submit")
        self.next_button = widgets.Button(description="Next Question", layout=widgets.Layout(visibility='hidden'))
        self.try_again_button = widgets.Button(description="Try Again", layout=widgets.Layout(visibility='hidden'))
        self.skip_button = widgets.Button(description="Skip Question")
        self.query_widget = widgets.VBox([self.text_area, widgets.HBox([self.submit_button, self.try_again_button, self.next_button, self.skip_button])])

        self.submit_button.on_click(self.submit_query)
        self.next_button.on_click(self.next_question)
        self.try_again_button.on_click(self.try_again)
        self.skip_button.on_click(self.skip_question)

    def display_current_question(self):
        clear_output(wait=True)
        display(HTML(self.quiz.render_table_schemas()))
        question_html = f"<h3>SQL Question {self.quiz.current_question + 1} of {self.quiz.total_questions}:</h3><p>{self.quiz.questions[self.quiz.current_question]}</p>"
        display(HTML(question_html))

        self.text_area.value = ''
        self.submit_button.layout.visibility = 'visible'
        self.next_button.layout.visibility = 'hidden'
        self.try_again_button.layout.visibility = 'hidden'
        self.skip_button.layout.visibility = 'visible'
        display(self.query_widget)

    def submit_query(self, button):
        self.quiz.submit_query(self.text_area.value.strip())
        self.update_button_visibility()

    def try_again(self, button):
        self.text_area.value = ''
        self.try_again_button.layout.visibility = 'hidden'
        self.display_current_question()

    def skip_question(self, button):
        self.quiz.skip_question()
        self.display_current_question()

    def next_question(self, button):
        if self.quiz.next_question():
            self.display_current_question()
        else:
            self.display_final_score()

    def update_button_visibility(self):
        self.submit_button.layout.visibility = 'hidden' if self.quiz.is_correct else 'visible'
        self.next_button.layout.visibility = 'visible' if self.quiz.is_correct else 'hidden'
        self.try_again_button.layout.visibility = 'visible' if not self.quiz.is_correct else 'hidden'
        self.skip_button.layout.visibility = 'hidden' if self.quiz.is_correct else 'visible'

    def display_final_score(self):
        clear_output(wait=True)
        attempted = len(self.quiz.attempted_questions)
        score_percentage = (self.quiz.score / attempted) * 100 if attempted > 0 else 0
        report_html = f"""
        <h2>Quiz Completed!</h2>
        <h3>Your Final Score: {self.quiz.score}/{attempted}</h3>
        <p>Percentage: {score_percentage:.2f}%</p>
        <p>Questions attempted: {attempted}/{self.quiz.total_questions}</p>
        """
        display(HTML(report_html))

class SQLQuiz:
    def __init__(self, quiz_db_path='quiz_questions.db', source_db_path='space_academy.db'):
        self.quiz_db = QuizDatabase(quiz_db_path)
        self.source_db = SpaceAcademyDatabase(source_db_path)
        self.current_question = 0
        self.questions = []
        self.answers = []
        self.score = 0
        self.total_questions = 0
        self.attempted_questions = set()
        self.is_correct = False
        self.ui = QuizUI(self)

    def run_quiz(self, problem_set=None):
        if not os.path.exists(self.source_db.db_path):
            self.source_db.create_default_database()
        if not os.path.exists(self.quiz_db.db_path):
            self.quiz_db.create_quiz_database()
            self.quiz_db.add_default_questions()

        questions_and_answers = self.quiz_db.load_questions(problem_set)
        self.questions, self.answers = zip(*questions_and_answers) if questions_and_answers else ([], [])

        if not self.questions:
            display(HTML(f"<div>No questions found{' for the specified problem set' if problem_set else ''}.</div>"))
            return

        self.total_questions = len(self.questions)
        self.ui.display_current_question()

    def render_table_schemas(self):
        schemas = self.source_db.get_table_schemas()
        schema_html = "<h2>Database Schema:</h2>"
        schema_html += "<ol>"
        for table_name, columns in schemas:
            column_info = ", ".join(f"{column[1]} {column[2]}" for column in columns)
            schema_html += f"<li><b>{table_name}</b> ({column_info})</li>"
        schema_html += "</ol>"

        schema_html += "<h3>Sample queries</h3>"
        sample_query = f'SELECT * FROM {schemas[0][0]} returns all rows and columns from {schemas[0][0]}.'
        schema_html += sample_query + "<br>"
        sample_query = f'SELECT {schemas[0][1][1][1]} FROM {schemas[0][0]} selects a specific column.'
        schema_html += sample_query
        return schema_html

    def submit_query(self, user_query):
        if not user_query.lower().startswith('select'):
            display(HTML("<div style='color: red;'><strong>Error:</strong> Please enter a valid SELECT query.</div>"))
            return

        try:
            with sqlite3.connect(self.source_db.db_path) as conn:
                user_result = pd.read_sql_query(user_query, conn)
                correct_query = self.answers[self.current_question]
                correct_result = pd.read_sql_query(correct_query, conn)

            self.is_correct = user_result.equals(correct_result)
            if self.is_correct:
                self.score += 1
                display(HTML("<div style='color: green;'><strong>Correct!</strong> Your query produced the expected result.</div>"))
                self.attempted_questions.add(self.current_question)
            else:
                display(HTML("<div style='color: red;'><strong>Incorrect.</strong> Your query did not produce the expected result. You can try again or skip to the next question.</div>"))

            display(HTML("<h4>Your Results:</h4>"))
            display(user_result)
            display(HTML("<h4>Expected Results:</h4>"))
            display(correct_result)

        except Exception as e:
            display(HTML(f"<div style='color: red;'><strong>Error:</strong> {str(e)}</div>"))

    def skip_question(self):
        self.attempted_questions.add(self.current_question)
        return self.next_question()

    def next_question(self):
        self.current_question += 1
        return self.current_question < len(self.questions)

# Usage example
quiz = SQLQuiz()
quiz.run_quiz()

VBox(children=(Textarea(value='', description='Query:', layout=Layout(height='100px', width='60%'), placeholde…

In [12]:
import sqlite3
import os

def setup_quiz_database(db_path='quiz_questions.db'):
    # Create the database if it doesn't exist
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # Create the questions table
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS questions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        question TEXT NOT NULL,
        answer TEXT NOT NULL,
        problem_set TEXT NOT NULL
    )
    ''')

    # Basic SQL questions
    basic_questions = [
        (
            "List the names of all students.",
            "SELECT name FROM students",
            "basic_sql"
        ),
        (
            "Show the names and species of all students, ordered alphabetically by name.",
            "SELECT name, species FROM students ORDER BY name",
            "basic_sql"
        ),
        (
            "Display the names of all students from the planet 'Earth'.",
            "SELECT name FROM students WHERE home_planet = 'Earth'",
            "basic_sql"
        ),
        (
            "List the names and ages of students who are older than 50 years.",
            "SELECT name, age FROM students WHERE age > 50",
            "basic_sql"
        ),
        (
            "Show the names and GPAs of the top 5 students, ordered by GPA in descending order.",
            "SELECT name, gpa FROM students ORDER BY gpa DESC LIMIT 5",
            "basic_sql"
        ),
        (
            "Display all information about students whose names start with 'A'.",
            "SELECT * FROM students WHERE name LIKE 'A%'",
            "basic_sql"
        ),
        (
            "List the names and home planets of students who are either Humans or Elves.",
            "SELECT name, home_planet FROM students WHERE species IN ('Human', 'Elf')",
            "basic_sql"
        ),
        (
            "Show the names and admission dates of students admitted before the year 2340.",
            "SELECT name, admission_date FROM students WHERE admission_date < '2340-01-01'",
            "basic_sql"
        ),
        (
            "Display the names and GPAs of students with a GPA between 3.0 and 3.5.",
            "SELECT name, gpa FROM students WHERE gpa BETWEEN 3.0 AND 3.5",
            "basic_sql"
        ),
        (
            "List all unique species in the student database.",
            "SELECT DISTINCT species FROM students",
            "basic_sql"
        ),
        (
            "Show the names and ages of the 3 youngest students.",
            "SELECT name, age FROM students ORDER BY age ASC LIMIT 3",
            "basic_sql"
        ),
        (
            "Display the names of students whose home planet is not 'Earth', ordered by species.",
            "SELECT name FROM students WHERE home_planet != 'Earth' ORDER BY species",
            "basic_sql"
        ),
        (
            "List the names and GPAs of students with a name ending in 'x'.",
            "SELECT name, gpa FROM students WHERE name LIKE '%x'",
            "basic_sql"
        ),
        (
            "Show all information for the student with the highest GPA.",
            "SELECT * FROM students ORDER BY gpa DESC LIMIT 1",
            "basic_sql"
        ),
        (
            "Display the names and home planets of students admitted in the year 2342.",
            "SELECT name, home_planet FROM students WHERE admission_date LIKE '2342%'",
            "basic_sql"
        ),
        (
            "List the names of students whose species contains the letter 'o'.",
            "SELECT name FROM students WHERE species LIKE '%o%'",
            "basic_sql"
        ),
        (
            "Show the names and ages of students, ordered first by age (descending) and then by name (ascending).",
            "SELECT name, age FROM students ORDER BY age DESC, name ASC",
            "basic_sql"
        ),

        (
            "List the names of all students except those from the 'Xenon-7' planet.",
            "SELECT name FROM students WHERE home_planet != 'Xenon-7'",
            "basic_sql"
        ),
        (
            "Show the names and admission dates of the 10 most recently admitted students.",
            "SELECT name, admission_date FROM students ORDER BY admission_date DESC LIMIT 10",
            "basic_sql"
        )
    ]

        # Aggregate and Math questions
    aggregate_math_questions = [
        (
            "What is the total number of students in the database?",
            "SELECT COUNT(*) FROM students",
            "aggregate_math"
        ),
        (
            "What is the average age of all students?",
            "SELECT AVG(age) FROM students",
            "aggregate_math"
        ),
        (
            "What is the highest GPA among all students?",
            "SELECT MAX(gpa) FROM students",
            "aggregate_math"
        ),
        (
            "What is the lowest GPA among all students?",
            "SELECT MIN(gpa) FROM students",
            "aggregate_math"
        ),
        (
            "What is the sum of all students' ages?",
            "SELECT SUM(age) FROM students",
            "aggregate_math"
        ),
        (
            "What is the age difference between the oldest and youngest student?",
            "SELECT MAX(age) - MIN(age) FROM students",
            "aggregate_math"
        ),
        (
            "What is the average GPA rounded to two decimal places?",
            "SELECT ROUND(AVG(gpa), 2) FROM students",
            "aggregate_math"
        ),
        (
            "How many distinct home planets are represented in the student body?",
            "SELECT COUNT(DISTINCT home_planet) FROM students",
            "aggregate_math"
        ),
        (
            "What is the total age of all students from Earth?",
            "SELECT SUM(age) FROM students WHERE home_planet = 'Earth'",
            "aggregate_math"
        ),
        (
            "What is the average age of students with a GPA above 3.5?",
            "SELECT AVG(age) FROM students WHERE gpa > 3.5",
            "aggregate_math"
        ),
        (
            "What is the difference between the highest and lowest GPA?",
            "SELECT MAX(gpa) - MIN(gpa) FROM students",
            "aggregate_math"
        ),
        (
            "How many students have a name longer than 15 characters?",
            "SELECT COUNT(*) FROM students WHERE LENGTH(name) > 15",
            "aggregate_math"
        ),
        (
            "What is the average length of student names?",
            "SELECT AVG(LENGTH(name)) FROM students",
            "aggregate_math"
        ),
        (
            "What is the total number of credits for all classes?",
            "SELECT SUM(credits) FROM classes",
            "aggregate_math"
        ),
        (
            "What is the average maximum capacity of all classes?",
            "SELECT AVG(max_capacity) FROM classes",
            "aggregate_math"
        ),
        (
            "How many enrollments have a grade (are not NULL)?",
            "SELECT COUNT(grade) FROM enrollments",
            "aggregate_math"
        ),
        (
            "What is the highest grade given in any class?",
            "SELECT MAX(grade) FROM enrollments",
            "aggregate_math"
        ),

        (
            "How many students have a GPA within 0.5 points of the average GPA?",
            "SELECT COUNT(*) FROM students WHERE ABS(gpa - (SELECT AVG(gpa) FROM students)) <= 0.5",
            "aggregate_math"
        ),
        (
            "What is the total age of all students divided by the total number of students?",
            "SELECT SUM(age) * 1.0 / COUNT(*) FROM students",
            "aggregate_math"
        )
    ]

    join_questions = [
        (
            "List the names of all students along with the classes they are enrolled in.",
            "SELECT s.name, c.class_name FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id INNER JOIN classes c ON e.class_id = c.class_id",
            "join_queries"
        ),
        (
            "Show the names of students and their instructors for all enrollments. Use 'student_name' as the alias for the student's name.",
            "SELECT s.name AS student_name, c.instructor FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id INNER JOIN classes c ON e.class_id = c.class_id",
            "join_queries"
        ),
        (
            "Display the names of all students and the number of classes they are enrolled in. Use 'num_classes' as the alias for the count.",
            "SELECT s.name, COUNT(e.class_id) AS num_classes FROM students s LEFT JOIN enrollments e ON s.student_id = e.student_id GROUP BY s.student_id",
            "join_queries"
        ),
        (
            "List all classes along with the number of students enrolled in each. Use 'num_students' as the alias for the count.",
            "SELECT c.class_name, COUNT(e.student_id) AS num_students FROM classes c LEFT JOIN enrollments e ON c.class_id = e.class_id GROUP BY c.class_id",
            "join_queries"
        ),
        (
            "Show the names of students who are enrolled in classes starting with 'Astro-Navigation'.",
            "SELECT DISTINCT s.name FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id INNER JOIN classes c ON e.class_id = c.class_id WHERE c.class_name LIKE 'Astro-Navigation 101'",
            "join_queries"
        ),
        (
            "Display the names of students and their grades in classes starting with 'Quantum Mechanics'.",
            "SELECT s.name, e.grade FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id INNER JOIN classes c ON e.class_id = c.class_id WHERE c.class_name LIKE 'Quantum Mechanics'",
            "join_queries"
        ),
        (
            "List all classes with no enrolled students.",
            "SELECT c.class_name FROM classes c LEFT JOIN enrollments e ON c.class_id = e.class_id WHERE e.enrollment_id IS NULL",
            "join_queries"
        ),
        (
            "Show the names of students who are not enrolled in any classes.",
            "SELECT s.name FROM students s LEFT JOIN enrollments e ON s.student_id = e.student_id WHERE e.enrollment_id IS NULL",
            "join_queries"
        ),
        (
            "Display the names of students and their average grades across all classes. Use 'avg_grade' as the alias for the average grade.",
            "SELECT s.name, AVG(e.grade) AS avg_grade FROM students s LEFT JOIN enrollments e ON s.student_id = e.student_id GROUP BY s.student_id",
            "join_queries"
        ),
        (
            "List the names of instructors and the number of students in their largest class. Use 'max_students' as the alias for the maximum number of students.",
            "SELECT c.instructor, MAX(class_size) AS max_students FROM (SELECT class_id, COUNT(student_id) AS class_size FROM enrollments GROUP BY class_id) AS class_sizes INNER JOIN classes c ON class_sizes.class_id = c.class_id GROUP BY c.instructor",
            "join_queries"
        ),
        (
            "Show the names of students enrolled in more than 2 classes.",
            "SELECT s.name FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id GROUP BY s.student_id HAVING COUNT(DISTINCT e.class_id) > 2",
            "join_queries"
        ),
        (
            "Display the classes with the highest average grade. Use 'avg_grade' as the alias for the average grade.",
            "SELECT c.class_name, AVG(e.grade) AS avg_grade FROM classes c INNER JOIN enrollments e ON c.class_id = e.class_id GROUP BY c.class_id ORDER BY avg_grade DESC LIMIT 1",
            "join_queries"
        ),
        (
            "List students and their grades who are enrolled in classes with more than 3 credits.",
            "SELECT s.name, c.class_name, e.grade FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id INNER JOIN classes c ON e.class_id = c.class_id WHERE c.credits > 3",
            "join_queries"
        ),
        (
            "Show the names of students and their latest enrollment date. Use 'latest_enrollment' as the alias for the maximum enrollment date.",
            "SELECT s.name, MAX(e.enrollment_date) AS latest_enrollment FROM students s LEFT JOIN enrollments e ON s.student_id = e.student_id GROUP BY s.student_id",
            "join_queries"
        ),
        (
            "Display the names of instructors and the total credits of classes they teach. Use 'total_credits' as the alias for the sum of credits.",
            "SELECT c.instructor, SUM(c.credits) AS total_credits FROM classes c GROUP BY c.instructor",
            "join_queries"
        ),
        (
            "List all students and their enrolled classes, including students with no enrollments.",
            "SELECT s.name, c.class_name FROM students s LEFT JOIN enrollments e ON s.student_id = e.student_id LEFT JOIN classes c ON e.class_id = c.class_id",
            "join_queries"
        ),

        (
            "Display the names of students and their grades for classes in the 'Physics' department.",
            "SELECT s.name, c.class_name, e.grade FROM students s INNER JOIN enrollments e ON s.student_id = e.student_id INNER JOIN classes c ON e.class_id = c.class_id WHERE c.department = 'Physics'",
            "join_queries"
        ),
        (
            "List the names of students who have the same GPA as at least one other student.",
            "SELECT DISTINCT s1.name FROM students s1 INNER JOIN students s2 ON s1.gpa = s2.gpa AND s1.student_id != s2.student_id",
            "join_queries"
        ),
        (
            "Show the classes with more students enrolled than the average class size. Use 'num_students' as the alias for the count of students.",
            "SELECT c.class_name, COUNT(e.student_id) AS num_students FROM classes c INNER JOIN enrollments e ON c.class_id = e.class_id GROUP BY c.class_id HAVING num_students > (SELECT AVG(class_size) FROM (SELECT class_id, COUNT(student_id) AS class_size FROM enrollments GROUP BY class_id))",
            "join_queries"
        )
    ]

    set_operations_questions = [
        (
            "List all unique home planets of students and all unique departments of classes. Use UNION to combine the results.",
            """
            SELECT home_planet AS location FROM students
            UNION
            SELECT department AS location FROM classes
            """,
            "set_operations"
        ),
        (
            "List student names that do not appear in the enrollments table. Use EXCEPT.",
            """
            SELECT name FROM students
            EXCEPT
            SELECT s.name FROM students s
            INNER JOIN enrollments e ON s.student_id = e.student_id
            """,
            "set_operations"
        ),
        (
            "Find ages that appear both in the students table and as class credits. Use INTERSECT.",
            """
            SELECT age FROM students
            INTERSECT
            SELECT credits FROM classes
            """,
            "set_operations"
        ),
        (
            "Show all unique student ids from both the students table and the enrollments table. Use UNION.",
            """
            SELECT student_id FROM students
            UNION
            SELECT student_id FROM enrollments
            """,
            "set_operations"
        ),
        (
            "Find student ids that appear in both the students table and the enrollments table. Use INTERSECT.",
            """
            SELECT student_id FROM students
            INTERSECT
            SELECT student_id FROM enrollments
            """,
            "set_operations"
        ),
        (
            "Show student ids from the students table that do not appear in the enrollments table. Use EXCEPT.",
            """
            SELECT student_id FROM students
            EXCEPT
            SELECT student_id FROM enrollments
            """,
            "set_operations"
        )
    ]

    # Insert the questions into the database
    cursor.executemany('INSERT INTO questions (question, answer, problem_set) VALUES (?, ?, ?)', set_operations_questions)
    cursor.executemany('INSERT INTO questions (question, answer, problem_set) VALUES (?, ?, ?)', join_questions)
    cursor.executemany('INSERT INTO questions (question, answer, problem_set) VALUES (?, ?, ?)', basic_questions)
    cursor.executemany('INSERT INTO questions (question, answer, problem_set) VALUES (?, ?, ?)', aggregate_math_questions)

    # Commit the changes and close the connection
    conn.commit()
    conn.close()

    print(f"Quiz database created and populated with basic SQL questions at {db_path}")

setup_quiz_database()

Quiz database created and populated with basic SQL questions at quiz_questions.db


In [13]:
import sqlite3
import pandas as pd

def validate_questions(quiz_db_path='quiz_questions.db', space_academy_db_path='space_academy.db'):
    # Connect to the quiz database
    quiz_conn = sqlite3.connect(quiz_db_path)
    quiz_cursor = quiz_conn.cursor()

    # Connect to the space academy database
    space_conn = sqlite3.connect(space_academy_db_path)

    # Fetch all questions from the quiz database
    quiz_cursor.execute("SELECT id, question, answer, problem_set FROM questions")
    questions = quiz_cursor.fetchall()

    issues = []

    for q_id, question, answer, problem_set in questions:
        try:
            # Execute the query
            df = pd.read_sql_query(answer, space_conn)

            # Check if the result is empty
            if df.empty:
                issues.append(f"Question {q_id} ({problem_set}) returns no results: {question}")
        except Exception as e:
            issues.append(f"Question {q_id} ({problem_set}) raises an error: {question}\nError: {str(e)}")

    # Close connections
    quiz_conn.close()
    space_conn.close()

    # Report issues
    if issues:
        print("The following issues were found:")
        for issue in issues:
            print(issue)
            print("-" * 50)
    else:
        print("All questions validated successfully!")

validate_questions()

The following issues were found:
Question 3 (set_operations) returns no results: Find ages that appear both in the students table and as class credits. Use INTERSECT.
--------------------------------------------------
Question 11 (join_queries) returns no results: Show the names of students who are enrolled in classes starting with 'Astro-Navigation'.
--------------------------------------------------
Question 12 (join_queries) returns no results: Display the names of students and their grades in classes starting with 'Quantum Mechanics'.
--------------------------------------------------
Question 13 (join_queries) returns no results: List all classes with no enrolled students.
--------------------------------------------------
Question 33 (basic_sql) returns no results: Show the names and admission dates of students admitted before the year 2340.
--------------------------------------------------
