In [1]:
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
import time
import argparse
import csv
import os

from _0_prepare_tasks import clean_csv
from _1_translate_tasks import translate_csv
from gemini._2_solve_tasks_gemini import solve_tasks
from gemini._3_technical_terms_gemini import extract_technical_terms
from gemini._4_evaluate_solution_gemini import evaluate_solutions
from _5_cross_lang_validation import compare_results

  from .autonotebook import tqdm as notebook_tqdm


## Data collection

### Cleaning the exercise list file

In [2]:
# Raw exercise list (input) and the path its cleaned copy is written to.
input_exercises, cleaned_exercises = (
    "topic_areas.csv",
    "topic_areas_cleaned.csv",
)

# Clean the raw CSV, then report where the cleaned file was saved.
clean_csv(
    input_exercises,
    cleaned_exercises,
)
print(f"Cleaned exercises saved to {cleaned_exercises}")

Cleaned exercises saved to topic_areas_cleaned.csv


### Translate exercises to German and Arabic

In [3]:
# Output paths for the translated exercise lists (German, Arabic).
translated_exercises_de, translated_exercises_ar = (
    "topic_areas_cleaned_de.csv",
    "topic_areas_cleaned_ar.csv",
)

**German translation**

In [4]:
# Translate the cleaned exercises into German and report the output path.
translate_csv(
    cleaned_exercises,
    translated_exercises_de,
    target_language="German",
)
print(f"Translated tasks to German saved to {translated_exercises_de}")

Translating exercises from English to German...
Translation to German complete. CSV file saved as: topic_areas_cleaned_de.csv.
Translated tasks to German saved to topic_areas_cleaned_de.csv


**Arabic translation**

In [5]:
# Translate the cleaned exercises into Arabic and report the output path.
translate_csv(
    cleaned_exercises,
    translated_exercises_ar,
    target_language="Arabic",
)
print(f"Translated tasks to Arabic saved to {translated_exercises_ar}")

Translating exercises from English to Arabic...
Translation to Arabic complete. CSV file saved as: topic_areas_cleaned_ar.csv.
Translated tasks to Arabic saved to topic_areas_cleaned_ar.csv


## Solving Exercises

In [4]:
# Per-language output paths for the solutions (English, German, Arabic).
solved_exercises_en, solved_exercises_de, solved_exercises_ar = (
    "gemini/topic_areas_solutions_en.csv",
    "gemini/topic_areas_solutions_de.csv",
    "gemini/topic_areas_solutions_ar.csv",
)

English Solutions:

In [5]:
# Solve the English (cleaned) exercises; the solutions CSV at
# solved_exercises_en is loaded back right afterwards for a quick look.
solve_tasks(cleaned_exercises, solved_exercises_en)
print(f"Solved tasks in English saved to {solved_exercises_en}")

df_en = pd.read_csv(solved_exercises_en)
# A bare last expression renders the preview as a rich table in the notebook
# instead of a wrapped plain-text dump from print().
df_en.head()

Solving task 1: Draw 10 stars in the box and color 5 of them blue.
Solving task 2: Count the number of apples in the picture and write the total number in the empty circle below.
Solving task 3: Write the numbers from 1 to 20 in order on the lines provided.
Solving task 4: Draw lines to connect the numbers 1 to 20 with their matching dot patterns.
Solving task 5: Circle the number that has the same number of dots underneath it; for example if you see 5 dots circle the number 5.
Solving task 6: Fill in the blanks with the correct number symbol that matches the dot patterns shown on the page.
Solving task 7: Emma has 24 apples and she gives 13 apples to her friend; how many apples does she have left?
Solving task 8: A farmer has 36 carrots and picks another 12 carrots; how many carrots does he have in total?
Solving task 9: Liam read 45 pages of his book then finished 22 more pages; how many pages did he read altogether?
Solving task 10: Sarah has 8 boxes of cookies and each box contains

German Solutions:

In [6]:
# Solve the German exercises; the solutions CSV at solved_exercises_de is
# loaded back right afterwards for a quick look.
solve_tasks(translated_exercises_de, solved_exercises_de)
print(f"Solved tasks in German saved to {solved_exercises_de}")

df_de = pd.read_csv(solved_exercises_de)
# A bare last expression renders the preview as a rich table in the notebook
# instead of a wrapped plain-text dump from print().
df_de.head()

Solving task 1: Zeichne 10 Sterne in das Kästchen und male 5 davon blau.
Solving task 2: Zähle die Anzahl der Äpfel im Bild und schreibe die Gesamtsumme in den leeren Kreis darunter.
Solving task 3: Schreibe die Zahlen von 1 bis 20 der Reihenfolge nach auf die vorgesehenen Linien.
Solving task 4: Zeichne Linien, um die Zahlen 1 bis 20 mit ihren passenden Punktmustern zu verbinden.
Solving task 5: Umkreise die Zahl, die die gleiche Anzahl von Punkten darunter hat; zum Beispiel, wenn du 5 Punkte siehst, umkreise die Zahl 5.
Solving task 6: Fülle die Lücken mit dem richtigen Zahlensymbol aus, das den auf der Seite gezeigten Punktmustern entspricht.
Solving task 7: Emma hat 24 Äpfel und sie gibt 13 Äpfel an ihre Freundin; wie viele Äpfel hat sie noch?
Solving task 8: Ein Bauer hat 36 Karotten und pflückt weitere 12 Karotten; wie viele Karotten hat er insgesamt?
Solving task 9: Liam hat 45 Seiten seines Buches gelesen und dann 22 weitere Seiten beendet; wie viele Seiten hat er insgesamt gel

Arabic Solutions:

In [7]:
# Solve the Arabic exercises; the solutions CSV at solved_exercises_ar is
# loaded back right afterwards for a quick look.
solve_tasks(translated_exercises_ar, solved_exercises_ar)
print(f"Solved tasks in Arabic saved to {solved_exercises_ar}")

df_ar = pd.read_csv(solved_exercises_ar)
# A bare last expression renders the preview as a rich table in the notebook
# instead of a wrapped plain-text dump from print().
df_ar.head()

Solving task 1: ارسم 10 نجوم في الصندوق والون 5 منها باللون الأزرق.
Solving task 2: احسب عدد التفاح في الصورة واكتب العدد الكلي في الدائرة الفارغة أدناه.
Solving task 3: اكتب الأرقام من 1 إلى 20 بالترتيب على الأسطر المتاحة.
Solving task 4: ارسم خطوطاً لربط الأرقام من 1 إلى 20 بأنماط النقاط المطابقة لها.
Solving task 5: قم بدائرة الرقم الذي يحتوي على نفس عدد النقاط الموجودة تحته؛ على سبيل المثال، إذا رأيت 5 نقاط، قم بدائرة الرقم 5.
Solving task 6: املأ الفراغات بالرمز الرقمي الصحيح الذي يطابق أنماط النقاط الموضحة في الصفحة.
Solving task 7: إيما لديها 24 تفاحة وقد أعطت 13 تفاحة لصديقتها؛ كم عدد التفاح المتبقي لديها؟
Solving task 8: مزارع لديه 36 جزرة ويقطف 12 جزرة أخرى؛ كم عدد الجزر التي يمتلكها في المجموع؟
Solving task 9: ليام قرأ 45 صفحة من كتابه ثم أنهى 22 صفحة أخرى؛ فكم صفحًة قرأها إجمالًا؟
Solving task 10: سارة لديها 8 علب من الكعك وكل علبة تحتوي على 12 كعكة; كم عدد الكعك الذي تمتلكه في المجموع؟
Solving task 11: يمتلك مزارع 240 تفاحة ويريد تعبئتها في سلال بحيث تحتوي كل سلة على 15 تف

## Extracting technical terms

In [8]:
# Per-language output paths for the extracted technical terms.
terms_en, terms_de, terms_ar = (
    "gemini/technical_terms_en.csv",
    "gemini/technical_terms_de.csv",
    "gemini/technical_terms_ar.csv",
)

In [9]:
# Development aid: reload the technical-terms module so that edits made to its
# source after the kernel first imported it take effect without a restart.
import importlib
import gemini._3_technical_terms_gemini

importlib.reload(gemini._3_technical_terms_gemini)
# Re-import the name: the function object bound by the import at the top of the
# notebook still belongs to the pre-reload module state.
from gemini._3_technical_terms_gemini import extract_technical_terms

English terms

In [10]:
# Extract technical terms from the English (cleaned) exercises.
extract_technical_terms(
    cleaned_exercises,
    terms_en,
    target_language="en",
)
print(f"Extracted technical terms in English saved to {terms_en}")

Extracting technical terms for task 1: Draw 10 stars in the box and color 5 of them blue.
Extracting technical terms for task 2: Count the number of apples in the picture and write the total number in the empty circle below.
Extracting technical terms for task 3: Write the numbers from 1 to 20 in order on the lines provided.
Extracting technical terms for task 4: Draw lines to connect the numbers 1 to 20 with their matching dot patterns.
Extracting technical terms for task 5: Circle the number that has the same number of dots underneath it; for example if you see 5 dots circle the number 5.
Extracting technical terms for task 6: Fill in the blanks with the correct number symbol that matches the dot patterns shown on the page.
Extracting technical terms for task 7: Emma has 24 apples and she gives 13 apples to her friend; how many apples does she have left?
Extracting technical terms for task 8: A farmer has 36 carrots and picks another 12 carrots; how many carrots does he have in total

German terms

In [11]:
# Extract technical terms from the German exercises.
extract_technical_terms(
    translated_exercises_de,
    terms_de,
    target_language="de",
)
print(f"Extracted technical terms in German saved to {terms_de}")

Extracting technical terms for task 1: Zeichne 10 Sterne in das Kästchen und male 5 davon blau.
Extracting technical terms for task 2: Zähle die Anzahl der Äpfel im Bild und schreibe die Gesamtsumme in den leeren Kreis darunter.
Extracting technical terms for task 3: Schreibe die Zahlen von 1 bis 20 der Reihenfolge nach auf die vorgesehenen Linien.
Extracting technical terms for task 4: Zeichne Linien, um die Zahlen 1 bis 20 mit ihren passenden Punktmustern zu verbinden.
Extracting technical terms for task 5: Umkreise die Zahl, die die gleiche Anzahl von Punkten darunter hat; zum Beispiel, wenn du 5 Punkte siehst, umkreise die Zahl 5.
Extracting technical terms for task 6: Fülle die Lücken mit dem richtigen Zahlensymbol aus, das den auf der Seite gezeigten Punktmustern entspricht.
Extracting technical terms for task 7: Emma hat 24 Äpfel und sie gibt 13 Äpfel an ihre Freundin; wie viele Äpfel hat sie noch?
Extracting technical terms for task 8: Ein Bauer hat 36 Karotten und pflückt weit

Arabic terms

In [12]:
# Extract technical terms from the Arabic exercises.
extract_technical_terms(
    translated_exercises_ar,
    terms_ar,
    target_language="ar",
)
print(f"Extracted technical terms in Arabic saved to {terms_ar}")

Extracting technical terms for task 1: ارسم 10 نجوم في الصندوق والون 5 منها باللون الأزرق.
Extracting technical terms for task 2: احسب عدد التفاح في الصورة واكتب العدد الكلي في الدائرة الفارغة أدناه.
Extracting technical terms for task 3: اكتب الأرقام من 1 إلى 20 بالترتيب على الأسطر المتاحة.
Extracting technical terms for task 4: ارسم خطوطاً لربط الأرقام من 1 إلى 20 بأنماط النقاط المطابقة لها.
Extracting technical terms for task 5: قم بدائرة الرقم الذي يحتوي على نفس عدد النقاط الموجودة تحته؛ على سبيل المثال، إذا رأيت 5 نقاط، قم بدائرة الرقم 5.
Extracting technical terms for task 6: املأ الفراغات بالرمز الرقمي الصحيح الذي يطابق أنماط النقاط الموضحة في الصفحة.
Extracting technical terms for task 7: إيما لديها 24 تفاحة وقد أعطت 13 تفاحة لصديقتها؛ كم عدد التفاح المتبقي لديها؟
Extracting technical terms for task 8: مزارع لديه 36 جزرة ويقطف 12 جزرة أخرى؛ كم عدد الجزر التي يمتلكها في المجموع؟
Extracting technical terms for task 9: ليام قرأ 45 صفحة من كتابه ثم أنهى 22 صفحة أخرى؛ فكم صفحًة قرأه

## Evaluating Solutions

In [13]:
# Per-language output paths for the solution evaluations.
evaluations_en, evaluations_de, evaluations_ar = (
    "gemini/topic_areas_evaluations_en.csv",
    "gemini/topic_areas_evaluations_de.csv",
    "gemini/topic_areas_evaluations_ar.csv",
)

In [14]:
# Evaluate the English solutions, passing the English technical-terms file
# alongside them; the evaluation output path is evaluations_en.
evaluate_solutions(
    solved_exercises_en,
    terms_en,
    evaluations_en,
)
print(f"Evaluation of solutions in English saved to {evaluations_en}")

Evaluating solution for task 1: Draw 10 stars in the box and color 5 of them blue.
Evaluating solution for task 2: Count the number of apples in the picture and write the total number in the empty circle below.
Evaluating solution for task 3: Write the numbers from 1 to 20 in order on the lines provided.
Evaluating solution for task 4: Draw lines to connect the numbers 1 to 20 with their matching dot patterns.
Evaluating solution for task 5: Circle the number that has the same number of dots underneath it; for example if you see 5 dots circle the number 5.
Evaluating solution for task 6: Fill in the blanks with the correct number symbol that matches the dot patterns shown on the page.
Evaluating solution for task 7: Emma has 24 apples and she gives 13 apples to her friend; how many apples does she have left?
Evaluating solution for task 8: A farmer has 36 carrots and picks another 12 carrots; how many carrots does he have in total?
Evaluating solution for task 9: Liam read 45 pages of 

In [15]:
# Evaluate the German solutions, passing the German technical-terms file
# alongside them; the evaluation output path is evaluations_de.
# NOTE(review): the recorded run of this cell logged "429 Resource has been
# exhausted" for every task shown, so the saved file is presumably incomplete
# — re-run once API quota is available and confirm before comparing results.
evaluate_solutions(
    solved_exercises_de,
    terms_de,
    evaluations_de,
)
print(f"Evaluation of solutions in German saved to {evaluations_de}")

Evaluating solution for task 1: Zeichne 10 Sterne in das Kästchen und male 5 davon blau.
Error evaluating task 1: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 2: Zähle die Anzahl der Äpfel im Bild und schreibe die Gesamtsumme in den leeren Kreis darunter.
Error evaluating task 2: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 3: Schreibe die Zahlen von 1 bis 20 der Reihenfolge nach auf die vorgesehenen Linien.
Error evaluating task 3: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 4: Zeichne Linien, um die Zahlen 1 bis 20 mit ihren passenden Punktmustern zu verbinden.
Error evaluating task 4: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 5: Umkreise die Zahl, die die gleiche Anzahl von Punkten darunter hat; zum Beispiel, wenn du 5 Punkte siehst, umkreise die Zahl 5.
Error evaluating task 5: 429 Resource has been exhausted (e.g. check quota).
Evaluat

In [16]:
# Evaluate the Arabic solutions, passing the Arabic technical-terms file
# alongside them; the evaluation output path is evaluations_ar.
# NOTE(review): the recorded run of this cell logged "429 Resource has been
# exhausted" for every task shown, so the saved file is presumably incomplete
# — re-run once API quota is available and confirm before comparing results.
evaluate_solutions(
    solved_exercises_ar,
    terms_ar,
    evaluations_ar,
)
print(f"Evaluation of solutions in Arabic saved to {evaluations_ar}")

Evaluating solution for task 1: ارسم 10 نجوم في الصندوق والون 5 منها باللون الأزرق.
Error evaluating task 1: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 2: احسب عدد التفاح في الصورة واكتب العدد الكلي في الدائرة الفارغة أدناه.
Error evaluating task 2: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 3: اكتب الأرقام من 1 إلى 20 بالترتيب على الأسطر المتاحة.
Error evaluating task 3: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 4: ارسم خطوطاً لربط الأرقام من 1 إلى 20 بأنماط النقاط المطابقة لها.
Error evaluating task 4: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 5: قم بدائرة الرقم الذي يحتوي على نفس عدد النقاط الموجودة تحته؛ على سبيل المثال، إذا رأيت 5 نقاط، قم بدائرة الرقم 5.
Error evaluating task 5: 429 Resource has been exhausted (e.g. check quota).
Evaluating solution for task 6: املأ الفراغات بالرمز الرقمي الصحيح الذي يطابق أنماط النقاط الموضحة ف

## Results Comparison

In [17]:
# Language code -> evaluation CSV path, in the order en, de, ar.
files = dict(
    en=evaluations_en,
    de=evaluations_de,
    ar=evaluations_ar,
)

In [18]:
# Compare the per-language evaluation files (language code -> CSV path).
# NOTE(review): the recorded run raised a KeyError for three metric columns
# missing from the index; the German and Arabic evaluation runs logged 429
# quota errors, so their CSVs presumably lack those score columns — confirm
# and re-run the evaluations before this cell.
cross_validation_results = compare_results(files)

KeyError: "['Learning Appropriateness (Is the Explanation Suitable for Learners?)', 'Generalization (Can the Learner Apply This Method to Similar Problems?)', 'Appropriateness Based on Progress Level (Grade)'] not in index"

In [None]:
# Load the two result tables from the working directory for a quick preview.
comparison_results = pd.read_csv('comparison_results.csv')
highest_averages = pd.read_csv('highest_averages.csv')

# display() is injected by the IPython kernel; it renders both previews as
# rich tables from a single cell instead of plain-text print() output.
display(comparison_results.head(), highest_averages.head())

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the comparison table. The first CSV column holds the metric names, so
# it is used directly as the row index; this avoids depending on pandas'
# auto-generated "Unnamed: 0" label and on an in-place set_index().
data = pd.read_csv("comparison_results.csv", index_col=0)

# Explicit figure/axes keep all styling attached to this one plot.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    data,
    annot=True,  # show the value in each cell
    cmap="YlGnBu",  # colour palette
    fmt=".2f",  # two decimal places
    linewidths=0.5,  # thin gap between cells
    cbar_kws={"label": "Score"},  # colour-bar label
    ax=ax,
)

ax.set_title("Comparison Heatmap of Scores by Language and Metric", fontsize=14)
ax.set_xlabel("Language", fontsize=12)
ax.set_ylabel("Metrics", fontsize=12)

# Rotate the x tick labels so longer names stay readable.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=10)

fig.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load the comparison table with the first (unnamed) column as the index.
# NOTE(review): assumes rows are metrics and columns are languages, i.e. the
# layout the heatmap cell reads from this same file; the previous
# set_index('Language') needed a 'Language' column that this layout does not
# have — confirm against the CSV written by compare_results.
comparison_results = pd.read_csv('comparison_results.csv', index_col=0)

# Reshape to long form (one row per metric/language pair) so the bars can be
# grouped by metric and coloured by language.
comparison_long = (
    comparison_results
    .rename_axis("Metric")
    .reset_index()
    .melt(id_vars="Metric", var_name="Language", value_name="Score")
)

# Grouped bar plot: one group per metric, one bar per language.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=comparison_long, x="Metric", y="Score", hue="Language", ax=ax)
ax.set_title('Comparison of Languages Across Different Metrics')
ax.set_ylabel('Scores')
ax.set_xlabel('Metrics')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()