In [1]:
import argparse
import csv
import os
import time

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from dotenv import load_dotenv
from openai import OpenAI

from _0_prepare_tasks import clean_csv
from _1_translate_tasks import translate_csv
from _2_solve_tasks import solve_tasks
from _3_technical_terms import extract_technical_terms
from _4_evaluate_solution import evaluate_solutions
from _5_cross_lang_validation import compare_results

## Data collection

### Cleaning the exercise list file

In [2]:
# Paths of the raw exercise list and of its cleaned copy.
input_exercises = "topic_areas.csv"
cleaned_exercises = "topic_areas_cleaned.csv"

# clean_csv comes from _0_prepare_tasks; presumably it reads the first path
# and writes the cleaned rows to the second one (TODO confirm in that module).
clean_csv(input_exercises, cleaned_exercises)

print(f"Cleaned exercises saved to {cleaned_exercises}")

Cleaned exercises saved to topic_areas_cleaned.csv


### Translate exercises to German and Arabic

In [3]:
# Output paths for the translated exercise lists (German and Arabic).
translated_exercises_de = "topic_areas_cleaned_de.csv"
translated_exercises_ar = "topic_areas_cleaned_ar.csv"

**German translation**

In [4]:
# Translate the cleaned English exercises into German; translate_csv reports
# its own progress and the path of the CSV it saved.
translate_csv(cleaned_exercises, translated_exercises_de, target_language="German")
print(f"Translated tasks to German saved to {translated_exercises_de}")

Translating exercises from English to German...
Translation to German complete. CSV file saved as: topic_areas_cleaned_de.csv.
Translated tasks to German saved to topic_areas_cleaned_de.csv


**Arabic translation**

In [5]:
# Translate the cleaned English exercises into Arabic; translate_csv reports
# its own progress and the path of the CSV it saved.
translate_csv(cleaned_exercises, translated_exercises_ar, target_language="Arabic")
print(f"Translated tasks to Arabic saved to {translated_exercises_ar}")

Translating exercises from English to Arabic...
Translation to Arabic complete. CSV file saved as: topic_areas_cleaned_ar.csv.
Translated tasks to Arabic saved to topic_areas_cleaned_ar.csv


## Solving Exercises

In [7]:
# Output paths for the model solutions, one CSV per language.
solved_exercises_en = "topic_areas_solutions_en.csv"
solved_exercises_de = "topic_areas_solutions_de.csv"
solved_exercises_ar = "topic_areas_solutions_ar.csv"

In [8]:
def show_results_overview(df, model="gpt-4o-mini", limit=4):
    """Print a short, human-readable preview of solved exercises.

    Args:
        df: DataFrame with the columns 'Topic Area', 'Topic',
            'Progress Level', 'Exercise' and '<model> solution'.
        model: Model name; selects the '<model> solution' column and is also
            used as the label printed above each solution.
        limit: Number of leading rows to print. The default of 4 matches the
            previous cutoff on a default 0..n integer index.

    Raises:
        KeyError: If df has no '<model> solution' column.
    """
    # Built outside the print call: reusing double quotes inside a
    # double-quoted f-string is only valid syntax on Python 3.12+.
    solution_column = f"{model} solution"
    if solution_column not in df.columns:
        raise KeyError(
            f"Column '{solution_column}' not found; "
            f"available columns: {list(df.columns)}"
        )

    # head() selects rows by position, so the preview does not depend on the
    # index labels starting at 0.
    for _, row in df.head(limit).iterrows():
        print(f"Topic Area: {row['Topic Area']}")
        print(f"Topic: {row['Topic']}")
        print(f"Progress Level: {row['Progress Level']}")
        print(f"Exercise: {row['Exercise']}")
        print()
        # Label with the model actually shown instead of a fixed name.
        print(f"{model} solution:")
        print(row[solution_column])
        print("--------------------------\n")

English Solutions:

In [14]:
# Solve the cleaned English exercises with the Gemini model and store the
# answers in solved_exercises_en.
# NOTE(review): the recorded run of this cell ended in
# "NameError: name 'openai' is not defined"; this notebook only imports the
# OpenAI class, so the failing reference is presumably inside solve_tasks —
# TODO confirm in _2_solve_tasks.
solve_tasks(cleaned_exercises, solved_exercises_en, model='gemini-1.5-flash')
print(f"Solved tasks in English saved to {solved_exercises_en}")

# Reload the saved CSV for a quick look at the first rows.
df_en = pd.read_csv(solved_exercises_en)
print(df_en.head())

Solving task 1: Count objects up to 20
Solving task 2: Write 15 in tens and ones
Solving task 3: Write 234 in expanded form
Solving task 4: Show ½ with a fraction bar
Solving task 5: Express 3.75 as a fraction
Solving task 6: Convert 27 (base 10) to base 2
Solving task 7: Plot negative numbers on a line
Solving task 8: Illustrate 3/5 as decimal and percent
Solving task 9: Rewrite 2.4×10³ in standard form
Solving task 10: Represent 125 in binary


NameError: name 'openai' is not defined

In [18]:
# Reload the English solutions from disk and print a preview.
df_en = pd.read_csv(solved_exercises_en)

# NOTE(review): no model is passed, so show_results_overview uses its default
# "gpt-4o-mini", while the English solve cell requests 'gemini-1.5-flash' —
# confirm the '<model> solution' column in the CSV matches.
show_results_overview(df_en)

Topic Area: Zahlen und Operationen
Topic: Zahlen auffassen und darstellen
Progress Level: B
Exercise: Count objects up to 20

gpt-4o solution:
Counting objects up to 20 is a straightforward task that can be accomplished in a few simple steps. Here’s a guide on how to do it effectively:

### Steps to Count Objects Up to 20

1. **Gather Your Objects**: 
   - Collect all the objects you want to count. This could be anything, such as blocks, coins, buttons, or even drawings.

2. **Organize the Objects**: 
   - If the objects are scattered, it can be helpful to arrange them in a single line or grouped in small groups (e.g., groups of 5) to make counting easier.

3. **Begin Counting**:
   - Start from one and count each object one by one. 
   - As you count each object, say the number out loud and point to the object. This reinforcement helps ensure you don’t miss any objects or accidentally count one twice.

4. **Use a Method**:
   - If counting more than a handful of objects, consider usin

German Solutions:

In [None]:
# Solve the German exercises; no model is passed, so solve_tasks uses
# whatever default it defines.
solve_tasks(translated_exercises_de, solved_exercises_de)
print(f"Solved tasks in German saved to {solved_exercises_de}")

Solving task 1: Zähle Objekte bis 20
Solving task 2: Schreibe 15 in Zehnern und Einern.


In [None]:
# Load the German solutions into their own frame: the previous code assigned
# them to df_en (overwriting the English data) and then passed the undefined
# name df_de, which raises NameError on a fresh kernel.
df_de = pd.read_csv(solved_exercises_de)

show_results_overview(df_de)

Arabic Solutions:

In [None]:
# Solve the Arabic exercises; no model is passed, so solve_tasks uses
# whatever default it defines.
solve_tasks(translated_exercises_ar, solved_exercises_ar)
print(f"Solved tasks in Arabic saved to {solved_exercises_ar}")

In [None]:
# Load the Arabic solutions into their own frame: the previous code assigned
# them to df_en (overwriting the English data) and then passed the undefined
# name df_ar, which raises NameError on a fresh kernel.
df_ar = pd.read_csv(solved_exercises_ar)

show_results_overview(df_ar)

## Extracting technical terms

In [24]:
# Output paths for the extracted technical terms, one CSV per language.
terms_en = "technical_terms_en.csv"
terms_de = "technical_terms_de.csv"
terms_ar = "technical_terms_ar.csv"

English terms

In [None]:
# Extract technical terms from the English solutions file into terms_en.
extract_technical_terms(solved_exercises_en, terms_en, target_language="en")
print(f"Extracted technical terms in English saved to {terms_en}")

German terms

In [None]:
# Extract technical terms from the German solutions file into terms_de.
extract_technical_terms(solved_exercises_de, terms_de, target_language="de")
print(f"Extracted technical terms in German saved to {terms_de}")

Arabic terms

In [None]:
# Extract technical terms from the Arabic solutions file into terms_ar.
extract_technical_terms(solved_exercises_ar, terms_ar, target_language="ar")
print(f"Extracted technical terms in Arabic saved to {terms_ar}")

## Evaluating Solutions

In [28]:
# Output paths for the solution evaluations, one CSV per language.
evaluations_en = "topic_areas_evaluations_en.csv"
evaluations_de = "topic_areas_evaluations_de.csv"
evaluations_ar = "topic_areas_evaluations_ar.csv"

In [None]:
# Evaluate the English solutions using the English term list; the result
# path is evaluations_en.
evaluate_solutions(solved_exercises_en, terms_en, evaluations_en)
print(f"Evaluation of solutions in English saved to {evaluations_en}")

In [None]:
# Evaluate the German solutions using the German term list; the result
# path is evaluations_de.
evaluate_solutions(solved_exercises_de, terms_de, evaluations_de)
print(f"Evaluation of solutions in German saved to {evaluations_de}")

In [None]:
# Evaluate the Arabic solutions using the Arabic term list; the result
# path is evaluations_ar.
evaluate_solutions(solved_exercises_ar, terms_ar, evaluations_ar)
print(f"Evaluation of solutions in Arabic saved to {evaluations_ar}")

## Results Comparisons

In [None]:
# Map each language code to its evaluation CSV path; passed to
# compare_results in the next cell.
files = {
    "en": evaluations_en,
    "de": evaluations_de,
    "ar": evaluations_ar,
}

In [None]:
# Run the cross-language comparison over the per-language evaluation files.
# NOTE(review): the shape of the returned value is not visible here — check
# _5_cross_lang_validation before relying on cross_validation_results.
cross_validation_results = compare_results(files)

In [30]:
# Load the comparison summaries from the working directory.
# NOTE(review): the recorded run failed with FileNotFoundError for
# 'comparison_results.csv'; the files are presumably written by
# compare_results, so that cell has to run first — TODO confirm.
comparison_results = pd.read_csv('comparison_results.csv')
highest_averages = pd.read_csv('highest_averages.csv')

print(comparison_results.head())
print(highest_averages.head())

FileNotFoundError: [Errno 2] No such file or directory: 'comparison_results.csv'

In [None]:
# Load comparison results data (pd, plt and sns come from the import cell at
# the top of the notebook).
comparison_results = pd.read_csv('comparison_results.csv')

# Reshape to long form: one row per (language, metric) pair. Passing the
# transposed wide frame to sns.barplot would draw a single aggregated bar per
# language instead of grouped bars per metric.
# Assumes every column other than 'Language' is a numeric metric — TODO confirm.
comparison_long = comparison_results.melt(
    id_vars='Language', var_name='Metric', value_name='Score'
)

# Grouped bar plot: metrics on the x axis, one bar per language in each group.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=comparison_long, x='Metric', y='Score', hue='Language', ax=ax)
ax.set_title('Comparison of Languages Across Different Metrics')
ax.set_ylabel('Scores')
ax.set_xlabel('Metrics')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
plt.show()