In [2]:
from datasets import load_dataset
from dotenv import load_dotenv
from google import genai

# Hugging Face Hub repository id of the benchmark used throughout the notebook.
MATHVISTA_REPO_ID = "AI4Math/MathVista"

# Read variables from a local .env file into the process environment
# (presumably this is where the Gemini API key is kept -- confirm).
load_dotenv()

# `dataset` is indexed by split name in later cells (e.g. dataset["testmini"]).
dataset = load_dataset(MATHVISTA_REPO_ID)

  from .autonotebook import tqdm as notebook_tqdm


## Test query on Gemini 2 Flash Thinking

In [56]:
# Smoke test: send one MathVista example (image + text prompt) to the model.
client = genai.Client()

# Row 104 of the "testmini" split is the example inspected in the next cells.
sample_row = dataset["testmini"][104]

response = client.models.generate_content(
    model="gemini-2.0-flash-thinking-exp",
    contents=[sample_row["decoded_image"], sample_row["query"]],
)

In [57]:
# Show the text prompt that accompanied the image for test example 104.
sample_query = dataset["testmini"][104]["query"]
print(sample_query)

Hint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.
Question: Does Dark Violet have the minimum area under the curve?
Choices:
(A) yes
(B) no


In [58]:
# First part of the first candidate: the model's reasoning text.
first_candidate = response.candidates[0]
print(first_candidate.content.parts[0].text)

The user is asking whether the "Dark Violet" line in the graph has the minimum area under the curve.
To answer this, I need to visually inspect the graph and determine which line consistently has the lowest y-values across the x-axis. The area under the curve is related to the sum of the y-values for each x-value.

From the graph, it's clear that the "Dark Violet" points are consistently at the lowest y-values compared to the other colors.


In [59]:
# Second part of the first candidate: the model's final answer.
first_candidate = response.candidates[0]
print(first_candidate.content.parts[1].text)

Answer: (A)


In [60]:
# Ground-truth answer stored in the dataset for test example 104.
sample_answer = dataset["testmini"][104]["answer"]
print(sample_answer)

yes


## Building Dataset on ~350 examples

In [8]:
import time
import pandas as pd
from google import genai

# Parallel accumulators, one entry per collected example; they become the
# columns of the exported DataFrame. Each name is bound to its own fresh list.
questions, thoughts, generated_answers, true_answers, indexes = (
    [] for _ in range(5)
)

In [9]:
import os

# Query the model on the first N_EXAMPLES rows of the "testmini" split and
# export (question, thoughts, generated answer, true answer, index) to CSV.
#
# NOTE: this cell only appends to the accumulator lists created in the
# previous cell; re-run that cell first if this one is executed again,
# otherwise rows are duplicated.
N_EXAMPLES = 350                # number of leading testmini rows to query
REQUEST_PAUSE_SECONDS = 2       # pause between consecutive API requests
OUTPUT_CSV = "training_data/training.csv"

client = genai.Client()

for i in range(N_EXAMPLES):
    print(f"step {i}")
    try:
        # Fetch the row once instead of re-indexing the dataset for every field.
        example = dataset["testmini"][i]
        query = example["query"]
        true_answer = example["answer"]
        response = client.models.generate_content(
            model='gemini-2.0-flash-thinking-exp',
            contents=[example["decoded_image"], query],
        )
    except Exception as exc:
        # Request (or row access) failed: skip this example entirely so the
        # accumulator lists stay aligned. `Exception` (not a bare except)
        # lets KeyboardInterrupt stop the loop.
        print(f"Flash issue: {exc!r}")
    else:
        try:
            # Extract BOTH texts before appending anything: a reply with a
            # single part must not leave `thoughts` one entry longer than
            # `generated_answers`.
            parts = response.candidates[0].content.parts
            thought, generated_answer = parts[0].text, parts[1].text
        except (AttributeError, IndexError, TypeError) as exc:
            # Missing candidates/content/parts, or fewer than two parts:
            # keep the example but record empty model output.
            print(f"Query issue: {exc!r}")
            thought, generated_answer = "", ""

        # Append exactly one entry to every list per successful request.
        indexes.append(i)
        questions.append(query)
        true_answers.append(true_answer)
        thoughts.append(thought)
        generated_answers.append(generated_answer)

    # Pace every request, including failed ones, so an error does not cause
    # the next request to be sent immediately.
    time.sleep(REQUEST_PAUSE_SECONDS)

df_dict = {"questions": questions,
           "thoughts": thoughts,
           "generated_answers": generated_answers,
           "true_answers": true_answers,
           "indexes": indexes}
df = pd.DataFrame.from_dict(df_dict)

# Make sure the target directory exists so the export cannot fail after the
# whole (slow) collection loop has finished.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)

step 0
step 1
step 2
step 3
step 4
step 5
step 6
step 7
step 8
Query issue
step 9
step 10
step 11
step 12
step 13
step 14
step 15
step 16
Query issue
step 17
step 18
step 19
step 20
step 21
step 22
step 23
step 24
step 25
step 26
step 27
step 28
step 29
step 30
step 31
step 32
step 33
step 34
step 35
step 36
step 37
step 38
step 39
step 40
step 41
step 42
step 43
step 44
step 45
step 46
step 47
step 48
step 49
step 50
step 51
step 52
step 53
step 54
step 55
step 56
step 57
step 58
step 59
step 60
step 61
step 62
step 63
step 64
step 65
step 66
step 67
step 68
step 69
step 70
step 71
step 72
step 73
step 74
step 75
step 76
step 77
step 78
step 79
step 80
step 81
Query issue
step 82
step 83
step 84
step 85
step 86
step 87
step 88
step 89
step 90
step 91
step 92
step 93
step 94
step 95
step 96
step 97
step 98
step 99
step 100
step 101
step 102
step 103
step 104
step 105
step 106
step 107
step 108
step 109
step 110
step 111
step 112
step 113
step 114
step 115
step 116
step 117
Flash issue


In [74]:
# Number of examples actually collected (entries appended to `questions`).
num_collected = len(questions)
print(num_collected)

344
