In [36]:
import pandas as pd
import os

# Load the model-answer CSV into a pandas DataFrame named `df`.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory so the notebook runs elsewhere.
csv_file_path = r"g:\My Drive\Colab Notebooks\llm_harness\model_answers_qwen3_8b_thinking_multiprocess.csv"

# Only attempt the load when the file is present; otherwise report and leave
# `df` undefined (cells that use `df` will then fail with NameError).
if os.path.exists(csv_file_path):
    # Read as UTF-8 so the Turkish characters in the question text are preserved
    df = pd.read_csv(csv_file_path, encoding='utf-8')

    print(f"Successfully loaded CSV file: {csv_file_path}")
    print(f"DataFrame shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print("\nFirst few rows:")
    print(df.head())

    print("\nDataFrame info:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would append a stray "None" line.
    df.info()

    print("\nBasic statistics:")
    print(df.describe())

else:
    print(f"File not found: {csv_file_path}")

Successfully loaded CSV file: g:\My Drive\Colab Notebooks\llm_harness\model_answers_qwen3_8b_thinking_multiprocess.csv
DataFrame shape: (291, 10)
Columns: ['question_number_in_dataset', 'section', 'question', 'thinking_content', 'raw_content_after_thinking', 'parsed_answer_index', 'correct_answer_index', 'correct', 'gpu_id', 'timestamp']

First few rows:
   question_number_in_dataset                                         section  \
0                           6  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
1                           5  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
2                           4  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
3                           3  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
4                           1  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   

                                            question  \
0  Bu parçada güvercinlerle ilgili aşağıdakilerin...   
1  Bu parçada söz edilen hafıza geliştirme p

In [37]:
# List every column name in the answers DataFrame
print("\nAll columns in the DataFrame:")
print(df.columns.tolist())
# Spot-check two individual records, selected by integer position.
# The label names the positions actually printed (the previous label claimed
# this was the first row).
print("\nSample rows of the DataFrame (positions 80 and 192):")
for sample_position in (80, 192):
    print(df.iloc[sample_position])


All columns in the DataFrame:
['question_number_in_dataset', 'section', 'question', 'thinking_content', 'raw_content_after_thinking', 'parsed_answer_index', 'correct_answer_index', 'correct', 'gpu_id', 'timestamp']

First row of the DataFrame:
question_number_in_dataset                                                   40
section                          TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI
question                      Bu sorunların azaltılması ve önlenmesinde aşağ...
thinking_content              <think>\nOkay, let's tackle this question. The...
raw_content_after_thinking                                                    3
parsed_answer_index                                                           3
correct_answer_index                                                          3
correct                                                                    True
gpu_id                                                                        7
timestamp                          

In [None]:


# Accuracy statistics per section and for the whole DataFrame, with a separate
# breakdown for questions that have no image.
#
# Requires the boolean `has_image` column, which is copied onto `df` from the
# YKS dataset in a separate cell. Fail early with an actionable message instead
# of a bare KeyError from inside the aggregation.
if 'has_image' not in df.columns:
    raise KeyError(
        "df has no 'has_image' column yet - run the cell that copies has_image "
        "from dataset_df onto df before computing these statistics"
    )

# Row-level boolean flags, computed once and reused everywhere below.
is_correct = df['correct'].eq(True)
is_no_image = df['has_image'].eq(False)
is_correct_no_image = is_correct & is_no_image

# Aggregate the flags per section. Using helper columns keeps every
# aggregation local to its group instead of indexing each group with a
# full-length global mask.
section_counts = (
    df.assign(
        _is_correct=is_correct,
        _is_no_image=is_no_image,
        _is_correct_no_image=is_correct_no_image,
    )
    .groupby('section')
    .agg(
        total_questions=('question', 'count'),
        total_true=('_is_correct', 'sum'),
        # (sic) misspelled column name kept so existing consumers keep working
        avarage_score=('_is_correct', 'mean'),
        # number of questions without an image
        no_image=('_is_no_image', 'sum'),
        # number of correct answers among the questions without an image
        true_without_image=('_is_correct_no_image', 'sum'),
    )
)

# Accuracy restricted to questions without an image; a section with no such
# questions yields NaN (0 / 0) rather than raising.
grouped_data = section_counts.assign(
    no_image_accuracy=section_counts['true_without_image'] / section_counts['no_image']
)

# Entire DataFrame statistics
total_true = int(is_correct.sum())
total_no_image = int(is_no_image.sum())
# Parenthesise the combined mask BEFORE summing: `a & b.sum()` would apply
# .sum() to `b` only and produce a Series instead of a count.
total_true_no_image = int(is_correct_no_image.sum())

print("\nStatistics for the entire DataFrame:")
print(f"Total number of questions: {df['question'].count()}")
print(f"Total number of true answers: {total_true}")
# total number of no image answers
print(f"Total number of no image answers: {total_no_image}")
# total number of rows with has_image == False and correct == True
print(f"Total number of true answers without image: {total_true_no_image}")

print(f"Average score: {total_true / df['correct_answer_index'].count()}")

# average score for no image answers (NaN when there are no such questions)
no_image_average = total_true_no_image / total_no_image if total_no_image else float('nan')
print(f"Average score for no image answers: {no_image_average}")


# Display the grouped DataFrame
print("\nGrouped DataFrame by section:")
print(grouped_data)



Statistics for the entire DataFrame:
Total number of questions: 291
Total number of true answers: 200
Average score: 0.6872852233676976

Grouped DataFrame by section:
                                                total_questions  total_true  \
section                                                                       
FEN-BILIMLERI-TESTI                                          40          29   
MATEMATIK-TESTI                                              40          23   
SOSYAL-BILIMLER-2-TESTI                                      46          36   
SOSYAL-BILIMLER-TESTI                                        25          20   
TEMEL-FEN-BILIMLERI-TESTI                                    20          16   
TEMEL-MATEMATIK-TESTI                                        40          18   
TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI               40          30   
TURKCE-TESTI                                                 40          28   

                                         

In [None]:
from datasets import load_dataset

# Load the "AtlasPolat/yks2024" benchmark in non-streaming mode; the result is
# indexed by split name below (e.g. dataset['train']).
dataset = load_dataset("AtlasPolat/yks2024", streaming=False)


# The train split has a boolean `has_image` column indicating whether a question has an image.
# This cell only loads the dataset; the has_image column is copied onto the `df` DataFrame in a separate cell.





  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 291/291 [00:00<00:00, 10237.99 examples/s]


DataFrame with has_image column:
   question_number_in_dataset                                         section  \
0                           6  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
1                           5  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
2                           4  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
3                           3  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   
4                           1  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI   

                                            question  \
0  Bu parçada güvercinlerle ilgili aşağıdakilerin...   
1  Bu parçada söz edilen hafıza geliştirme progra...   
2  Bu parçanın yazarıyla ilgili aşağıdakilerden h...   
3  Bu parçadan aşağıdakilerin hangisine ulaşılabi...   
4  Bu cümlede boş bırakılan yerlere sırasıyla aşa...   

                                    thinking_content  \
0  <think>\nOkay, let's tackle this question. The...   
1  <think>\nOkay, let's tackle




In [39]:
# Show the schema of the train split, then materialise it as a DataFrame
train_split = dataset['train']
print(train_split.features)

# Convert the dataset to a DataFrame
dataset_df = pd.DataFrame(train_split)

# Column names followed by a preview of the first rows
print("\nDataset features:")
print(list(dataset_df.columns))

print(dataset_df.head())

{'section': Value(dtype='string', id=None), 'question_number': Value(dtype='int64', id=None), 'page': Value(dtype='int64', id=None), 'question': Value(dtype='string', id=None), 'has_image': Value(dtype='bool', id=None), 'passage': Value(dtype='string', id=None), 'choices': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), 'answer': Value(dtype='int64', id=None)}

Dataset features:
['section', 'question_number', 'page', 'question', 'has_image', 'passage', 'choices', 'answer']
                                          section  question_number  page  \
0  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI                1     3   
1  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI                2     3   
2  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI                3     3   
3  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI                4     4   
4  TURK-DILI-VE-EDEBIYATI-SOSYAL-BILIMLER-1-TESTI                5     4   

                                           

In [40]:

# Copy the has_image flag from dataset_df onto df by matching
# (section, question_number_in_dataset) against (section, question_number).
# Done as one keyed lookup instead of re-filtering dataset_df for every row.
dataset_keys = ['section', 'question_number']

# One flag per key; keep the first occurrence, matching the previous
# row-by-row behaviour of taking the first matching dataset row.
image_lookup = (
    dataset_df
    .drop_duplicates(subset=dataset_keys, keep='first')
    .set_index(dataset_keys)['has_image']
)

# Keys of the answer rows, in df's row order
answer_keys = pd.MultiIndex.from_frame(df[['section', 'question_number_in_dataset']])

# Rows without a counterpart in the dataset keep the default False; report
# them so a silent mismatch cannot skew the image / no-image statistics.
unmatched_rows = int((~answer_keys.isin(image_lookup.index)).sum())
if unmatched_rows:
    print(f"Warning: {unmatched_rows} rows have no match in dataset_df; has_image defaults to False for them")

# fill_value=False supplies the default for unmatched keys; to_numpy() drops
# the lookup's MultiIndex so values are assigned positionally to df's rows.
df['has_image'] = image_lookup.reindex(answer_keys, fill_value=False).to_numpy(dtype=bool)






In [27]:
# DataFrame.info() prints its own summary and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 291 entries, 0 to 290
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   question_number_in_dataset  291 non-null    int64 
 1   section                     291 non-null    object
 2   question                    291 non-null    object
 3   thinking_content            291 non-null    object
 4   raw_content_after_thinking  291 non-null    object
 5   parsed_answer_index         291 non-null    int64 
 6   correct_answer_index        291 non-null    int64 
 7   correct                     291 non-null    bool  
 8   gpu_id                      291 non-null    int64 
 9   timestamp                   291 non-null    object
 10  has_image                   291 non-null    bool  
dtypes: bool(2), int64(4), object(5)
memory usage: 21.2+ KB
None


In [41]:
# Sanity-check the has_image column on the rows at positions 100-149
has_image_slice = df['has_image'].iloc[100:150]
print(has_image_slice)

100     True
101    False
102    False
103    False
104    False
105     True
106    False
107    False
108    False
109     True
110    False
111    False
112    False
113    False
114     True
115    False
116     True
117    False
118    False
119    False
120    False
121    False
122    False
123     True
124    False
125    False
126     True
127    False
128    False
129    False
130     True
131     True
132    False
133    False
134    False
135    False
136     True
137    False
138    False
139    False
140     True
141     True
142    False
143    False
144    False
145     True
146    False
147     True
148    False
149     True
Name: has_image, dtype: bool


In [42]:

# Count the questions flagged as having an image: each flag is cast to 1/0
# and the column is summed.

total_questions_with_images = df['has_image'].astype(int).sum()

print(f"\nTotal number of questions with images: {total_questions_with_images}")


Total number of questions with images: 65
