<a href="https://colab.research.google.com/github/ZygoOoade/Statistics_on_prompts/blob/main/Few_data_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install google-generativeai -q
!pip install datasets -q

In [5]:
from datasets import load_dataset
# Load the "qwedsacf/competition_math" dataset. The cell output below reports a
# single `train` split with 12,500 examples; later cells read it as ds['train'].
ds = load_dataset("qwedsacf/competition_math")

Downloading readme:   0%|          | 0.00/4.82k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.85M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/12500 [00:00<?, ? examples/s]

The following Python code asks Gemini to answer the first 10 math problems in the dataset.

Gemini's answers are stored in a file named "gemini_responses.csv".

In [None]:
import os
import google.generativeai as genai
import csv
from tqdm import tqdm
import time

# Configure Gemini.
# The API key is read from the GEMINI_API_KEY environment variable instead of
# being written into the notebook, so the file can be shared or committed
# without leaking a credential. A missing variable fails here with a KeyError
# naming it, rather than later as one API error per request.
# To set it interactively for the current kernel, run for example:
#     import getpass; os.environ["GEMINI_API_KEY"] = getpass.getpass()
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Sampling / output options passed to the model for every request.
generation_config = {
  "temperature": 0.9,
  "top_p": 0.95,
  "top_k": 64,
  "response_mime_type": "text/plain",
}

# Create the model. Every listed safety category is set to BLOCK_NONE.
# NOTE(review): "HARM_CATEGORY_DANGEROUS" and "HARM_CATEGORY_DANGEROUS_CONTENT"
# may refer to the same category in the SDK -- confirm against the SDK's
# HarmCategory enum and drop the redundant entry if so.
model = genai.GenerativeModel(
  model_name="gemini-1.5-pro-exp-0801",
  generation_config=generation_config,
  safety_settings=[
    {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
  ],
  system_instruction="You are a highly accomplished mathematician with a PhD in mathematics and multiple Fields Medals. You approach every problem with utmost rigor and ensure that every assertion is thoroughly proven. You break down complex problems into manageable steps, providing clear, detailed explanations at each stage. Precision and clarity are your guiding principles, and your solutions are both comprehensive and meticulous. When solving problems, you consider various methods and choose the most effective one, always verifying the correctness of each step.",
)

# Rate limiting parameters
requests_per_second = 1
max_requests_per_minute = 1

# Initialize timing variables
start_time = time.time()
request_times = []  # wall-clock timestamps of requests made in the last minute

def wait_for_rate_limit():
    """Sleep as long as needed to respect both rate limits, then record a request.

    Reads the module-level ``requests_per_second`` and
    ``max_requests_per_minute`` settings and mutates the module-level
    ``request_times`` list in place: timestamps older than 60 seconds are
    dropped and the timestamp of the request about to be made is appended.

    Returns:
        None. The call returns once the caller may send its next request.
    """
    now = time.time()

    # Remove request times older than 1 minute (slice assignment keeps the
    # same list object, so no `global` statement is needed).
    request_times[:] = [t for t in request_times if now - t < 60]

    # Check and wait for per-minute limit: wait until the oldest recorded
    # request falls out of the 60-second window.
    if len(request_times) >= max_requests_per_minute:
        sleep_time = 60 - (now - request_times[0])
        if sleep_time > 0:
            time.sleep(sleep_time)
            # The clock has moved on while sleeping. Refresh it and re-prune so
            # the per-second check below does not use a stale timestamp and
            # sleep a second time for no reason.
            now = time.time()
            request_times[:] = [t for t in request_times if now - t < 60]

    # Check and wait for per-second limit: keep at least
    # 1/requests_per_second seconds between consecutive requests.
    if request_times:
        min_interval = 1 / requests_per_second
        since_last = now - request_times[-1]
        if since_last < min_interval:
            time.sleep(min_interval - since_last)

    # Add current request time
    request_times.append(time.time())

# Create a chat session up front so that `chat_session` is always defined at
# module level; it is replaced by a fresh session for every problem below.
chat_session = model.start_chat(history=[])

# Look up the split and the name of its first column once instead of
# rebuilding the row dict twice per iteration. For this dataset the first
# column is 'problem' (see the printed column names further down).
train_split = ds['train']
prompt_column = train_split.column_names[0]

# Create and open the output CSV file: one row per problem, in dataset order.
with open('gemini_responses.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)

    # Iterate through the first 10 rows of the dataset
    for i in tqdm(range(10)):  # Using tqdm for progress bar
        # Get the prompt from the first column of the current row
        prompt = train_split[i][prompt_column]

        # Empty prompts are not sent, but still get a (blank) row so that row
        # positions keep matching dataset indices, as the error path does.
        if not prompt:
            csv_writer.writerow([""])
            continue

        try:
            # Apply rate limiting
            wait_for_rate_limit()

            # Start from an empty history for every problem. Reusing one
            # session would feed all earlier problems and answers back to the
            # model, so answers would not be independent and the context
            # would grow with each request.
            chat_session = model.start_chat(history=[])

            # Send the prompt to Gemini and get the response
            response = chat_session.send_message(str(prompt))

            # Write the response to the CSV file
            csv_writer.writerow([response.text])

        except Exception as e:
            # Best effort: report the failure and keep going with a placeholder
            # row so later rows stay aligned.
            print(f"Error processing row {i}: {str(e)}")
            csv_writer.writerow(["Error occurred"])

        # Requests are slow (rate limited), so persist each row as soon as it
        # is written rather than only when the file is closed.
        csvfile.flush()

print("Processing complete. Results saved in 'gemini_responses.csv'.")

In [None]:
from datasets import load_dataset
ds = load_dataset("qwedsacf/competition_math")

# Show the available columns; for this dataset the output is
# ['problem', 'level', 'type', 'solution'].
print(ds['train'].column_names)

# Extract the 4th column ('solution' in the output above) for the first 10 rows.
# Slicing the split returns a dict of column name -> list of values.
column_4_data = ds['train'][:10][ds['train'].column_names[3]]  # Using index 3 for the 4th column

# Save the extracted data to a CSV file
import csv

# Write UTF-8 explicitly, like 'gemini_responses.csv', so the output does not
# depend on the platform's default encoding.
with open('extracted_data.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write each entry from column_4_data to the CSV file, one per row
    for item in column_4_data:
        writer.writerow([item])

print("CSV file 'extracted_data.csv' has been created.")


['problem', 'level', 'type', 'solution']
CSV file 'extracted_data.csv' has been created.
