<a href="https://colab.research.google.com/github/casllmproject/wjs_llm_project/blob/main/german_gpt_4o_survey_simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so the cells below can read and write files under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
!pip install openai
!pip install openai==0.28
!pip install openai python-dotenv

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.108.0
    Uninstalling openai-1.108.0:
      Successfully uninstalled openai-1.108.0
Successfully installed openai-0.28.0




In [None]:
import pandas as pd
import openai
import numpy as np
import json
import time
import os
import re
import random
from tqdm import tqdm

In [None]:
# Read the OpenAI API key from Drive. The `with` block closes the file handle, and
# strip() removes any surrounding whitespace (e.g. a trailing "\r\n" or spaces) so
# that it does not become part of the key.
with open("/content/drive/MyDrive/CASLLM/casllm_apikey.txt", "r") as key_file:
    openai.api_key = key_file.read().strip()

In [None]:
# Load the survey questions from the CSV file
survey_df = pd.read_csv('/content/drive/MyDrive/AI_shoes/german_survey_questions.csv')

# Fail fast, with a readable message, if the file lacks what gpt_take_survey reads:
# the three columns it indexes and the row whose Variable_Name is 'context'.
missing_columns = {'Variable_Name', 'Question', 'Options'} - set(survey_df.columns)
assert not missing_columns, f"survey CSV is missing columns: {sorted(missing_columns)}"
assert (survey_df['Variable_Name'] == 'context').any(), "survey CSV has no 'context' row"

In [None]:
def gpt_take_survey(model_name, temperature=0.7, max_retries=3, output_suffix=''):
    """Ask ``model_name`` every question in ``survey_df`` and save the answers to CSV.

    The row of the module-level ``survey_df`` whose ``Variable_Name`` is
    ``'context'`` supplies the system message. Every other row is sent as one
    chat request built from the context plus that row's ``Question`` and
    ``Options`` values. The first standalone digit 1-5 found in the reply is
    stored as the response; ``None`` is stored when the reply contains no such
    digit or when every attempt for that question failed.

    Args:
        model_name: Model identifier passed to ``openai.ChatCompletion.create``;
            it is also embedded in the output file name.
        temperature: Sampling temperature forwarded to the API.
        max_retries: Maximum number of attempts per question when a transient
            API error is raised.
        output_suffix: Text inserted just before ``.csv`` in the output file
            name, so repeated runs can be kept apart without redefining the
            function. The default ``''`` keeps the original file name.

    Returns:
        None. The responses are written to
        ``/content/drive/MyDrive/AI_shoes/german_{model_name}_responses{output_suffix}.csv``
        and the path is printed.

    Raises:
        IndexError: If ``survey_df`` has no ``'context'`` row.
        openai.error.OpenAIError: Errors outside the retried set below (for
            example authentication or invalid-request errors) are not caught.
    """
    # Transient failures that are worth retrying (exception classes of the
    # pre-1.0 openai SDK). Catching only APIError would let a rate limit,
    # timeout or dropped connection abort the whole survey run.
    retryable_errors = (
        openai.error.APIError,
        openai.error.Timeout,
        openai.error.RateLimitError,
        openai.error.APIConnectionError,
        openai.error.ServiceUnavailableError,
    )

    responses = []

    # Extract the context from the survey table
    context = survey_df.loc[survey_df['Variable_Name'] == 'context', 'Question'].values[0]

    # Track time for the remaining-time estimate
    start_time = time.time()

    # Loop through each survey question with a progress bar; `i` is the row's
    # position in survey_df (the context row included).
    progress = tqdm(enumerate(survey_df.iterrows()), total=len(survey_df), desc=f'Processing {model_name}')
    for i, (_, row) in progress:
        if row['Variable_Name'] == 'context':
            continue  # Skip the context row

        # Prepare the prompt
        prompt = f"{context}\n\n{row['Question']}\nOptions: {row['Options']}\nChoose one of the options."

        # Stays None if no attempt succeeds or the reply holds no digit 1-5
        answer = None
        for attempt in range(max_retries):
            try:
                response = openai.ChatCompletion.create(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": context},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=temperature
                )
            except retryable_errors as e:
                print(f"Attempt {attempt + 1} failed with error: {e}")
                if attempt + 1 < max_retries:
                    time.sleep(random.uniform(1, 3))  # Wait a bit before retrying
                continue

            # Keep only the chosen option number from the model's reply
            reply = response['choices'][0]['message']['content'].strip()
            match = re.search(r'\b[1-5]\b', reply)
            answer = match.group(0) if match else None
            break  # Success: stop retrying
        else:
            # Every attempt failed: say so instead of silently losing the question
            variable_name = row['Variable_Name']
            print(f"All {max_retries} attempts failed for {variable_name}; recording an empty response.")

        # Always store one row per question so the output lines up with the survey
        responses.append({
            "Model": model_name,
            "Variable_Name": row['Variable_Name'],
            "Question": row['Question'],
            "Response": answer
        })

        # Update progress and time estimate
        elapsed_time = time.time() - start_time
        questions_remaining = len(survey_df) - i - 1
        time_per_question = elapsed_time / (i + 1)
        time_remaining = questions_remaining * time_per_question
        tqdm.write(f"Estimated time remaining: {time.strftime('%H:%M:%S', time.gmtime(time_remaining))}")

    # Convert the responses to a DataFrame and save to CSV in the specified folder
    gpt_df = pd.DataFrame(responses)
    output_path = f'/content/drive/MyDrive/AI_shoes/german_{model_name}_responses{output_suffix}.csv'
    gpt_df.to_csv(output_path, index=False)

    print(f"Responses from {model_name} saved to {output_path}.")

# Run the survey once for every model in `models` (currently only 'gpt-4o-2024-08-06').
# Add further model names to the list to survey them as well.
models = ['gpt-4o-2024-08-06']
for model in models:
    gpt_take_survey(model)

Processing gpt-4o-2024-08-06:   5%|▌         | 2/40 [00:02<00:55,  1.47s/it]

Estimated time remaining: 00:00:55


Processing gpt-4o-2024-08-06:   8%|▊         | 3/40 [00:07<01:45,  2.86s/it]

Estimated time remaining: 00:01:35


Processing gpt-4o-2024-08-06:  10%|█         | 4/40 [00:10<01:45,  2.93s/it]

Estimated time remaining: 00:01:37


Processing gpt-4o-2024-08-06:  12%|█▎        | 5/40 [00:13<01:38,  2.81s/it]

Estimated time remaining: 00:01:33


Processing gpt-4o-2024-08-06:  15%|█▌        | 6/40 [00:17<01:54,  3.36s/it]

Estimated time remaining: 00:01:41


Processing gpt-4o-2024-08-06:  18%|█▊        | 7/40 [00:22<02:09,  3.94s/it]

Estimated time remaining: 00:01:48


Processing gpt-4o-2024-08-06:  20%|██        | 8/40 [00:26<02:05,  3.92s/it]

Estimated time remaining: 00:01:47


Processing gpt-4o-2024-08-06:  22%|██▎       | 9/40 [00:28<01:43,  3.32s/it]

Estimated time remaining: 00:01:39


Processing gpt-4o-2024-08-06:  25%|██▌       | 10/40 [00:32<01:40,  3.35s/it]

Estimated time remaining: 00:01:36


Processing gpt-4o-2024-08-06:  28%|██▊       | 11/40 [00:35<01:38,  3.39s/it]

Estimated time remaining: 00:01:34


Processing gpt-4o-2024-08-06:  30%|███       | 12/40 [00:40<01:49,  3.89s/it]

Estimated time remaining: 00:01:35


Processing gpt-4o-2024-08-06:  32%|███▎      | 13/40 [00:45<01:49,  4.04s/it]

Estimated time remaining: 00:01:33


Processing gpt-4o-2024-08-06:  35%|███▌      | 14/40 [00:47<01:33,  3.61s/it]

Estimated time remaining: 00:01:28


Processing gpt-4o-2024-08-06:  38%|███▊      | 15/40 [00:49<01:17,  3.10s/it]

Estimated time remaining: 00:01:22


Processing gpt-4o-2024-08-06:  40%|████      | 16/40 [00:54<01:23,  3.50s/it]

Estimated time remaining: 00:01:21


Processing gpt-4o-2024-08-06:  42%|████▎     | 17/40 [00:55<01:05,  2.86s/it]

Estimated time remaining: 00:01:15


Processing gpt-4o-2024-08-06:  45%|████▌     | 18/40 [00:59<01:10,  3.22s/it]

Estimated time remaining: 00:01:12


Processing gpt-4o-2024-08-06:  48%|████▊     | 19/40 [01:01<00:58,  2.77s/it]

Estimated time remaining: 00:01:07


Processing gpt-4o-2024-08-06:  50%|█████     | 20/40 [01:04<00:56,  2.81s/it]

Estimated time remaining: 00:01:04


Processing gpt-4o-2024-08-06:  52%|█████▎    | 21/40 [01:07<00:57,  3.04s/it]

Estimated time remaining: 00:01:01


Processing gpt-4o-2024-08-06:  55%|█████▌    | 22/40 [01:14<01:13,  4.07s/it]

Estimated time remaining: 00:01:00


Processing gpt-4o-2024-08-06:  57%|█████▊    | 23/40 [01:18<01:10,  4.17s/it]

Estimated time remaining: 00:00:58


Processing gpt-4o-2024-08-06:  60%|██████    | 24/40 [01:21<00:58,  3.63s/it]

Estimated time remaining: 00:00:54


Processing gpt-4o-2024-08-06:  62%|██████▎   | 25/40 [01:23<00:47,  3.14s/it]

Estimated time remaining: 00:00:49


Processing gpt-4o-2024-08-06:  65%|██████▌   | 26/40 [01:26<00:43,  3.12s/it]

Estimated time remaining: 00:00:46


Processing gpt-4o-2024-08-06:  68%|██████▊   | 27/40 [01:28<00:37,  2.92s/it]

Estimated time remaining: 00:00:42


Processing gpt-4o-2024-08-06:  70%|███████   | 28/40 [01:30<00:33,  2.77s/it]

Estimated time remaining: 00:00:38


Processing gpt-4o-2024-08-06:  72%|███████▎  | 29/40 [01:33<00:28,  2.56s/it]

Estimated time remaining: 00:00:35


Processing gpt-4o-2024-08-06:  75%|███████▌  | 30/40 [01:35<00:24,  2.48s/it]

Estimated time remaining: 00:00:31


Processing gpt-4o-2024-08-06:  78%|███████▊  | 31/40 [01:40<00:29,  3.25s/it]

Estimated time remaining: 00:00:29


Processing gpt-4o-2024-08-06:  80%|████████  | 32/40 [01:40<00:19,  2.43s/it]

Estimated time remaining: 00:00:25


Processing gpt-4o-2024-08-06:  82%|████████▎ | 33/40 [01:43<00:17,  2.57s/it]

Estimated time remaining: 00:00:22


Processing gpt-4o-2024-08-06:  85%|████████▌ | 34/40 [01:47<00:17,  2.86s/it]

Estimated time remaining: 00:00:18


Processing gpt-4o-2024-08-06:  88%|████████▊ | 35/40 [01:48<00:12,  2.50s/it]

Estimated time remaining: 00:00:15


Processing gpt-4o-2024-08-06:  90%|█████████ | 36/40 [01:49<00:07,  1.93s/it]

Estimated time remaining: 00:00:12


Processing gpt-4o-2024-08-06:  92%|█████████▎| 37/40 [01:52<00:06,  2.19s/it]

Estimated time remaining: 00:00:09


Processing gpt-4o-2024-08-06:  95%|█████████▌| 38/40 [01:55<00:04,  2.45s/it]

Estimated time remaining: 00:00:06


Processing gpt-4o-2024-08-06:  98%|█████████▊| 39/40 [01:58<00:02,  2.73s/it]

Estimated time remaining: 00:00:03


Processing gpt-4o-2024-08-06: 100%|██████████| 40/40 [02:01<00:00,  3.03s/it]

Estimated time remaining: 00:00:00
Responses from gpt-4o-2024-08-06 saved to /content/drive/MyDrive/AI_shoes/german_gpt-4o-2024-08-06_responses.csv.





In [None]:
# Reload the responses saved by the first run to inspect what was written.
gpt4o_flag = pd.read_csv("/content/drive/MyDrive/AI_shoes/german_gpt-4o-2024-08-06_responses.csv", encoding='utf-8')
gpt4o_flag  # bare last expression: rendered as a rich table instead of print()'s plain text

In [None]:
def gpt_take_survey(model_name, temperature=0.7, max_retries=3, output_suffix='_2nd'):
    """Ask ``model_name`` every question in ``survey_df`` and save the answers to CSV.

    NOTE: this cell redefines ``gpt_take_survey`` and shadows the definition
    earlier in the notebook. It differs from that one only in the output file
    name, which ends in ``_responses_2nd.csv`` by default so a second run does
    not overwrite ``german_{model_name}_responses.csv``.

    The row of the module-level ``survey_df`` whose ``Variable_Name`` is
    ``'context'`` supplies the system message. Every other row is sent as one
    chat request built from the context plus that row's ``Question`` and
    ``Options`` values. The first standalone digit 1-5 found in the reply is
    stored as the response; ``None`` is stored when the reply contains no such
    digit or when every attempt for that question failed.

    Args:
        model_name: Model identifier passed to ``openai.ChatCompletion.create``;
            it is also embedded in the output file name.
        temperature: Sampling temperature forwarded to the API.
        max_retries: Maximum number of attempts per question when a transient
            API error is raised.
        output_suffix: Text inserted just before ``.csv`` in the output file
            name. The default ``'_2nd'`` reproduces this cell's original
            file name; pass another value to label further runs.

    Returns:
        None. The responses are written to
        ``/content/drive/MyDrive/AI_shoes/german_{model_name}_responses{output_suffix}.csv``
        and the path is printed.

    Raises:
        IndexError: If ``survey_df`` has no ``'context'`` row.
        openai.error.OpenAIError: Errors outside the retried set below (for
            example authentication or invalid-request errors) are not caught.
    """
    # Transient failures that are worth retrying (exception classes of the
    # pre-1.0 openai SDK). Catching only APIError would let a rate limit,
    # timeout or dropped connection abort the whole survey run.
    retryable_errors = (
        openai.error.APIError,
        openai.error.Timeout,
        openai.error.RateLimitError,
        openai.error.APIConnectionError,
        openai.error.ServiceUnavailableError,
    )

    responses = []

    # Extract the context from the survey table
    context = survey_df.loc[survey_df['Variable_Name'] == 'context', 'Question'].values[0]

    # Track time for the remaining-time estimate
    start_time = time.time()

    # Loop through each survey question with a progress bar; `i` is the row's
    # position in survey_df (the context row included).
    progress = tqdm(enumerate(survey_df.iterrows()), total=len(survey_df), desc=f'Processing {model_name}')
    for i, (_, row) in progress:
        if row['Variable_Name'] == 'context':
            continue  # Skip the context row

        # Prepare the prompt
        prompt = f"{context}\n\n{row['Question']}\nOptions: {row['Options']}\nChoose one of the options."

        # Stays None if no attempt succeeds or the reply holds no digit 1-5
        answer = None
        for attempt in range(max_retries):
            try:
                response = openai.ChatCompletion.create(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": context},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=temperature
                )
            except retryable_errors as e:
                print(f"Attempt {attempt + 1} failed with error: {e}")
                if attempt + 1 < max_retries:
                    time.sleep(random.uniform(1, 3))  # Wait a bit before retrying
                continue

            # Keep only the chosen option number from the model's reply
            reply = response['choices'][0]['message']['content'].strip()
            match = re.search(r'\b[1-5]\b', reply)
            answer = match.group(0) if match else None
            break  # Success: stop retrying
        else:
            # Every attempt failed: say so instead of silently losing the question
            variable_name = row['Variable_Name']
            print(f"All {max_retries} attempts failed for {variable_name}; recording an empty response.")

        # Always store one row per question so the output lines up with the survey
        responses.append({
            "Model": model_name,
            "Variable_Name": row['Variable_Name'],
            "Question": row['Question'],
            "Response": answer
        })

        # Update progress and time estimate
        elapsed_time = time.time() - start_time
        questions_remaining = len(survey_df) - i - 1
        time_per_question = elapsed_time / (i + 1)
        time_remaining = questions_remaining * time_per_question
        tqdm.write(f"Estimated time remaining: {time.strftime('%H:%M:%S', time.gmtime(time_remaining))}")

    # Convert the responses to a DataFrame and save to CSV in the specified folder
    gpt_df = pd.DataFrame(responses)
    output_path = f'/content/drive/MyDrive/AI_shoes/german_{model_name}_responses{output_suffix}.csv'
    gpt_df.to_csv(output_path, index=False)

    print(f"Responses from {model_name} saved to {output_path}.")

# Run the survey again for every model in `models` (currently only 'gpt-4o-2024-08-06'),
# using the gpt_take_survey definition from the cell above.
models = ['gpt-4o-2024-08-06']
for model in models:
    gpt_take_survey(model)

Processing gpt-4o-2024-08-06:   5%|▌         | 2/40 [00:02<00:53,  1.41s/it]

Estimated time remaining: 00:00:53


Processing gpt-4o-2024-08-06:   8%|▊         | 3/40 [00:04<00:55,  1.50s/it]

Estimated time remaining: 00:00:54


Processing gpt-4o-2024-08-06:  10%|█         | 4/40 [00:07<01:20,  2.25s/it]

Estimated time remaining: 00:01:11


Processing gpt-4o-2024-08-06:  12%|█▎        | 5/40 [00:10<01:17,  2.21s/it]

Estimated time remaining: 00:01:10


Processing gpt-4o-2024-08-06:  15%|█▌        | 6/40 [00:11<01:02,  1.83s/it]

Estimated time remaining: 00:01:03


Processing gpt-4o-2024-08-06:  18%|█▊        | 7/40 [00:13<01:09,  2.09s/it]

Estimated time remaining: 00:01:05


Processing gpt-4o-2024-08-06:  20%|██        | 8/40 [00:16<01:12,  2.26s/it]

Estimated time remaining: 00:01:05


Processing gpt-4o-2024-08-06:  22%|██▎       | 9/40 [00:18<01:05,  2.13s/it]

Estimated time remaining: 00:01:02


Processing gpt-4o-2024-08-06:  25%|██▌       | 10/40 [00:22<01:26,  2.88s/it]

Estimated time remaining: 00:01:08


Processing gpt-4o-2024-08-06:  28%|██▊       | 11/40 [00:26<01:29,  3.07s/it]

Estimated time remaining: 00:01:09


Processing gpt-4o-2024-08-06:  30%|███       | 12/40 [00:28<01:17,  2.77s/it]

Estimated time remaining: 00:01:06


Processing gpt-4o-2024-08-06:  32%|███▎      | 13/40 [00:31<01:18,  2.92s/it]

Estimated time remaining: 00:01:05


Processing gpt-4o-2024-08-06:  35%|███▌      | 14/40 [00:35<01:26,  3.34s/it]

Estimated time remaining: 00:01:06


Processing gpt-4o-2024-08-06:  38%|███▊      | 15/40 [00:38<01:15,  3.01s/it]

Estimated time remaining: 00:01:03


Processing gpt-4o-2024-08-06:  40%|████      | 16/40 [00:41<01:14,  3.12s/it]

Estimated time remaining: 00:01:02


Processing gpt-4o-2024-08-06:  42%|████▎     | 17/40 [00:43<01:03,  2.78s/it]

Estimated time remaining: 00:00:58


Processing gpt-4o-2024-08-06:  45%|████▌     | 18/40 [00:47<01:06,  3.02s/it]

Estimated time remaining: 00:00:57


Processing gpt-4o-2024-08-06:  48%|████▊     | 19/40 [00:49<00:59,  2.83s/it]

Estimated time remaining: 00:00:54


Processing gpt-4o-2024-08-06:  50%|█████     | 20/40 [00:51<00:48,  2.44s/it]

Estimated time remaining: 00:00:51


Processing gpt-4o-2024-08-06:  52%|█████▎    | 21/40 [00:54<00:49,  2.62s/it]

Estimated time remaining: 00:00:48


Processing gpt-4o-2024-08-06:  55%|█████▌    | 22/40 [00:56<00:48,  2.67s/it]

Estimated time remaining: 00:00:46


Processing gpt-4o-2024-08-06:  57%|█████▊    | 23/40 [00:59<00:44,  2.60s/it]

Estimated time remaining: 00:00:43


Processing gpt-4o-2024-08-06:  60%|██████    | 24/40 [01:03<00:47,  2.99s/it]

Estimated time remaining: 00:00:42


Processing gpt-4o-2024-08-06:  62%|██████▎   | 25/40 [01:05<00:41,  2.78s/it]

Estimated time remaining: 00:00:39


Processing gpt-4o-2024-08-06:  65%|██████▌   | 26/40 [01:08<00:38,  2.77s/it]

Estimated time remaining: 00:00:36


Processing gpt-4o-2024-08-06:  68%|██████▊   | 27/40 [01:12<00:40,  3.13s/it]

Estimated time remaining: 00:00:34


Processing gpt-4o-2024-08-06:  70%|███████   | 28/40 [01:14<00:33,  2.75s/it]

Estimated time remaining: 00:00:31


Processing gpt-4o-2024-08-06:  72%|███████▎  | 29/40 [01:16<00:28,  2.61s/it]

Estimated time remaining: 00:00:28


Processing gpt-4o-2024-08-06:  75%|███████▌  | 30/40 [01:18<00:23,  2.38s/it]

Estimated time remaining: 00:00:26


Processing gpt-4o-2024-08-06:  78%|███████▊  | 31/40 [01:24<00:30,  3.41s/it]

Estimated time remaining: 00:00:24


Processing gpt-4o-2024-08-06:  80%|████████  | 32/40 [01:26<00:23,  2.98s/it]

Estimated time remaining: 00:00:21


Processing gpt-4o-2024-08-06:  82%|████████▎ | 33/40 [01:29<00:21,  3.04s/it]

Estimated time remaining: 00:00:18


Processing gpt-4o-2024-08-06:  85%|████████▌ | 34/40 [01:32<00:19,  3.18s/it]

Estimated time remaining: 00:00:16


Processing gpt-4o-2024-08-06:  88%|████████▊ | 35/40 [01:35<00:14,  2.94s/it]

Estimated time remaining: 00:00:13


Processing gpt-4o-2024-08-06:  90%|█████████ | 36/40 [01:38<00:12,  3.04s/it]

Estimated time remaining: 00:00:10


Processing gpt-4o-2024-08-06:  92%|█████████▎| 37/40 [01:42<00:10,  3.33s/it]

Estimated time remaining: 00:00:08


Processing gpt-4o-2024-08-06:  95%|█████████▌| 38/40 [01:45<00:06,  3.24s/it]

Estimated time remaining: 00:00:05


Processing gpt-4o-2024-08-06:  98%|█████████▊| 39/40 [01:49<00:03,  3.37s/it]

Estimated time remaining: 00:00:02


Processing gpt-4o-2024-08-06: 100%|██████████| 40/40 [01:50<00:00,  2.77s/it]

Estimated time remaining: 00:00:00
Responses from gpt-4o-2024-08-06 saved to /content/drive/MyDrive/AI_shoes/german_gpt-4o-2024-08-06_responses_2nd.csv.





In [None]:
# Reload the responses saved by the second run to inspect what was written.
# Note: this rebinds `gpt4o_flag`, which previously held the first run's responses.
gpt4o_flag = pd.read_csv("/content/drive/MyDrive/AI_shoes/german_gpt-4o-2024-08-06_responses_2nd.csv", encoding='utf-8')
gpt4o_flag  # bare last expression: rendered as a rich table instead of print()'s plain text