## Data Generation

This notebook generates the dataset for each concept using contrasting views.

In [1]:
import os
from prompts import *
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
from tqdm import tqdm
import json

Read the environment variables

In [2]:
# Load variables from the local .env file into the process environment;
# the OpenAI API key is read from os.environ further down.
load_dotenv()

True

#### Initializing the OpenAI client

In [3]:
# Shared OpenAI client used by every request in this notebook; the key comes
# from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

#### Defining a function to request a response from GPT

In [4]:
def get_gpt_response(system_prompt, user_prompt, model="gpt-4o-mini", temperature=0.7):
    """Request a single response from the OpenAI Responses API.

    Args:
        system_prompt: Text sent as the request's ``instructions``.
        user_prompt: Text sent as the request's ``input``.
        model: Identifier of the model to query. Defaults to "gpt-4o-mini".
        temperature: Sampling temperature. Defaults to 0.7 so that repeated
            calls with the same prompts produce varied stories.

    Returns:
        The ``output_text`` of the response, or None if the request raised.
    """
    try:
        response = client.responses.create(
            model=model,
            instructions=system_prompt,
            input=user_prompt,
            temperature=temperature,
        )
    except Exception as e:
        # Best-effort: report the failure and signal it with None so that one
        # failed request does not abort a whole generation loop.
        print(f"Error occurred: {e}")
        return None
    return response.output_text

#### Initialization for Data Samples

In [None]:
# Number of responses requested per concept.
TOTAL_SAMPLES = 60

# Directory that receives the raw per-concept CSV files; creating it is a
# no-op when it already exists.
os.makedirs(name='data/gpt_responses_raw', exist_ok=True)

#### Generating Data for Political Equality

In [None]:
# Generate TOTAL_SAMPLES responses for the POLITICAL_LEADERSHIP_EQUALITY
# concept and save them as a single-column CSV.
sys_prompt = SYSTEM_PROMPTS['POLITICAL_LEADERSHIP_EQUALITY']
# NOTE(review): this name looks like a typo of "POLITICAL_USER_PROMPT"; it has
# to match whatever the prompts module exports, so confirm before renaming.
user_prompt = POITICAL_USER_PROMPT
outputs = []
print("Generating responses for POLITICAL_LEADERSHIP_EQUALITY")
for i in tqdm(range(TOTAL_SAMPLES)):
    try:
        output = get_gpt_response(sys_prompt, user_prompt)
    except Exception as e:
        print(f"Error occurred during sample {i}: {e}")
        continue
    # get_gpt_response signals an API failure by returning None rather than
    # raising, so drop that sample here instead of writing an empty CSV row.
    if output is None:
        print(f"Error occurred during sample {i}: no response returned")
        continue
    outputs.append(output)

outputs_df = pd.DataFrame(outputs, columns=['response'])
outputs_df.to_csv('data/gpt_responses_raw/POLITICAL_LEADERSHIP_EQUALITY_responses.csv', index=True)

100%|██████████| 2/2 [00:16<00:00,  8.02s/it]


#### Generating Data for Gender Equality Employment

In [None]:
# Generate TOTAL_SAMPLES responses for the GENDER_EQUALITY_EMPLOYMENT concept
# and save them as a single-column CSV.
sys_prompt = SYSTEM_PROMPTS['GENDER_EQUALITY_EMPLOYMENT']
user_prompt = GENDER_EQUALITY_EMPLOYMENT_USER_PROMPT
outputs = []
print("Generating responses for GENDER_EQUALITY_EMPLOYMENT")
for i in tqdm(range(TOTAL_SAMPLES)):
    try:
        output = get_gpt_response(sys_prompt, user_prompt)
    except Exception as e:
        print(f"Error occurred during sample {i}: {e}")
        continue
    # get_gpt_response signals an API failure by returning None rather than
    # raising, so drop that sample here instead of writing an empty CSV row.
    if output is None:
        print(f"Error occurred during sample {i}: no response returned")
        continue
    outputs.append(output)

outputs_df = pd.DataFrame(outputs, columns=['response'])
outputs_df.to_csv('data/gpt_responses_raw/GENDER_EQUALITY_EMPLOYMENT_responses.csv', index=True)

#### Generating Data for Media Freedom

In [None]:
# Generate TOTAL_SAMPLES responses for the MEDIA_FREEDOM concept and save them
# as a single-column CSV.
sys_prompt = SYSTEM_PROMPTS['MEDIA_FREEDOM']
user_prompt = MEDIA_FREEDOM_USER_PROMPT
outputs = []
print("Generating responses for MEDIA_FREEDOM")
for i in tqdm(range(TOTAL_SAMPLES)):
    try:
        output = get_gpt_response(sys_prompt, user_prompt)
    except Exception as e:
        print(f"Error occurred during sample {i}: {e}")
        continue
    # get_gpt_response signals an API failure by returning None rather than
    # raising, so drop that sample here instead of writing an empty CSV row.
    if output is None:
        print(f"Error occurred during sample {i}: no response returned")
        continue
    outputs.append(output)

outputs_df = pd.DataFrame(outputs, columns=['response'])
outputs_df.to_csv('data/gpt_responses_raw/MEDIA_FREEDOM_responses.csv', index=True)

#### Generating Data for Money Redistribution

In [None]:
# Generate TOTAL_SAMPLES responses for the REDISTRIBUTION_MONEY concept and
# save them as a single-column CSV.
sys_prompt = SYSTEM_PROMPTS['REDISTRIBUTION_MONEY']
user_prompt = REDISTRIBUTION_MONEY_USER_PROMPT
outputs = []
print("Generating responses for REDISTRIBUTION_MONEY")
for i in tqdm(range(TOTAL_SAMPLES)):
    try:
        output = get_gpt_response(sys_prompt, user_prompt)
    except Exception as e:
        print(f"Error occurred during sample {i}: {e}")
        continue
    # get_gpt_response signals an API failure by returning None rather than
    # raising, so drop that sample here instead of writing an empty CSV row.
    if output is None:
        print(f"Error occurred during sample {i}: no response returned")
        continue
    outputs.append(output)

outputs_df = pd.DataFrame(outputs, columns=['response'])
outputs_df.to_csv('data/gpt_responses_raw/REDISTRIBUTION_MONEY_responses.csv', index=True)

#### Generating Data for Unemployment Benefits

In [None]:
# Generate TOTAL_SAMPLES responses for the UNEMPLOYMENT_BENEFITS concept and
# save them as a single-column CSV.
sys_prompt = SYSTEM_PROMPTS['UNEMPLOYMENT_BENEFITS']
user_prompt = UNEMPLOYMENT_BENEFITS_USER_PROMPT
outputs = []
print("Generating responses for UNEMPLOYMENT_BENEFITS")
for i in tqdm(range(TOTAL_SAMPLES)):
    try:
        output = get_gpt_response(sys_prompt, user_prompt)
    except Exception as e:
        print(f"Error occurred during sample {i}: {e}")
        continue
    # get_gpt_response signals an API failure by returning None rather than
    # raising, so drop that sample here instead of writing an empty CSV row.
    if output is None:
        print(f"Error occurred during sample {i}: no response returned")
        continue
    outputs.append(output)

outputs_df = pd.DataFrame(outputs, columns=['response'])
outputs_df.to_csv('data/gpt_responses_raw/UNEMPLOYMENT_BENEFITS_responses.csv', index=True)

#### Generating Data for Punishment Severity

In [None]:
# Generate TOTAL_SAMPLES responses for the PUNISHMENT_SEVERITY concept and
# save them as a single-column CSV.
sys_prompt = SYSTEM_PROMPTS['PUNISHMENT_SEVERITY']
user_prompt = PUNISHMENT_SEVERITY_USER_PROMPT
outputs = []
print("Generating responses for PUNISHMENT_SEVERITY")
for i in tqdm(range(TOTAL_SAMPLES)):
    try:
        output = get_gpt_response(sys_prompt, user_prompt)
    except Exception as e:
        print(f"Error occurred during sample {i}: {e}")
        continue
    # get_gpt_response signals an API failure by returning None rather than
    # raising, so drop that sample here instead of writing an empty CSV row.
    if output is None:
        print(f"Error occurred during sample {i}: no response returned")
        continue
    outputs.append(output)

outputs_df = pd.DataFrame(outputs, columns=['response'])
outputs_df.to_csv('data/gpt_responses_raw/PUNISHMENT_SEVERITY_responses.csv', index=True)