In [3]:
import pandas as pd

# Load the raw text dump. Entries are separated by blank lines; within an
# entry the question text is followed by a line starting with "Answer: ".
data_path = 'test_data.txt'

with open(data_path, 'r', encoding='utf-8') as file:
    raw_data = file.read()

# Drop whitespace-only chunks (e.g. the empty string left by a trailing blank
# line) so they neither become rows nor shift the alternating labels below.
entries = [chunk.strip() for chunk in raw_data.split('\n\n') if chunk.strip()]

# n=1 splits only at the first "Answer: " marker, so an answer that happens to
# contain the marker again cannot produce extra columns. reindex guarantees
# exactly two columns even when no entry contains the marker at all.
qa_parts = (
    pd.Series(entries, dtype=object)
    .str.split('\nAnswer: ', n=1, expand=True)
    .reindex(columns=[0, 1])
)

df = pd.DataFrame({
    'Question': qa_parts[0],
    'Answer': qa_parts[1],
    # NOTE(review): labels are derived purely from position (even index ->
    # human, odd index -> machine); this assumes the file lists each question
    # as a (human, machine) pair in that order — confirm against the data.
    'Label': ['human' if position % 2 == 0 else 'machine'
              for position in range(len(entries))],
})

df.head()

Unnamed: 0,Question,Answer,Label
0,Question: What are the treatments for Myelodys...,Because myelodysplastic /myeloproliferative ne...,human
1,Question: What are the treatments for Myelodys...,Treatments for. Myelodysplastic/ Myeloprolifer...,machine
2,Question: What are the symptoms of Adult Non-H...,Signs and symptoms of adult nonHodgkin lymphom...,human
3,Question: What are the symptoms of Adult Non-H...,Swollen lymph nodes. Fever. Night sweats. Wei...,machine
4,Question: What are the symptoms of Small Cell ...,Signs and symptoms of small cell lung cancer i...,human


In [4]:
def _detection_prompt(answer_text):
    """Wrap one answer in the human-vs-machine classification instruction."""
    return f'Read the following text: "{answer_text}" and tell me whether it is generated by human or machine. You only need to reply one word, human or machine.'


# One prompt per row, built from that row's answer text.
df['Prompt'] = df['Answer'].map(_detection_prompt)

df.head()

Unnamed: 0,Question,Answer,Label,Prompt
0,Question: What are the treatments for Myelodys...,Because myelodysplastic /myeloproliferative ne...,human,"Read the following text: ""Because myelodysplas..."
1,Question: What are the treatments for Myelodys...,Treatments for. Myelodysplastic/ Myeloprolifer...,machine,"Read the following text: ""Treatments for. Myel..."
2,Question: What are the symptoms of Adult Non-H...,Signs and symptoms of adult nonHodgkin lymphom...,human,"Read the following text: ""Signs and symptoms o..."
3,Question: What are the symptoms of Adult Non-H...,Swollen lymph nodes. Fever. Night sweats. Wei...,machine,"Read the following text: "" Swollen lymph nodes..."
4,Question: What are the symptoms of Small Cell ...,Signs and symptoms of small cell lung cancer i...,human,"Read the following text: ""Signs and symptoms o..."


In [6]:
from openai import OpenAI
from tqdm import tqdm

# Credentials are kept out of the notebook: with no api_key argument the
# OpenAI client reads the key from the OPENAI_API_KEY environment variable.
client = OpenAI()

def gpt_response(prompt, model="gpt-3.5-turbo", temperature=1, max_tokens=256):
    """Send one user prompt to the chat-completions API and return the reply text.

    Args:
        prompt: Text sent as the single user message.
        model: Chat model name; defaults to the previously hard-coded model.
        temperature: Sampling temperature; defaults to the previous value of 1.
            Lower values should make repeated runs more consistent.
        max_tokens: Upper bound on the reply length; defaults to 256.

    Returns:
        The message content of the first choice, or "" when the API returns
        no content for it.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # The content field can be None; normalise it to an empty string so that
    # callers can always apply string methods to the result.
    content = response.choices[0].message.content
    return content if content is not None else ""

# Query the model once per prompt, in row order, with tqdm reporting progress.
responses = [gpt_response(prompt) for prompt in tqdm(df['Prompt'])]

# Replies line up positionally with the rows they were generated from.
df['Response'] = responses
df.head()

100%|██████████| 147/147 [01:24<00:00,  1.74it/s]


Unnamed: 0,Question,Answer,Label,Prompt,Response
0,Question: What are the treatments for Myelodys...,Because myelodysplastic /myeloproliferative ne...,human,"Read the following text: ""Because myelodysplas...",Machine
1,Question: What are the treatments for Myelodys...,Treatments for. Myelodysplastic/ Myeloprolifer...,machine,"Read the following text: ""Treatments for. Myel...",Machine
2,Question: What are the symptoms of Adult Non-H...,Signs and symptoms of adult nonHodgkin lymphom...,human,"Read the following text: ""Signs and symptoms o...",Machine
3,Question: What are the symptoms of Adult Non-H...,Swollen lymph nodes. Fever. Night sweats. Wei...,machine,"Read the following text: "" Swollen lymph nodes...",Machine
4,Question: What are the symptoms of Small Cell ...,Signs and symptoms of small cell lung cancer i...,human,"Read the following text: ""Signs and symptoms o...",Human


In [7]:
# Persist the prompts and model replies (without the index column) so they can
# be reloaded from disk later.
results_csv = 'updated_dataframe.csv'
df.to_csv(results_csv, index=False)

In [9]:
import pandas as pd

# Reload the saved results. Default NA parsing is disabled because the columns
# hold text: otherwise empty cells or replies such as "NA" / "null" would come
# back as float NaN and break the string methods used when scoring.
df = pd.read_csv("updated_dataframe.csv", keep_default_na=False)
def _normalize_verdict(value):
    """Return a lowercase verdict stripped of surrounding whitespace and punctuation, or None for non-text."""
    if not isinstance(value, str):
        # Missing values arrive as NaN/None after a CSV round trip.
        return None
    return value.strip().strip('.,;:!?"\'').strip().lower()


def is_correct(row):
    """Tell whether the model's reply in a row matches the row's label.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with 'Response' and
            'Label' entries.

    Returns:
        True when both values are text and equal after trimming whitespace,
        surrounding punctuation (so "Machine." counts as "machine") and case;
        False otherwise, including for missing or empty replies.
    """
    response = _normalize_verdict(row['Response'])
    label = _normalize_verdict(row['Label'])
    return bool(response) and response == label

# Score every row, keep the per-row flags on the frame, and report the share
# of rows where the reply matched the label.
correct_flags = df.apply(is_correct, axis=1)
df['Correct'] = correct_flags

accuracy = correct_flags.mean()
print(f"Accuracy: {accuracy:.2%}")

Accuracy: 58.50%
