In [14]:
import pandas as pd
import numpy as np
import json
from together import Together
import os
from dotenv import load_dotenv
from together import Together

# Load API key from .env file
load_dotenv()
API_KEY = os.getenv("TOGETHER_API_KEY")
client = Together(api_key=API_KEY)

In [15]:
physician_responded_posts_df = pd.read_csv("merged_physician_submissions_1000.csv")

In [4]:
physician_responded_posts_df["clean_link_id"]

0      1i7s0u
1      1i7qst
2      1i7hzo
3      1iawxh
4      1i9v7i
        ...  
995    2c49tv
996    2c4sfy
997    2c50li
998    2c5me7
999    2cbtkn
Name: clean_link_id, Length: 1000, dtype: object

In [17]:
physician_responded_posts_df['physician_comments']

0      Although I don't know anything about the creat...
1      Sounds like he has COPD since you say his lung...
2      Is there any family history of colon cancer or...
3      Rosacea is a difficult dermatological conditio...
4      There's a difference between panic disorder an...
                             ...                        
995    That looks pretty innocent to me. Belly button...
996    There really is not much to do. There is a tub...
997    Haha no, you are not. Superman doesn't get sec...
998                                       That's a scam.
999    This sounds a lot like excersize induced asthm...
Name: physician_comments, Length: 1000, dtype: object

In [7]:
# Prepare storage for results
results = []

# Store output of models separately
results_prompt1_dict = {}
results_prompt2_dict = {}

for _, post in physician_responded_posts_df.iterrows():
    post_id = post.get("clean_link_id", "No id")
    title = post.get("title", "No title")
    link_flair_text = post.get("link_flair_text", "No flair")
    body = post.get("selftext", "No content available")
    num_comments = post.get("num_comments", 0)
    comment_score = post.get("score_comment", 0)
    
    # Prompt 1: Medical Diagnosis Suggestion
    prompt1 = (
        f"Based on the following symptoms, list possible medical conditions and recommend "
        f"the next diagnostic tests or evaluations a doctor might order. "
        f"Symptoms: {body}. Include brief justifications for each recommendation."
    )

    response1 = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[{"role": "user", "content": prompt1}],
    )

    results_prompt1_dict[post_id] = response1.choices[0].message.content

    # Prompt 2: Reddit-Style Physician Response
    prompt2 = (
        f"You're replying to a Reddit post in r/AskDocs. The post asks: {body}. "
        f"Write a comment like a real Reddit user who is a verified physician."
    )

    response2 = client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
        messages=[{"role": "user", "content": prompt2}],
    )

    results_prompt2_dict[post_id] = response2.choices[0].message.content

    # Store results in dictionary
    results.append({
        "id": post_id,
        "title": title,
        "link_flair_text": link_flair_text,
        "selftext": body,
        "num_comments": num_comments,
        "comment_score": comment_score,
        "generated_response_prompt1": response1.choices[0].message.content,
        "generated_response_prompt2": response2.choices[0].message.content
    })

# Save model outputs as JSON files
with open("generated_responses_prompt1_1000.json", "w") as f:
    json.dump(results_prompt1_dict, f, indent=4)

with open("generated_responses_prompt2_1000.json", "w") as f:
    json.dump(results_prompt2_dict, f, indent=4)

df_final = pd.DataFrame(results)

df_final.to_csv("final_generated_responses_1000.csv", index=False)

print(f"Processed {len(df_final)} posts. Results saved to 'final_generated_responses_1000.csv'.")


Processed 1000 posts. Results saved to 'final_generated_responses_1000.csv'.


In [22]:
NEW_df_final = pd.DataFrame(results)
NEW_df_final.head()

Unnamed: 0,id,title,link_flair_text,selftext,num_comments,comment_score,generated_response_prompt1,generated_response_prompt2
0,1i7s0u,Leg pain after 4ml IM injection,,"I'm a regular user of anabolic steroids, pepti...",9,1,"Based on the symptoms described, possible medi...",**Verified Physician here**\n\nI'm glad you're...
1,1i7qst,"Exhausted, out of breath",,"Hi everyone, Today I went to another city with...",1,1,"Based on the symptoms described, possible medi...",**Verified Physician here**\n\nI can sense you...
2,1i7hzo,Do these symptoms mean I should get a colonosc...,,My question is about the necessity of a colono...,9,2,"Based on the symptoms described, possible medi...","**Verified Physician Comment**\n\nHello OP, I'..."
3,1iawxh,How can I fight my Rosacea problem,,I am a 21 year old male turning 22 next month....,3,1,"Based on the symptoms described, possible medi...","**Verified Physician here**\n\nI feel for you,..."
4,1i9v7i,Thought I was going to die yesterday.,,I (33 M) was standing in the kitchen yesterday...,13,1,"Based on the symptoms you've described, here a...",**Verified Physician here**\n\nI'm glad to hea...


In [None]:
NEW_df_final = NEW_df_final.merge(
    physician_responded_posts_df[["clean_link_id", "physician_comments"]],
    left_on="id", 
    right_on="clean_link_id", 
    how="left"
)

In [24]:
NEW_df_final.drop(columns=["clean_link_id"], inplace=True)

In [25]:
NEW_df_final.columns

Index(['id', 'title', 'link_flair_text', 'selftext', 'num_comments',
       'comment_score', 'generated_response_prompt1',
       'generated_response_prompt2', 'physician_comments'],
      dtype='object')

In [26]:
# Save the corrected dataset
NEW_df_final.to_csv("NEW_final_generated_responses_1000.csv", index=False)