In [33]:
import pandas as pd
import numpy as np
import json
from together import Together
import os
from dotenv import load_dotenv
from together import Together

# Load API key from .env file
load_dotenv()
API_KEY = os.getenv("TOGETHER_API_KEY")

# Fail here, with an actionable message, rather than later inside an API call
# in a different cell where the cause is harder to trace.
if not API_KEY:
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

In [21]:
client = Together(api_key=API_KEY)

# Input file. Despite the .csv extension, every line is parsed below as a
# standalone JSON object (one post per line).
SUBMISSIONS_PATH = "AskDocs_submissions.csv"
# Starting with 10,000 rows
MAX_ROWS = 10000

high_engagement_posts = []

# Read and collect posts
# An explicit encoding keeps the read independent of the platform default.
with open(SUBMISSIONS_PATH, "r", encoding="utf-8") as f:
    # zip() stops as soon as range() is exhausted, so at most MAX_ROWS lines
    # are consumed; it also stops cleanly at end of file.
    for _, line in zip(range(MAX_ROWS), f):
        # A blank line is not valid JSON; skip it instead of crashing the cell.
        if not line.strip():
            continue
        post = json.loads(line)

        # Only considering posts that have some engagement.
        # `or 0` also covers a score that is present but null.
        if (post.get("score") or 0) > 0:
            high_engagement_posts.append(post)

# Raise instead of calling exit(): in a notebook, exit() asks the kernel to
# shut down, whereas an exception just stops execution at this cell.
if not high_engagement_posts:
    raise RuntimeError("No high-engagement posts found.")

In [9]:
# Rank posts from highest to lowest score. The sort is in place, so
# high_engagement_posts itself ends up ordered by descending score.
high_engagement_posts.sort(key=lambda post: post.get("score", 0), reverse=True)

# Select top 50% of posts to get posts with the most engagement.
# Floor division alone would select nothing when only one post is available,
# so always keep at least one.
top_half_index = max(1, len(high_engagement_posts) // 2)
top_engagement_posts = high_engagement_posts[:top_half_index]

# Only process 100 posts at first
max_posts = min(len(top_engagement_posts), 100)
top_engagement_posts = top_engagement_posts[:max_posts]

In [22]:
# Tabulate the selected posts (one row per post dict) and write them to disk
# without the DataFrame index column.
df_selected_posts = pd.DataFrame(data=top_engagement_posts)
df_selected_posts.to_csv(path_or_buf="selected_posts.csv", index=False)

In [32]:
# Model used for both prompting strategies.
MODEL_NAME = "meta-llama/Llama-3.3-70B-Instruct-Turbo"


def _generate_response(prompt, model=MODEL_NAME):
    """Send one prompt to the chat model and return the reply text.

    Args:
        prompt: Full prompt text, sent as a single "user" message.
        model: Model identifier passed to the chat completions endpoint.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


results_prompt1 = []
results_prompt2 = []

for post in top_engagement_posts:
    title = post.get("title", "No title")
    post_id = post.get("id", "No id")
    print(post_id)  # progress indicator: one id per processed post
    body = post.get("selftext", "No content available")
    score = post.get("score", 0)

    # Prompt 1: Naive Approach - Medical Diagnosis Suggestion
    prompt1 = (
        "Based on the following symptoms, list possible medical conditions and recommend "
        "the next diagnostic tests or evaluations a doctor might order. "
        f"Symptoms: {body}. Include brief justifications for each recommendation."
    )

    # Prompt 2: Reddit-Style Physician Response
    prompt2 = (
        f"You're replying to a Reddit post in r/AskDocs. The post asks: {body}. "
        "Write a comment like a real Reddit user who is a verified physician."
    )

    # Request fresh completions for this post. Appending a response object
    # created outside the loop would fail on a fresh kernel, or silently attach
    # the same text to every row if one happened to be left in memory.
    generated1 = _generate_response(prompt1)
    generated2 = _generate_response(prompt2)

    # Fields shared by both result tables.
    common_data = {"title": title, "body": body, "score": score}

    results_prompt1.append({**common_data, "generated_response": generated1})
    results_prompt2.append({**common_data, "generated_response": generated2})

df_prompt1 = pd.DataFrame(results_prompt1)
df_prompt2 = pd.DataFrame(results_prompt2)

# Saving to disk is currently disabled; uncomment to persist the results.
# df_prompt1.to_csv("prompt1_responses.csv", index=False)
# df_prompt2.to_csv("prompt2_responses.csv", index=False)

# print(f"Processed {len(results_prompt1)} posts. Results saved to 'prompt1_responses.csv' and 'prompt2_responses.csv'.")

2efzm2
23u7jp
27k4sw
1xqkls
2cwqpo
26e0bz
283z7s
27iusc
2elgli
27c5d4
2ay24m
2ek0le
2f332w
25e63v
2cyoce
2drsj6
2eo31u
1i77ym
23zfes
25kv9r
290aa8
2e05l1
2e6rvt
2eddko
233kwu
241ve7
24qmjy
27urbk
28wh1w
2c0yt4
2c6zg7
2cxkwq
2czo83
2ehwdj
2eyo1e
2f67ba
1i4673
1i7pg1
21vw6d
21x6wc
24zkdn
269nh5
26oukm
2bj5cy
2bqi5t
2c3g2c
2cxe62
2d2idv
2dht8x
2dmkli
2e4vnx
2f7ovi
2f7s72
2feak9
2139h5
21tpxv
22k30p
233bcl
23e5tt
242w3w
247m3y
24rtml
25zei1
267pj3
26ala9
26cq7o
26crfo
26qcsp
26tmdr
26zxkp
275tlk
275u89
2762v8
27asta
27gqy2
27tivq
27xr8g
28ihm5
28u3cf
28vcvn
29d6j9
29wbcs
29zkry
2a2vcw
2aldl9
2bm8xd
2bosz3
2bprko
2brj0g
2bwc65
2c8dae
2cdwv5
2cifzd
2cjoz5
2cpa1k
2cqjob
2d0qbk
2d2bxp
2d6q35
2d9t7v


In [28]:
# Spot-check the first prompt-1 record (title, body, score, generated_response).
results_prompt1[0]

{'title': 'Doctors of Reddit: I only want straight answers',
 'body': 'My 25 year old brother (6\'4", roughly 100 kg, long time IV drug user) was in a motorcycle accident roughly 36 hours ago. He has multiple broken ribs, dislocated hips and a multitude of other internal injuries. He was found at the scene, unconscious with pupils fixed and dilated. In emergency he was declared to have a Glasgow coma scale score of 3. After emergency he had surgery where they removed the top and front of his skull to accommodate the rising intercranial pressure. However, his ICP has been tracking upward steadily from 24 or so up to 90 (not sure what the numbers mean, but I know higher is bad), while his Cerebral Perfusion Pressure (CPP) is down to 25 (from my research, I know this is VERY, VERY bad)... My laymen description is that "he\'s all fucked up". \n\nI would like to know 2 things;\n\n1. What is the worst TBI you have ever seen a patient recover from (I mean survive, discounting brain damage)\n\

In [30]:
# Display the DataFrame of selected posts.
df_selected_posts

Unnamed: 0,link_flair_css_class,gilded,name,id,archived,author_flair_css_class,num_comments,saved,over_18,is_self,...,permalink,ups,retrieved_on,selftext,media,selftext_html,user_reports,mod_reports,banned_by,report_reasons
0,,0,t3_2efzm2,2efzm2,True,,12,False,False,True,...,/r/AskDocs/comments/2efzm2/doctors_of_reddit_i...,33,1441204390,"My 25 year old brother (6'4"", roughly 100 kg, ...",,,,,,
1,,0,t3_23u7jp,23u7jp,True,verified-doc,11,False,False,True,...,/r/AskDocs/comments/23u7jp/raskdocs_has_reache...,30,1441566554,"/r/AskDocs has reached the 1,000 reader mark a...",,,,,,
2,,0,t3_27k4sw,27k4sw,True,,11,False,False,True,...,/r/AskDocs/comments/27k4sw/my_wife_is_delusion...,21,1441321923,"Dear Reddit,\n\n\nMy wife came home late from ...",,,,,,
3,,0,t3_1xqkls,1xqkls,True,verified-doc,23,False,False,True,...,/r/AskDocs/comments/1xqkls/join_the_raskdocs_t...,20,1441916409,"We are currently a small subreddit, but we wou...",,,,,,
4,,0,t3_2cwqpo,2cwqpo,True,verified-doc,16,False,False,True,...,/r/AskDocs/comments/2cwqpo/raskdocs_has_merged...,20,1441230685,We are happy to announce that as of today /r/A...,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,,0,t3_2cqjob,2cqjob,True,,2,False,True,True,...,/r/AskDocs/comments/2cqjob/should_i_be_worried...,7,1441233637,"Hi, I'm a 20y/o female (obviously) about 5'5"",...",,,,,,
96,,0,t3_2d0qbk,2d0qbk,True,,10,False,False,True,...,/r/AskDocs/comments/2d0qbk/why_does_my_heart_r...,7,1441228825,When I run my HR is about 160bpm for 30-60 min...,,,,,,
97,,0,t3_2d2bxp,2d2bxp,True,,4,False,False,True,...,/r/AskDocs/comments/2d2bxp/i_think_i_had_a_min...,7,1441228018,My nose started to burn then my lip started to...,,,,,,
98,,0,t3_2d6q35,2d6q35,True,,5,False,False,True,...,/r/AskDocs/comments/2d6q35/can_being_in_pain_l...,7,1441225968,"I have a torn peroneal tendon, and have been w...",,,,,,
