In [4]:
import pandas as pd
import numpy as np
import json
from together import Together
import os
from dotenv import load_dotenv
from together import Together

# Populate the process environment from a local .env file, then build the
# Together client from the TOGETHER_API_KEY entry found there.
load_dotenv()
API_KEY = os.environ.get("TOGETHER_API_KEY")
client = Together(api_key=API_KEY)

In [6]:
# Collect every comment whose author flair is exactly "Physician".
# NOTE: despite the .csv extension, the file is consumed as line-delimited
# JSON (one object per line, parsed with json.loads).
physician_comments = []

# Read with an explicit UTF-8 encoding so decoding does not depend on the
# platform's locale default.
with open("AskDocs_comments.csv", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank lines instead of letting json.loads raise on them.
        if not line.strip():
            continue
        comment = json.loads(line)

        # .get() tolerates records that have no flair field at all.
        if comment.get("author_flair_text") == "Physician":
            physician_comments.append(comment)

df_physician_comments = pd.DataFrame(physician_comments)

# Strip the "t3_" marker from link_id so the value can be compared against
# bare submission ids. The column is only created when link_id is present.
if "link_id" in df_physician_comments.columns:
    df_physician_comments["clean_link_id"] = df_physician_comments["link_id"].str.replace("t3_", "", regex=False)

df_physician_comments.head()

Unnamed: 0,author_flair_css_class,distinguished,downs,name,author_flair_text,retrieved_on,archived,score_hidden,score,controversiality,...,author_premium,treatment_tags,top_awarded_type,comment_type,collapsed_reason_code,retrieved_utc,author_is_blocked,unrepliable_reason,media_metadata,clean_link_id
0,verified-doc,,0.0,t1_cb2x8ny,Physician,1430463000.0,True,False,2,0,...,,,,,,,,,,1ib0nw
1,verified-doc,,0.0,t1_cb2xecb,Physician,1430463000.0,True,False,1,0,...,,,,,,,,,,1iawxh
2,verified-doc,,0.0,t1_cb2xkkn,Physician,1430463000.0,True,False,2,0,...,,,,,,,,,,1i7hzo
3,verified-doc,,0.0,t1_cb2xohb,Physician,1430463000.0,True,False,1,0,...,,,,,,,,,,1i9v7i
4,verified-doc,,0.0,t1_cb3cgot,Physician,1430456000.0,True,False,1,0,...,,,,,,,,,,1icpya


In [7]:
# Ids of posts that received at least one physician comment. Missing values
# are dropped so NaN never ends up in the lookup set.
physician_post_ids = set(df_physician_comments["clean_link_id"].dropna())

filtered_submissions = []
found_count = 0
max_posts = 100  # cap on how many matching submissions to collect

# Stream the submissions file (one JSON object per line) and keep only the
# posts that appear in physician_post_ids.
with open("AskDocs_submissions.csv", "r", encoding="utf-8") as f:
    for line in f:
        # Check the cap before parsing so nothing is collected once the
        # limit is reached (including when max_posts is 0).
        if found_count >= max_posts:
            break
        # Skip blank lines instead of letting json.loads raise on them.
        if not line.strip():
            continue
        submission = json.loads(line)

        if submission["id"] in physician_post_ids:
            filtered_submissions.append(submission)
            found_count += 1

df_filtered_submissions = pd.DataFrame(filtered_submissions)

# Optional export, currently disabled:
# df_filtered_submissions.to_csv("filtered_submissions_with_physician_comments.csv", index=False)

# Nothing is written to disk in this cell, so report what was collected
# rather than claiming a save.
print(f"Collected {found_count} submissions with physician responses.")

# Keep the preview as the cell's last expression so the notebook renders it;
# a bare expression in the middle of a cell is silently discarded.
df_filtered_submissions.head()


Saved 100 submissions with physician responses.


In [24]:
# Attach physician comments to their parent submissions. The left join keeps
# every filtered submission and emits one row per matching comment, so the
# result can have more rows than there are submissions.
merged_df = pd.merge(
    df_filtered_submissions,
    df_physician_comments,
    how="left",
    left_on="id",
    right_on="clean_link_id",
)

# Give the comment text column a self-explanatory name.
merged_df = merged_df.rename(columns={"body": "physician_comments"})

# Persist the merged table without the positional index.
merged_df.to_csv("merged_physician_submissions_100.csv", index=False)

# Plain-text preview followed by a record count.
print(merged_df.head())
print(f"Saved {len(merged_df)} merged records.")

  subreddit_x created_utc_x  report_reasons_x  is_self        domain  \
0     AskDocs    1373720814               NaN     True  self.AskDocs   
1     AskDocs    1373718621               NaN     True  self.AskDocs   
2     AskDocs    1373700194               NaN     True  self.AskDocs   
3     AskDocs    1373843382               NaN     True  self.AskDocs   
4     AskDocs    1373806677               NaN     True  self.AskDocs   

                                         permalink_x  stickied_x  \
0  /r/AskDocs/comments/1i7s0u/leg_pain_after_4ml_...       False   
1  /r/AskDocs/comments/1i7qst/exhausted_out_of_br...       False   
2  /r/AskDocs/comments/1i7hzo/do_these_symptoms_m...       False   
3  /r/AskDocs/comments/1iawxh/how_can_i_fight_my_...       False   
4  /r/AskDocs/comments/1i9v7i/thought_i_was_going...       False   

                                               title  gilded_x secure_media  \
0                    Leg pain after 4ml IM injection         0         None   


In [16]:
# (rows, columns) of the merged frame; rows count submission/comment pairs.
merged_df.shape

(147, 121)

In [21]:
# Inspect the `body` column of the physician comments frame.
df_physician_comments['body']

0         There's more questions that need answering tha...
1         Rosacea is a difficult dermatological conditio...
2         Is there any family history of colon cancer or...
3         There's a difference between panic disorder an...
4         I think your intuition is quite accurate. Most...
                                ...                        
126727    As long as you were on blood thinners immediat...
126728    I think you should be fine with just aspirin s...
126729    Ciprofloxacin does not interact with alcohol, ...
126730    The odds of you having a heart attack at 22 is...
126731    Yeah, those are pretty typical withdrawal symp...
Name: body, Length: 126732, dtype: object