In [1]:
import os, json
from pathlib import Path
from urllib.parse import quote

import boto3
from dotenv import load_dotenv

%load_ext sql

In [2]:
# config
# Project root that holds the `.env` file. `__file__` is not defined inside a
# notebook, so the script form below is kept only for reference:
# ROOT = Path(__file__).resolve().parents[1]
# The root can be overridden with the REDDIT_PIPELINE_ROOT environment variable;
# the default is the original notebook-only location.
ROOT = Path(os.environ.get("REDDIT_PIPELINE_ROOT", "/home/ubuntu/deds2025b_proj/opt/reddit_pipeline"))
load_dotenv(ROOT / '.env')

# Required settings: a missing variable raises KeyError immediately.
BUCKET = os.environ["LAKE_BUCKET"]
PREFIX = "bronze/reddit"
REDDIT_RDS_ARN = os.environ["REDDIT_RDS_ARN"]
DATABASE = os.environ["RDS_DB"]

# AWS clients (credentials/region are resolved by boto3's default chain).
s3 = boto3.client("s3")
secrets = boto3.client("secretsmanager")

# Fetch the RDS credentials from Secrets Manager; the secret string is parsed
# as JSON and read for the keys username, password, host and port.
cfg = json.loads(secrets.get_secret_value(SecretId=REDDIT_RDS_ARN)['SecretString'])

# Percent-encode the credentials before embedding them in the URL: characters
# such as '@', ':', '/', '#', '%' or spaces in a password would otherwise
# produce a malformed (or silently mis-parsed) connection string.
# quote(..., safe="") is used so that spaces become %20 rather than '+'.
db_user = quote(str(cfg['username']), safe="")
db_password = quote(str(cfg['password']), safe="")

# DATABASE_URL is set in the process environment for the `%sql` magic to use
# as its connection string.
os.environ['DATABASE_URL'] = f"postgresql://{db_user}:{db_password}@{cfg['host']}:{cfg['port']}/{DATABASE}"

# Reddit RDS

![REDDIT_RDS.png](attachment:5424b62b-de2c-4986-a1cb-2b1e3f689757.png)

In [3]:
%%sql
-- AUTHORS
-- One row per Reddit account, keyed by its fullname id.
CREATE TABLE
  IF NOT EXISTS author (
    author_fullname VARCHAR(32) PRIMARY KEY,
    author VARCHAR(100) NOT NULL,
    author_premium VARCHAR(16)
  );

-- SUBREDDITS
-- One row per subreddit, keyed by its fullname id.
CREATE TABLE
  IF NOT EXISTS subreddit (
    subreddit_id VARCHAR(32) PRIMARY KEY,
    subreddit_name_prefixed VARCHAR(100) NOT NULL,
    subreddit_type VARCHAR(20),
    subreddit_subscribers BIGINT
  );

-- POSTS
-- Each post belongs to exactly one subreddit. The author reference is nullable.
-- created_utc holds the creation time as Unix epoch seconds.
CREATE TABLE
  IF NOT EXISTS post (
    post_name VARCHAR(32) PRIMARY KEY,
    subreddit_id VARCHAR(32) NOT NULL REFERENCES subreddit (subreddit_id),
    author_fullname VARCHAR(32) REFERENCES author (author_fullname),
    title VARCHAR(300),
    selftext TEXT,
    score INTEGER,
    upvote_ratio REAL,
    num_comments INTEGER,
    url TEXT NOT NULL,
    created_utc BIGINT NOT NULL
  );

-- COMMENTS
-- Each comment belongs to a post and may reference a parent comment.
-- Deleting a post or a parent comment cascades to the dependent comments.
CREATE TABLE
  IF NOT EXISTS comment (
    comment_name VARCHAR(32) PRIMARY KEY,
    author_fullname VARCHAR(32) REFERENCES author (author_fullname),
    post_name VARCHAR(32) NOT NULL REFERENCES post (post_name) ON DELETE CASCADE,
    parent_comment_name VARCHAR(32) REFERENCES comment (comment_name) ON DELETE CASCADE,
    body TEXT,
    score INTEGER,
    created_utc BIGINT NOT NULL
  );

-- DERIVED TIMESTAMPS
-- created_ts is a stored generated column computed from created_utc.
-- IF NOT EXISTS keeps this cell re-runnable, matching the CREATE TABLE
-- statements above. Without it a second run fails because the column
-- already exists.
ALTER TABLE post
ADD COLUMN IF NOT EXISTS created_ts TIMESTAMPTZ GENERATED ALWAYS AS (to_timestamp (created_utc)) STORED;

ALTER TABLE comment
ADD COLUMN IF NOT EXISTS created_ts TIMESTAMPTZ GENERATED ALWAYS AS (to_timestamp (created_utc)) STORED;

In [3]:
# List the relations in the connected database (psql-style meta-command) to
# confirm that the tables from the DDL cell exist.
%sql \d

Schema,Name,Type,Owner
public,author,table,deds
public,comment,table,deds
public,post,table,deds
public,subreddit,table,deds


In [4]:
%%sql
-- Preview the post table, capped at 100 rows. There is no ORDER BY, so row order is not guaranteed.
SELECT
  *
FROM
  post
LIMIT
  100;

post_name,subreddit_id,author_fullname,title,selftext,score,upvote_ratio,num_comments,url,created_utc,created_ts
t3_1n3fq4g,t5_3mp17l,t2_qvqwzlq5,Member Dewless has passed away,"I wanted to let this group know that my beautiful daughter “Dewless” passed away Thursday 8/28/25. She held on to celebrate her son’s 3rd birthday that previous Sunday. She was diagnosed while pregnant in 2022, and found out it was metastatic in June 2024. She courageously endured 5 lines of treatment. She was in home hospice for 2 weeks. I refuse to say cancer won. She won, she chose to say enough. She handled living with cancer with grace if that’s possible. In the end love won, not cancer because she was surrounded by those that loved her enormously and endlessly.",99,1.0,16,https://www.reddit.com/r/LivingWithMBC/comments/1n3fq4g/member_dewless_has_passed_away/,1756493162,2025-08-29 18:46:02+00:00
t3_1n39kq3,t5_3mp17l,t2_4h9m8rcw,A Stage 4 Vent,"I usually handle this crap pretty well, but I'm getting so worn down. These last few months have been a nightmare, and not because the cancer isn't responding. It is. It's the freaking facility (Roswell Park Cancer Center in Buffalo - avoid it!!). They used to fit in CTs in between NM bone scan injections and the actual scan. Not anymore. After my last injection they told me to come back at 1; they would do it then. I did, and sat till 2:45, when my appointment was. By then I'm in tears. I'm frustrated, tired, scared, in pain, and just want to go home. I ask when they'll take me back. They tell me they'll see me when my appointment is and go sit down some more (which they didn't. they were 30 minutes late) Last time I was there for 7 hours, for less than 1 hour of testing. Plus a three hour round trip drive. God forbid you book appointments together to cause less inconvenience to your patients. GI - I have horrible stomach pain and am on multiple meds that aren't helping. They won't do an endoscopy because 1. we don't have the staff. 2. we don't think we'll find anything. 3. we're too busy and 4. (my personal favorite) you're not that sick. (I had one scheduled but an NP decided to cancel it herself. They were too busy). ""If you want it worked up, go somewhere else."" Pain - headaches are excruciating and I've now developed tinnitus. Breast Clinic refers me to neurosurgery. She's has a low suspicion of leptomeningeal (MRI was fine but clinically I was having symptoms) so she orders ONE lumbar puncture. The false negative rate for the first LP is approx 50%. The last time I went through the lepto-scare the radiologist told one is meaningless. Disregard it Three is standard of care/best practice. Anything less has no value. I point this out to her. She says ""we do 'real world medicine' here."" WTAF????? 
Anyways it came back negative and I get a message in my patient portal to go somewhere else for treatment for the headaches. Maybe find someone in my city that practices neurology (that was her idea of a referral). I have asked REPEATEDLY for a pain med; I'm a stage 4 cancer patient. A pain med is not unreasonable. They refer me to palliative who prescribes medicinal marijuana. I hated weed in high school and hate it now. So back to Tylenol and near constant pain (I did take my dead dogs Tramadol til that ran out). Brain MRIs - I had gamma knife immediately upon stage 4 diagnosis. After the first post surgery MRI came back good, they discharged me and had the Breast Clinic order all future surveillance MRIs. They did this for 5 years. After my recent debacle with neurosurgery, I now have to back to radiation oncology for the MRI order. So another 3 hour round trip (in shitty weather 8 months out of the year), another day off work for my husband, another copay, another $50-60 in gas. Scammy much, Roswell? I'm just so sick of every little goddamn thing being a struggle. ""Real world medicine""???? I've never heard of such crap. Why would she care about missing the most devastating sequelae of BC? She's not the one that's gonna die from it; that would be me.",14,1.0,7,https://www.reddit.com/r/LivingWithMBC/comments/1n39kq3/a_stage_4_vent/,1756479169,2025-08-29 14:52:49+00:00
t3_1n322ac,t5_3mp17l,t2_m095tkcj,Triggers,"Hi all, Just venting. I’m on my 6th cycle of Xeloda Capecitabine. Last scan showed stabilisation of bone mets and bone marrow mets very very minor. I had a review today, bloods are normal (for chemo drugs, stable red/white/plasma) but tumour markets had a little jump. I immediately got triggered & started worrying treatment is failing. I am in therapy, am living my life around treatment and am usually happy but one result can make me spiral. It could be the tail end of winter (I’m in Melbourne, Australia) but I felt deflated by one number - a number my oncologist repeatedly says is just one part of data they use. Any advice for triggers? I’m thinking this is just mets life.",10,1.0,7,https://www.reddit.com/r/LivingWithMBC/comments/1n322ac/triggers/,1756457107,2025-08-29 08:45:07+00:00
t3_1n2usqx,t5_3mp17l,t2_4d9y1ex4,Trovelvy has likely stopped working,"I've had some changes to my opposite breast that my oncologist is now thinking is spread of my disease. I lasted 5 months on this only and it barely stabilized my current disease for a short period of time. I have a few chemo options. Radition to the breast etc. I also see MD anderson and will get a second opinion they had suggested clinical trial if Trodelvy stops. Maybe try that before circling back to platinum chemo and something else... The concern being resistance to platinum based therapy. I have TNBC and its aggressive.... im 39 and it just deflates me, my son young just turned 20months. Try to be as postive as possible but I am pragmatic and I dont know every time this thing changes it just gets me. I have 2 bone mets and some lymph on both sides. I have mammo, ultra sound scheduled for early next week and biopsy. Just feeling discouraged",15,0.95,2,https://www.reddit.com/r/LivingWithMBC/comments/1n2usqx/trovelvy_has_likely_stopped_working/,1756432275,2025-08-29 01:51:15+00:00
t3_1n2try2,t5_3mp17l,t2_8hcek2gj5,2 months in.. (my story),"Er + IDC to liver. Originally in right breast, got passed off my my OB as “milk” and “not to worry about it”. My mom (now passed with pancreatic cancer) god bless her soul, pushed me to get an ultrasound. Even the tech said it looked like milk but once they called requesting a mammogram I knew. I declined 32 rounds of chemo at first and opted for double mastectomy and 15 rounds of rads. I luckily only had minor cording issues which I resolved but no limitation of movement etc. Started feeling little pains in late 2023 (a lot of stress that year and the year before a losing mom, almost losing dad and a new born plus 2 under 5) but we were living abroad so I didn’t think anything of it. Once we got back I got an ultrasound just to be safe. My GP said it just sounded like bile issues. Confirmed about 5 days later it was metastases. I lost my mind. I felt I had been doing everything “right”. Eating clean, moving my body etc. I was recommended to go on tamoxifen initially (pre Mets) but I declined. I didn’t want too much “medicine” in my body. I always had a knowing it would come back in my liver and I trust my intuition more than anything. Anywho - confirmed liver Mets in February, I went nuts. 2 tumours and some lesions. I decided I’d go on tamoxifen even though my oncologist suggested ovarian suppression, letrozole and ribociclib. I’ve always felt great, never fatigued how I was at first, very minimal discomfort (if any) still going on 5k runs like it was nothing. Went for my scan in May - tumours had doubled and my original 14 lesions “had increased” but when he counted there was still 14? lol. ANYWAY I ended up on the 3 treatments and started them back in June. My liver enzymes elevated a little, tumor markers went from 280 to 351 which I know can be the cells being dumped into the blood. I had a “rough go” (in my own terms) but apparently nothing like many people on that first cycle. 
Nausea, abdominal pain. That’s about it. July/August was my second round and I saw my oncologist last week - enzymes are higher and markers are at 474. Of course they don’t really care about markers as scans hold the truth but I do have a scan next month so I’m praying that things have shrunk. Because of the elevation, I’ve been given an extra 2 weeks off of my usual one week off. Onco says this looks typical of this phase of the medication (and he’s pretty pessimistic lol) so if he says that, I feel good about it. I guess I’m sharing this for anyone who’s going through it. I don’t fear cancer. At all. What I do is want to understand it. I want to heal so that my kids get a mom here for as long as humanly possible, so that other women can see it’s possible, so that I can understand my body and heal what it’s been through (traumatic births, SA etc). I see my oncologist next week for blood work and will create an update post! OH! I also got in to see the surgical onco at princess margaret this week who said typically they don’t bother looking at someone who’s at this stage but because I’m young (35) and healthy, if I can shrink the Mets and keep them stable for a year, she’ll operate 🥳 Don’t give up hope!!",16,1.0,10,https://www.reddit.com/r/LivingWithMBC/comments/1n2try2/2_months_in_my_story/,1756429390,2025-08-29 01:03:10+00:00
t3_1n2tnqu,t5_3mp17l,t2_em721q17,Pain weeks after sbrt?,So I had a small lesion on my hip and they did srbt up at Mayo. Its been about 4 weeks and suddenly I'm getting jolts of pain in my hip. Of course my mind is screaming progression but I did contact mayo and they said my pain could be related to the sbrt. Just wanna know if anyone has had a similar experience,7,1.0,3,https://www.reddit.com/r/LivingWithMBC/comments/1n2tnqu/pain_weeks_after_sbrt/,1756429069,2025-08-29 00:57:49+00:00
t3_1n2thcl,t5_3mp17l,t2_m40v2jzgt,"Well, fuck ...","Well, fuck ... everything has been a chaotic mess and now my oldest and youngest kiddos are about to have birthdays...I was really counting on some help from the assistance reddit but my post wasn't approved because I have two weeks I wasn't active in the last sixty days...I tried to find some odd jobs on nextdoor for extra funds and that's going nowhere...I got told about a local group that helps for birthdays but they aren't excepting new applications...so how is everyone dealing with birthdays because I can't be the only broke cancer parent...my brain is fried, if you have any creative suggestions I open for them...they are both so excited and I am worried they about to super disappointed about their birthdays ...with everything going on I have to at least make their special days memorable...",10,0.86,17,https://www.reddit.com/r/LivingWithMBC/comments/1n2thcl/well_fuck/,1756428578,2025-08-29 00:49:38+00:00
t3_1n2odi8,t5_3mp17l,t2_xxfekcgui,Just found out that doctor’s kids attend same school as my daughter,"Hello, I just discovered that my gynecologist’s kids attend the same private school as my daughter. We meet regularly as I will eventually like to pause treatment for egg retrieval (crazy, I know). She signed off on it given I had a deep response to treatment but I would like a deeper response before doing so. No one knows about my MBC, aside from my doctors and a handful members of my family. Should I tell her not to share anything? I assume she won’t but freaking out a bit now given we’re no longer separated by a degree or so… Thanks!",5,0.73,9,https://www.reddit.com/r/LivingWithMBC/comments/1n2odi8/just_found_out_that_doctors_kids_attend_same/,1756415379,2025-08-28 21:09:39+00:00
t3_1n2mlxl,t5_3mp17l,t2_t4gzgpvm8,Positive Stories,I was just confirmed to have stave IV mTNBC. I’m terrified and trying my absolute darndest to stay off Google but it’s hard. I REALLY need some positive stories and hope here. Please no sad stories.,15,0.95,20,https://www.reddit.com/r/LivingWithMBC/comments/1n2mlxl/positive_stories/,1756411262,2025-08-28 20:01:02+00:00
t3_1n2ljcq,t5_3mp17l,t2_pow8ki5,Liver and bone Mets,Does anyone here have liver mets? I found out today (see previous post) I have 3 small liver Mets and 1 new bone met. I’ve been stable almost 3 years with bone only. I was always told when it goes to an organ it’s very bad. My dr says she’s not super worried. That this is how it goes. We get a few years of stability and then see small progress and need a new treatment. I’ve only been on one protocol so far so I have so many other things to try. I just really want to know what I’m dealing with here. Are yall stable? Is it working? How long have you had the Mets? I don’t want to die even tho I know eventually we all do. I just want to live stable for as long as I can.,8,1.0,6,https://www.reddit.com/r/LivingWithMBC/comments/1n2ljcq/liver_and_bone_mets/,1756408790,2025-08-28 19:19:50+00:00


In [5]:
%%sql
-- Preview the comment table, capped at 100 rows. There is no ORDER BY, so row order is not guaranteed.
SELECT
  *
FROM
  comment
LIMIT
  100;

comment_name,author_fullname,post_name,parent_comment_name,body,score,created_utc,created_ts
t1_nbd8cfl,t2_1wcjuzqpkr,t3_1n3fq4g,,"im so sorry to hear of your loss. There is this glaring gap in cancer screening in women who are having babies. It is tragic and unfair. I just wanted to share how wonderful it is that your daughter saw her son turn 3. Those first three years of life are so important, and create a foundation that will be with your grandson for life. She won by getting those three vital years. i just wish you had all and many more with her. [https://www.uhc.com/news-articles/newsroom/first-three-years](https://www.uhc.com/news-articles/newsroom/first-three-years)",13,1756494472,2025-08-29 19:07:52+00:00
t1_nbd7u3o,t2_ck9oqkx2,t3_1n3fq4g,,Thank you for sharing this with us. The way you talk about the situation is beautiful. We will miss Dewless. She is in our hearts.,9,1756494319,2025-08-29 19:05:19+00:00
t1_nbde0if,t2_d0dwi62y,t3_1n3fq4g,,I’ll light a candle for her tonight. **hugs,7,1756496197,2025-08-29 19:36:37+00:00
t1_nbdgiyf,t2_ux141ytk,t3_1n3fq4g,,I'm very sorry for your loss. Wishing peace & comfort to your family.,4,1756496962,2025-08-29 19:49:22+00:00
t1_nbdhs2a,t2_6gt3mnh3,t3_1n3fq4g,,♥️,3,1756497342,2025-08-29 19:55:42+00:00
t1_nbdnf20,t2_uuv0nv3uo,t3_1n3fq4g,,❤️ thoughts are with you x,3,1756499058,2025-08-29 20:24:18+00:00
t1_nbdqcz3,t2_4w8zo1nz,t3_1n3fq4g,,Keeping you and your family close in my thoughts. May her memory forever be a blessing.,3,1756499937,2025-08-29 20:38:57+00:00
t1_nbdtlkt,t2_sm9o4euz4,t3_1n3fq4g,,❤️,3,1756500905,2025-08-29 20:55:05+00:00
t1_nbdw2wo,t2_m40v2jzgt,t3_1n3fq4g,,My heart and prayers go out to her loved ones. May her energy rest in eternal peace. The light she shined bright with will never dim. The love she gave will carry her light on forever. I will be planting something in my community's garden in her name. 💜 💕,3,1756501668,2025-08-29 21:07:48+00:00
t1_nbe1g26,t2_5rjr6am6,t3_1n3fq4g,,Thank you so much for sharing this with us. I often wonder about folks who drop off and hope that they had a peaceful passing. I’m sorry that you have lost your daughter.,3,1756503384,2025-08-29 21:36:24+00:00


In [6]:
%%sql
-- Preview the author table, capped at 100 rows. There is no ORDER BY, so row order is not guaranteed.
SELECT
  *
FROM
  author
LIMIT
  100;

author_fullname,author,author_premium
t2_qvqwzlq5,Cafe_chill25,not premium
t2_4h9m8rcw,Own-Land-9359,not premium
t2_m095tkcj,Flaky_Amphibian_5597,not premium
t2_4d9y1ex4,Elegant-Cricket8106,not premium
t2_8hcek2gj5,Acxc28,not premium
t2_em721q17,Low-Negotiation-8458,not premium
t2_u49f2ucdn,mkellg1,not premium
t2_xxfekcgui,Artistic_Engineer_29,not premium
t2_t4gzgpvm8,Confident_Mix1282,not premium
t2_pow8ki5,sinistersavanna,not premium


In [7]:
%%sql
-- Preview the subreddit table, capped at 100 rows. There is no ORDER BY, so row order is not guaranteed.
SELECT
  *
FROM
  subreddit
LIMIT
  100;

subreddit_id,subreddit_name_prefixed,subreddit_type,subreddit_subscribers
t5_3mp17l,r/LivingWithMBC,public,3677
t5_2t6dy,r/breastcancer,public,38017
t5_7y604d,r/ovariancancer_new,public,936
t5_3eibw,r/BRCA,public,5329
t5_2qixx,r/cancer,public,77813
t5_35ezb,r/IndustrialPharmacy,restricted,1234
