In [2]:
import os, json
from pathlib import Path
from urllib.parse import quote

import boto3
from dotenv import load_dotenv

%load_ext sql
%config SqlMagic.displaylimit = 100

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [5]:
# Connection setup.
# 1. Load pipeline settings from the project's .env file.
# 2. Fetch the OLAP database secret (a JSON document) from AWS Secrets Manager.
# 3. Publish the resulting connection URL in the DATABASE_URL environment
#    variable; the %sql cells below are run without an explicit connection
#    string, so they presumably pick the URL up from there.
ROOT = Path('/home/ubuntu/deds2025b_proj/opt/reddit_pipeline')    # FOR NOTEBOOK ONLY
load_dotenv(ROOT / '.env')

# Both settings are required; a missing one raises KeyError right here.
REDDIT_OLAP_ARN = os.environ["REDDIT_OLAP_ARN"]
DATABASE = os.environ["OLAP_DB"]

secrets = boto3.client("secretsmanager")
cfg = json.loads(secrets.get_secret_value(SecretId=REDDIT_OLAP_ARN)['SecretString'])

# Percent-encode the credentials before embedding them in the URL. A raw
# password containing characters such as '@', ':', '/', '#' or '%' would
# otherwise be mis-parsed as URL structure. safe="" ensures '/' is encoded too.
_user = quote(str(cfg['username']), safe="")
_password = quote(str(cfg['password']), safe="")

# NOTE: DATABASE_URL now contains the password -- never print or display it.
os.environ['DATABASE_URL'] = (
    f"{cfg['engine']}://{_user}:{_password}"
    f"@{cfg['host']}:{cfg['port']}/{DATABASE}"
)

In [6]:
# List the relations in the connected database (schema, name, type, owner).
%sql \d

schema,name,type,owner
public,dim_author,table,mydbuser
public,dim_date,table,mydbuser
public,dim_subreddit,table,mydbuser
public,fact_comment,table,mydbuser
public,fact_post,table,mydbuser
public,fact_tags,table,mydbuser


In [9]:
%sql SELECT * FROM dim_author LIMIT(100)

author_sk,author_fullname,author,author_premium
1,t2_aomzp7db,Most_Room_1408,NOT PREMIUM
2,t2_j3c83dzkd,Pristine_Pick_2494,NOT PREMIUM
3,t2_oxzeb,potatoesyo,NOT PREMIUM
4,t2_1tbjwbky74,Best_Cantaloupe556,NOT PREMIUM
5,t2_661r33yx,disc0pants,NOT PREMIUM
6,t2_swponftt,Ready-Shallot-2029,NOT PREMIUM
7,t2_5wxh2xp5,mystique023,NOT PREMIUM
8,t2_95shdqwj4,cc0818,NOT PREMIUM
9,t2_1bqdboywcq,Prize_Gift_5609,NOT PREMIUM
10,t2_9dcfyeur,Kalyankarthi,NOT PREMIUM


In [10]:
%sql SELECT * FROM dim_date LIMIT(100)

date_key,date_value,year,month,day,dow,month_name,dow_name,is_weekend
1,2025-08-03,2025,8,3,0,August,Sun,Weekend
2,2025-06-03,2025,6,3,2,June,Tue,Weekday
3,2025-08-02,2025,8,2,6,August,Sat,Weekend
4,2025-03-12,2025,3,12,3,March,Wed,Weekday
5,2025-08-01,2025,8,1,5,August,Fri,Weekday
6,2025-03-02,2025,3,2,0,March,Sun,Weekend
7,2025-07-27,2025,7,27,0,July,Sun,Weekend
8,2025-02-28,2025,2,28,5,February,Fri,Weekday
9,2025-07-24,2025,7,24,4,July,Thu,Weekday
10,2025-02-23,2025,2,23,0,February,Sun,Weekend


In [11]:
%sql SELECT * FROM dim_subreddit LIMIT(100)

subreddit_sk,subreddit_id,subreddit_name_prefixed,subreddit_type,subreddit_subscribers
1,t5_35ezb,r/IndustrialPharmacy,restricted,1234
2,t5_3eibw,r/BRCA,public,5329
3,t5_3mp17l,r/LivingWithMBC,public,3677
4,t5_3mp17l,r/LivingWithMBC,public,3675
5,t5_2qjov,r/Philippines,public,3468767
6,t5_2qjov,r/Philippines,public,3468769
7,t5_7y604d,r/ovariancancer_new,public,936
8,t5_2qjov,r/Philippines,public,3468768
9,t5_2qjov,r/Philippines,public,3468771
10,t5_2qjov,r/Philippines,public,3468770


In [12]:
%sql SELECT * FROM fact_comment LIMIT(100)

comment_name,post_name,date_key,subreddit_sk,author_sk,score,negative,neutral,positive,net_sentiment
t1_mvq8s6r,t3_1ki76jd,2,7,2063.0,1,0.7859,0.1846,0.0296,-0.7563
t1_mvosptf,t3_1ktuddi,2,7,2936.0,2,0.005,0.0263,0.9687,0.9638
t1_mvosfjd,t3_1ktuddi,2,7,2936.0,1,0.0033,0.0188,0.9779,0.9746
t1_mhba8mr,t3_1j846s6,4,7,2043.0,1,0.0302,0.855,0.1149,0.0847
t1_mhb5ype,t3_1j7ztyg,4,7,4776.0,1,0.3292,0.6055,0.0654,-0.2638
t1_mhb9q9x,t3_1iyhrjh,4,7,211.0,1,0.0645,0.8504,0.0851,0.0206
t1_n6evwlg,t3_1mcpld1,5,2,17.0,1,0.0083,0.1774,0.8142,0.8059
t1_n6ew323,t3_1mcpld1,5,2,17.0,2,0.2044,0.2594,0.5362,0.3319
t1_n6afozn,t3_1me6yu4,5,2,9.0,1,0.586,0.3124,0.1016,-0.4844
t1_n6bw9ha,t3_1me6yu4,5,2,5.0,1,0.6219,0.3591,0.0189,-0.603


In [13]:
%sql SELECT * FROM fact_post LIMIT(100)

post_name,date_key,subreddit_sk,author_sk,score,upvote_ratio,num_comments,negative,neutral,positive,net_sentiment
t3_1mgpcoe,1,2,198,2,1.0,12,0.1999,0.7556,0.0445,-0.1553
t3_1mgju7k,1,2,2617,3,1.0,7,0.4654,0.4625,0.0721,-0.3933
t3_1mghzhp,1,2,1,3,1.0,7,0.0641,0.8203,0.1155,0.0514
t3_1mgth4a,1,3,2667,37,0.97,25,0.8022,0.1875,0.0103,-0.7919
t3_1mgql6x,1,3,81,11,1.0,0,0.5366,0.3731,0.0903,-0.4464
t3_1mgkeie,1,3,121,7,1.0,19,0.8975,0.092,0.0105,-0.887
t3_1mgjauf,1,3,2725,41,1.0,18,0.2301,0.6035,0.1664,-0.0637
t3_1mg9z5x,1,3,175,47,1.0,7,0.8844,0.1018,0.0138,-0.8706
t3_1mg8e8q,1,3,129,19,1.0,12,0.8501,0.125,0.0249,-0.8252
t3_1mg7s3a,1,3,2633,14,1.0,2,0.0159,0.4352,0.5489,0.533


In [14]:
%sql SELECT * FROM fact_tags LIMIT(100)

tag_name,date_key,subreddit_sk,score,negative,neutral,positive,net_sentiment
chemo,2,7,1,0.7859,0.1846,0.0296,-0.7563
follow-up,2,7,2,0.005,0.0263,0.9687,0.9638
surgery,2,7,1,0.7859,0.1846,0.0296,-0.7563
platinum,4,7,1,0.0645,0.8504,0.0851,0.0206
brca2,5,2,1,0.5471,0.3197,0.1331,-0.414
breast,5,2,3,0.6841,0.2926,0.0232,-0.6609
chemo,5,2,2,0.2044,0.2594,0.5362,0.3319
follow-up,5,2,1,0.6219,0.3591,0.0189,-0.603
surgery,5,2,1,0.6219,0.3591,0.0189,-0.603
biopsy,5,3,20,0.6346,0.3399,0.0255,-0.6091
