In [None]:
import os, platform, psutil, sys, datetime as dt
# Environment snapshot: printed so results can be tied to a specific runtime.
# datetime.utcnow() is deprecated (Python 3.12+) and returns a naive datetime,
# so build a timezone-aware UTC value and render its "+00:00" offset as "Z"
# to keep the same output format as before.
print("Time (UTC):", dt.datetime.now(dt.timezone.utc).isoformat().replace("+00:00", "Z"))
print("Python:", sys.version)
print("Platform:", platform.platform())
# logical=True counts hardware threads, not just physical cores.
print("CPUs:", psutil.cpu_count(logical=True))
vmem = psutil.virtual_memory()
# Reported in decimal gigabytes (1 GB = 1e9 bytes).
print("RAM total (GB):", round(vmem.total/1e9, 2))


In [None]:
# Best-effort probe for PyTorch and CUDA. The cell must never abort the
# notebook, but it should report *why* the probe failed instead of claiming
# that torch is missing for every possible error.
try:
    import torch

    cuda_ok = torch.cuda.is_available()
    print("Torch:", torch.__version__)
    print("CUDA available:", cuda_ok)
    if cuda_ok:
        # Device index 0 is the first visible GPU.
        print("GPU name:", torch.cuda.get_device_name(0))
        print("GPU total memory (GB):", round(torch.cuda.get_device_properties(0).total_memory/1e9, 2))
    else:
        print("No GPU — training will still work with smaller batches.")
except ImportError:
    # Covers ModuleNotFoundError as well: torch really is not importable.
    print("Torch not yet installed; will install next.")
except Exception as e:
    # torch imported (or partially imported) but the probe itself failed,
    # e.g. a broken CUDA/driver setup; surface the real error.
    print(f"Torch check failed ({type(e).__name__}): {e}")


In [None]:
from pathlib import Path
# Working tree for this study on the runtime's local disk:
#   ROOT -> project root, DATA -> raw inputs, OUT -> generated artifacts.
ROOT = Path("/content/SentimentAnalysis_bias_study")
DATA, OUT = ROOT / "data_raw", ROOT / "outputs"

# Create every directory up front; exist_ok makes the cell safe to re-run.
for p in [ROOT, DATA, OUT]:
    p.mkdir(parents=True, exist_ok=True)

print("Project root:", ROOT)


In [None]:
import os
from google.colab import drive

# Attach Google Drive under /content/drive so files can outlive the session.
drive.mount("/content/drive")

# Folder on Drive that receives the persistent copies of project outputs.
DRIVE_DIR = "/content/drive/MyDrive/SentimentAnalysis_bias_study"
os.makedirs(DRIVE_DIR, exist_ok=True)  # no error if it already exists
print("Drive dir:", DRIVE_DIR)


In [None]:
!pip -q install --upgrade pip
!pip -q install \
  praw==7.7.1 \
  pandas==2.2.2 \
  tqdm==4.66.5 \
  pyarrow==17.0.0 \
  langdetect==1.0.9 \
  langid==1.1.6 \
  emoji==2.12.1

# Torch often preinstalled. If missing, uncomment the next line:
# !pip -q install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# For modeling later (not used in scraping yet, but we pin early)
!pip -q install \
  scikit-learn==1.5.1 \
  transformers==4.43.3 \
  datasets==2.20.0 \
  accelerate==0.33.0 \
  shap==0.45.1 \
  fairlearn==0.10.0 \
  aif360==0.6.1


In [None]:
!pip install --upgrade tqdm


In [None]:
import importlib.metadata as importlib_metadata
import pandas as pd, praw, sklearn, transformers, datasets, shap, fairlearn
import pyarrow

def pkg_ver(pkg_name):
    """Look up the installed version of a distribution by name.

    Args:
        pkg_name: Distribution name as known to the package metadata
            (e.g. "langdetect").

    Returns:
        The version string from the installed metadata, or the literal
        "Not installed" when no such distribution is found.
    """
    try:
        found = importlib_metadata.version(pkg_name)
    except importlib_metadata.PackageNotFoundError:
        found = "Not installed"
    return found

# Report the versions actually loaded in this kernel so they can be compared
# against the pins in the install cell. Modules that were imported above are
# read via their __version__ attribute; langdetect and langid are looked up
# through package metadata with pkg_ver().
print(f"praw: {praw.__version__}")
print(f"pandas: {pd.__version__}")
print(f"pyarrow: {pyarrow.__version__}")
print(f"langdetect: {pkg_ver('langdetect')}")
print(f"langid: {pkg_ver('langid')}")
print(f"scikit-learn: {sklearn.__version__}")
print(f"transformers: {transformers.__version__}")
print(f"datasets: {datasets.__version__}")
print(f"shap: {shap.__version__}")
print(f"fairlearn: {fairlearn.__version__}")


praw: 7.7.1
pandas: 2.2.2
pyarrow: 17.0.0
langdetect: 1.0.9
langid: 1.1.6
scikit-learn: 1.5.1
transformers: 4.43.3
datasets: 2.20.0
shap: 0.45.1
fairlearn: 0.10.0


In [None]:
from getpass import getpass

import praw

# Prompt without echoing what is typed. The prompt text names the value being
# requested (it is shown on screen, so it must never contain the credential
# itself), and stray whitespace/newlines from copy-paste are stripped because
# they would make otherwise valid credentials fail.
CLIENT_ID = getpass("Reddit client ID: ").strip()
CLIENT_SECRET = getpass("Reddit client secret: ").strip()

# Set a descriptive user agent (required by Reddit API rules)
USER_AGENT = "SentimentAnalysis-Bias-Study/1.0 (academic research; contact: akatuwal1@cougars.ccis.edu)"

reddit = praw.Reddit(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    user_agent=USER_AGENT,
    # Passed through unchanged; see PRAW's configuration options for its effect.
    check_for_async=False
)
# This study only reads public content; make read-only mode explicit.
reddit.read_only = True

# Quick test fetch from r/Anxiety. A default is passed to next() so an empty
# listing is reported clearly instead of raising a bare StopIteration.
test_post = next(reddit.subreddit("Anxiety").new(limit=1), None)
if test_post is None:
    print("Connected, but r/Anxiety returned no posts to display.")
else:
    print("Connection OK. Example post title:")
    print(test_post.title[:80], "...")


In [None]:
from getpass import getpass
import requests

def clean(s):
    """Return `s` with leading and trailing whitespace removed.

    A bare str.strip() already removes newlines and carriage returns along
    with every other whitespace character, so a single call is sufficient.
    """
    trimmed = s.strip()
    return trimmed

# Standalone credential check against Reddit's OAuth token endpoint.
# The prompts name the value being requested; input is not echoed and is
# cleaned of stray whitespace before use.
CLIENT_ID = clean(getpass("Reddit client ID: "))
CLIENT_SECRET = clean(getpass("Reddit client secret: "))
# NOTE(review): the contact address below is a placeholder — confirm whether it
# should carry a real contact before this cell is used against the live API.
USER_AGENT = "MH-Bias-Study/1.0 (academic; contact: your_email@example.com)"

# 1) Try client_credentials grant (app-only read-only)
resp = requests.post(
    "https://www.reddit.com/api/v1/access_token",
    data={"grant_type": "client_credentials"},
    auth=(CLIENT_ID, CLIENT_SECRET),  # HTTP Basic auth with the app's ID and secret
    headers={"User-Agent": USER_AGENT},
    timeout=20
)

print("Status:", resp.status_code)

# Extract the token defensively: an HTTP 200 response is only a success if it
# actually carries an access_token (the body may be an error object or not JSON).
token = None
if resp.status_code == 200:
    try:
        payload = resp.json()
    except ValueError:
        payload = None
    if isinstance(payload, dict):
        token = payload.get("access_token")

if token:
    # Deliberately do NOT print the response body here: it contains the bearer
    # token, which would be saved into the notebook's output.
    print("\n✅ Token received. Your credentials are valid.")
else:
    # No token in the body, so it is safe (and useful) to show the error text.
    print(resp.text[:500])
    print("\n❌ Could not get a token. Check these:")
    print("  • App type must be 'script' (shows 'personal use script' under the name).")
    print("  • Client ID is the short string under the app name (NOT the secret).")
    print("  • Client Secret is the long value labeled 'secret'.")
    print("  • Redirect URI can be http://localhost:8080 (but not used for script auth).")
    print("  • No extra spaces/newlines in ID/secret.")
