<a href="https://colab.research.google.com/github/fyas101/Reddit_Group_Project/blob/main/RedditProject3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Project 3

from abc import ABC, abstractmethod
from typing import Any, List, Dict, Optional
from datetime import datetime, timedelta
import re


# abstract base class

class BaseAnalyzer(ABC):
    """Common interface for analyzers, plus a simple per-instance history log.

    Subclasses must implement ``analyze`` and ``describe``. They may call
    ``add_history`` to record what they did; records are kept in insertion
    order in a private list.
    """

    def __init__(self):
        # Private log of records appended through add_history().
        self._history: List[Dict[str, Any]] = []

    @abstractmethod
    def analyze(self, data: Any) -> Any:
        """Run the analysis on ``data`` and return its result."""

    @abstractmethod
    def describe(self) -> str:
        """Return a short human-readable label for this analyzer."""

    def add_history(self, record: Dict[str, Any]):
        """Append ``record`` to this analyzer's history log."""
        self._history.append(record)

    @property
    def history(self) -> List[Dict[str, Any]]:
        """Shallow copy of the history log.

        Mutating the returned list does not affect the internal log (the
        record dicts themselves are shared, not copied).
        """
        return self._history.copy()

    @property
    def analysis_count(self) -> int:
        """Number of records currently stored in the history log."""
        return len(self._history)


# exact project 2 functions

def categorize_by_tone(post_text):
    """Classify the tone of a post using keyword heuristics.

    Rules are checked in priority order and the first match wins:

    1. ``"sarcastic"``   - contains the phrase "yeah right" or the word "sure"
    2. ``"angry"``       - contains the word "angry" or "hate", or the
       stripped text is entirely upper-case
    3. ``"humorous"``    - contains the word "lol" or "haha"
    4. ``"neutral"``     - contains a question mark
    5. ``"informative"`` - anything else

    Keywords are matched case-insensitively and only as whole words, so
    words that merely contain a keyword ("pressure", "whatever",
    "lollipop") do not trigger a category.

    Args:
        post_text: The post body to classify.

    Returns:
        One of the five tone labels listed above.

    Raises:
        TypeError: If ``post_text`` is not a string.
    """
    if not isinstance(post_text, str):
        raise TypeError("post_text must be a string")
    clean = post_text.strip()
    lower = clean.lower()

    # \b anchors keep keywords from matching inside longer, unrelated words.
    if re.search(r"\b(?:yeah right|sure)\b", lower):
        return "sarcastic"
    # isupper() is checked on the un-lowered text: shouting counts as angry.
    if re.search(r"\b(?:angry|hate)\b", lower) or clean.isupper():
        return "angry"
    if re.search(r"\b(?:lol|haha)\b", lower):
        return "humorous"
    if "?" in clean:
        return "neutral"
    return "informative"

def detect_misinformation(post_text, keyword_list=None):
    """Flag a post that contains any misinformation keyword.

    URLs (tokens starting with ``http``) are removed before matching so that
    keywords appearing only inside a link do not count. Matching is a
    case-insensitive substring test.

    Args:
        post_text: The post body to scan.
        keyword_list: Optional keywords to look for. When omitted or empty,
            a built-in default list is used.

    Returns:
        A dict with ``"is_misinformation"`` (bool) and ``"matched_keywords"``
        (the matching keywords, in list order, spelled as supplied).
    """
    default = ["rumor", "unconfirmed", "fake news", "scam", "hoax"]
    words = keyword_list if keyword_list else default
    cleaned = re.sub(r"http\S+", "", post_text.lower())
    # The text is lower-cased, so the keyword must be too; otherwise a
    # caller-supplied keyword such as "Hoax" could never match.
    matches = [k for k in words if k.lower() in cleaned]
    return {"is_misinformation": bool(matches), "matched_keywords": matches}


def clean_post_text(text: str) -> str:
    """Normalize post text for comparison.

    Steps, in order: drop tokens starting with ``http`` or ``www``, remove
    every character that is not an ASCII letter or whitespace, collapse
    whitespace runs to a single space, then lower-case and trim.
    """
    without_links = re.sub(r"http\S+|www\S+", "", text)
    letters_only = re.sub(r"[^a-zA-Z\s]", "", without_links)
    single_spaced = re.sub(r"\s+", " ", letters_only)
    return single_spaced.lower().strip()


def check_duplicate(new_post: str, existing_posts: List[str], threshold: float = 0.9) -> bool:
    """Report whether ``new_post`` closely matches any existing post.

    Each post is reduced to the set of words left after ``clean_post_text``.
    Similarity is the size of the intersection divided by the size of the
    union of the two word sets. Returns True as soon as one existing post
    reaches ``threshold``; pairs where both word sets are empty are skipped.
    """

    def word_set(raw):
        return set(clean_post_text(raw).split())

    new_words = word_set(new_post)
    for candidate in existing_posts:
        candidate_words = word_set(candidate)
        combined = new_words | candidate_words
        if not combined:
            # Nothing to compare: both posts cleaned down to no words.
            continue
        shared = new_words & candidate_words
        if len(shared) / len(combined) >= threshold:
            return True
    return False

def track_users(posts):
    """Aggregate per-user totals from an iterable of post dicts.

    Each post must provide ``"username"``, ``"upvotes"`` and ``"comments"``;
    a truthy ``"is_disinformation"`` value (optional) counts the post as
    disinformation.

    Returns:
        A dict mapping username to a dict with ``total_posts``,
        ``total_upvotes``, ``total_comments`` and ``disinformation_posts``.
    """
    stats = {}
    for post in posts:
        name = post["username"]
        if name not in stats:
            # First post seen for this user: start every counter at zero.
            stats[name] = {
                "total_posts": 0,
                "total_upvotes": 0,
                "total_comments": 0,
                "disinformation_posts": 0,
            }
        entry = stats[name]
        entry["total_posts"] += 1
        entry["total_upvotes"] += post["upvotes"]
        entry["total_comments"] += post["comments"]
        if post.get("is_disinformation"):
            entry["disinformation_posts"] += 1
    return stats

def total_interactions_this_week(posts):
    """Sum upvotes and comments for posts created in the last 7 days.

    Each post must provide ``"created_utc"`` (an ISO-8601 timestamp string),
    ``"upvotes"`` and ``"comments"``. Timestamps without a UTC offset are
    interpreted as UTC; offset-aware timestamps (including a trailing "Z")
    are compared correctly against the current UTC time.

    Returns:
        A dict with ``total_posts``, ``total_upvotes``, ``total_comments``
        and ``total_interactions`` (upvotes + comments) for qualifying posts.
    """
    # Local import: the file-level import only brings in datetime/timedelta.
    from datetime import timezone

    # Use an aware "now" so aware and naive inputs can both be compared;
    # mixing naive and aware datetimes in a comparison raises TypeError.
    week_start = datetime.now(timezone.utc) - timedelta(days=7)
    up, com = 0, 0
    count = 0
    for p in posts:
        raw = p["created_utc"]
        if raw.endswith("Z"):
            # fromisoformat() only accepts the "Z" suffix on Python 3.11+.
            raw = raw[:-1] + "+00:00"
        created = datetime.fromisoformat(raw)
        if created.tzinfo is None:
            # Field is named created_utc, so treat naive values as UTC.
            created = created.replace(tzinfo=timezone.utc)
        if created >= week_start:
            count += 1
            up += p["upvotes"]
            com += p["comments"]
    return {
        "total_posts": count,
        "total_upvotes": up,
        "total_comments": com,
        "total_interactions": up + com
    }

def compare_engagement(posts, group_by="category"):
    """Average interactions (upvotes + comments) per post for each group.

    Posts are grouped by the value stored under the ``group_by`` key.

    Returns:
        A dict mapping each group value to its mean interactions per post,
        rounded to two decimals, in order of first appearance.
    """
    buckets = {}
    for post in posts:
        key = post[group_by]
        interactions = post["upvotes"] + post["comments"]
        # running[0] accumulates interactions, running[1] counts posts.
        running = buckets.setdefault(key, [0, 0])
        running[0] += interactions
        running[1] += 1
    return {
        key: round(total / seen, 2)
        for key, (total, seen) in buckets.items()
    }

def track_top_posts(posts, n=5):
    """Return the ``n`` posts with the most interactions, highest first.

    Side effect: every post dict in ``posts`` gets a ``"total_interactions"``
    key set to ``upvotes + comments``. The returned list holds those same
    dict objects; ties keep their original relative order.
    """
    for post in posts:
        post["total_interactions"] = post["upvotes"] + post["comments"]

    def interactions(post):
        return post["total_interactions"]

    ranked = list(posts)
    ranked.sort(key=interactions, reverse=True)
    return ranked[:n]


# Project 2 classes with inheritance
class ContentCategorizer(BaseAnalyzer):
    """Analyzer that labels a post's tone and checks it for misinformation."""

    def __init__(self, custom_keywords=None):
        """Create the categorizer.

        Args:
            custom_keywords: Optional misinformation keywords. When omitted
                (or empty), ``detect_misinformation`` uses its default list.
        """
        super().__init__()
        self._keywords = custom_keywords

    def analyze(self, post_text: str) -> Dict[str, Any]:
        """Classify ``post_text`` and record the outcome in the history.

        Returns:
            A dict with ``"tone"`` (from ``categorize_by_tone``) and
            ``"misinformation"`` (from ``detect_misinformation``).
        """
        tone = categorize_by_tone(post_text)
        # Pass the configured keywords through; previously they were stored
        # but never used, so custom_keywords had no effect.
        misinfo = detect_misinformation(post_text, self._keywords)
        result = {"tone": tone, "misinformation": misinfo}
        # Only the first 40 characters are logged to keep history compact.
        self.add_history({"text": post_text[:40], "result": result})
        return result

    def describe(self) -> str:
        """Return the display label for this analyzer."""
        return "ContentCategorizer (tone + misinformation)"


class PostCleaner(BaseAnalyzer):
    """Analyzer that cleans one post and checks it against known posts."""

    def __init__(self, post_text: str, existing=None):
        """Store the post to clean and the posts to compare it against.

        Args:
            post_text: The post this analyzer operates on.
            existing: Previously seen posts; a falsy value means none.
        """
        super().__init__()
        self.post_text = post_text
        self.existing = existing if existing else []

    def analyze(self, _=None) -> Dict[str, Any]:
        """Clean the stored post and test it for duplication.

        The argument is ignored; the analyzer works on the text given at
        construction time. The returned dict is also appended to the history.

        Returns:
            A dict with ``"cleaned"`` (normalized text) and ``"duplicate"``
            (whether it matches any existing post).
        """
        outcome = {
            "cleaned": clean_post_text(self.post_text),
            "duplicate": check_duplicate(self.post_text, self.existing),
        }
        self.add_history(outcome)
        return outcome

    def describe(self) -> str:
        """Return the display label for this analyzer."""
        return "PostCleaner (cleaning + duplicate detection)"


class UserTracker(BaseAnalyzer):
    """Analyzer that looks up aggregated statistics for a single user."""

    def __init__(self, posts=None):
        """Store the posts to aggregate; a falsy value means no posts."""
        super().__init__()
        self.posts = posts if posts else []

    def analyze(self, username: str) -> Dict[str, Any]:
        """Return the totals for ``username`` and log the lookup.

        Statistics are recomputed from the stored posts on every call via
        ``track_users``.

        Returns:
            The user's stats dict, or an empty dict if the user has no posts.
        """
        per_user = track_users(self.posts)
        found = per_user[username] if username in per_user else {}
        self.add_history({"username": username, "stats": found})
        return found

    def describe(self) -> str:
        """Return the display label for this analyzer."""
        return "UserTracker (user statistics + reliability)"


class EngagementAnalyzer(BaseAnalyzer):
    """Analyzer that reports interaction totals for the past week."""

    def __init__(self, posts=None):
        """Store the posts to summarize; a falsy value means no posts."""
        super().__init__()
        self.posts = posts if posts else []

    def analyze(self, _=None) -> Dict[str, Any]:
        """Compute weekly totals for the stored posts.

        The argument is ignored. The summary dict produced by
        ``total_interactions_this_week`` is appended to the history and
        returned (the same dict object in both places).
        """
        summary = total_interactions_this_week(self.posts)
        self.add_history(summary)
        return summary

    def describe(self) -> str:
        """Return the display label for this analyzer."""
        return "EngagementAnalyzer (engagement + weekly stats)"


class MetadataAnalyzer(BaseAnalyzer):
    """Analyzer that counts how often each value occurs per metadata key."""

    def __init__(self, metadata=None):
        """Store the metadata records (dicts); a falsy value means none."""
        super().__init__()
        self.metadata = metadata if metadata else []

    def analyze(self, _=None) -> Dict[str, Any]:
        """Build value-frequency tables for the stored records.

        The argument is ignored. Values are converted with ``str()`` before
        counting, so e.g. ``1`` and ``"1"`` share a bucket. Only the number
        of records processed is written to the history.

        Returns:
            A dict mapping each key to a dict of ``{str(value): count}``.
        """
        frequencies = {}
        for entry in self.metadata:
            for field, raw in entry.items():
                bucket = frequencies.setdefault(field, {})
                label = str(raw)
                bucket[label] = bucket.get(label, 0) + 1

        self.add_history({"records": len(self.metadata)})
        return frequencies

    def describe(self) -> str:
        """Return the display label for this analyzer."""
        return "MetadataAnalyzer (metadata frequencies)"


# polymorphic composition

class RedditAnalysisManager:
    """Holds a list of analyzers and runs them all on the same input."""

    def __init__(self):
        # Analyzers run in the order they were added.
        self.analyzers: List[BaseAnalyzer] = []

    def add_analyzer(self, analyzer: BaseAnalyzer):
        """Register ``analyzer`` to be executed by ``run_all``."""
        self.analyzers.append(analyzer)

    def run_all(self, data: Any) -> Dict[str, Any]:
        """Call ``analyze(data)`` on every registered analyzer.

        Returns:
            A dict keyed by each analyzer's ``describe()`` label. Analyzers
            that share a label overwrite one another; the last one wins.
        """
        collected = {}
        for current in self.analyzers:
            outcome = current.analyze(data)
            collected[current.describe()] = outcome
        return collected


#demo for reddit data

if __name__ == "__main__":

    # Demo: run three analyzers over one sample post and print the results.
    sample_text = "Good Reddit Post!"

    # One post stamped "now" so it falls inside the weekly window.
    recent_posts = [
        {"created_utc": datetime.utcnow().isoformat(), "upvotes": 10, "comments": 4, "category": "news"}
    ]

    mgr = RedditAnalysisManager()
    for analyzer in (
        ContentCategorizer(),
        PostCleaner(sample_text, ["good reddit post!"]),
        EngagementAnalyzer(recent_posts),
    ):
        mgr.add_analyzer(analyzer)

    print(mgr.run_all(sample_text))


{'ContentCategorizer (tone + misinformation)': {'tone': 'informative', 'misinformation': {'is_misinformation': False, 'matched_keywords': []}}, 'PostCleaner (cleaning + duplicate detection)': {'cleaned': 'good reddit post', 'duplicate': True}, 'EngagementAnalyzer (engagement + weekly stats)': {'total_posts': 1, 'total_upvotes': 10, 'total_comments': 4, 'total_interactions': 14}}


  {"created_utc": datetime.utcnow().isoformat(), "upvotes": 10, "comments": 4, "category": "news"}
  week_start = datetime.utcnow() - timedelta(days=7)
