# Social Sentinel Backend
## An engine developed to mitigate cyberbullying in code-mixed text.

In [None]:
!pip install datasets
!pip install sklearn

## Testing our refined keywords on an open-source dataset
Dataset: [Roman Urdu Hate Speech (Hugging Face)](https://huggingface.co/datasets/community-datasets/roman_urdu_hate_speech)

In [32]:
import re
from typing import List

def load_hatewords(filepath: str) -> set:
    """Read a newline-delimited keyword file into a set.

    Every line of the UTF-8 file is stripped of surrounding whitespace and
    lower-cased; lines that are empty after stripping are dropped.

    Args:
        filepath: Path of the text file holding one keyword per line.

    Returns:
        The set of normalised keywords (duplicates collapse naturally).
    """
    hatewords = set()
    with open(filepath, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                hatewords.add(cleaned.lower())
    return hatewords

def detect_keywords(text: str, hatewords: set) -> List[str]:
    """Return the keywords from ``hatewords`` that occur in ``text``.

    Matching is case-insensitive. A ``*`` inside a keyword acts as a
    wildcard for any run of characters (e.g. ``"f*k"`` matches ``"f##k"``);
    every other character is matched literally.

    The previous implementation built its pattern with
    ``re.sub(r"\\*", ".*", word)``. That regex means "zero or more
    backslashes", which matches the empty string at every position, so
    ``"hot"`` became ``".*h.*o.*t.*"`` and flagged any text containing those
    letters in order.

    Args:
        text: Message to scan.
        hatewords: Keywords to look for; ``*`` is the only wildcard.

    Returns:
        The matching keywords, in the iteration order of ``hatewords``.
    """
    text = text.lower()
    flagged_keywords = []

    for word in hatewords:
        # A keyword that is empty or made only of wildcards would match every
        # message, so it carries no signal and is skipped.
        if not word.strip("*"):
            continue
        # Escape each literal fragment so regex metacharacters in a keyword
        # are matched verbatim, then join the fragments with ".*" so that
        # "*" is the only wildcard.
        fragments = word.lower().split("*")
        pattern = ".*".join(re.escape(fragment) for fragment in fragments)
        if re.search(pattern, text):
            flagged_keywords.append(word)
    return flagged_keywords

def is_cyberbullying(text: str, hatewords: set, threshold: int = 1):
    """Classify ``text`` by counting keyword hits.

    Args:
        text: Message to classify.
        hatewords: Keywords passed on to ``detect_keywords``.
        threshold: Minimum number of matched keywords needed to flag the text.

    Returns:
        A ``(flagged, matches)`` pair: ``flagged`` is True when the number of
        matched keywords is at least ``threshold``; ``matches`` is the list
        returned by ``detect_keywords``.
    """
    matches = detect_keywords(text, hatewords)
    flagged = len(matches) >= threshold
    return flagged, matches


In [44]:
from datasets import load_dataset
# Coarse-grained configuration of the Roman Urdu hate-speech dataset.
dataset = load_dataset("community-datasets/roman_urdu_hate_speech", "Coarse_Grained")

# Accumulators: one per-tweet record, the dataset labels, and our predictions.
flagged_results, y_true, y_pred = [], [], []
hatewords = load_hatewords("/content/hatewords.txt")

# Run the keyword detector over every tweet of the train split; a single
# keyword hit is enough to flag a tweet.
for tweet, label in zip(dataset["train"]["tweet"], dataset["train"]["label"]):
    is_flagged, keywords = is_cyberbullying(tweet, hatewords, threshold=1)
    predicted_label = int(is_flagged)
    y_true.append(label)
    y_pred.append(predicted_label)
    flagged_results.append(
        dict(
            tweet=tweet,
            is_cyberbullying=is_flagged,
            flagged_keywords=keywords,
            actual_label=label,
            predicted_label=predicted_label,
        )
    )

# Report each tweet: safe tweets get a single line, flagged tweets also show
# the matched keywords and both labels.
for result in flagged_results:
    if not result["is_cyberbullying"]:
        print(f"[SAFE] No cyberbullying detected in: '{result['tweet']}'\n")
        continue
    print(f"[FLAGGED] Cyberbullying detected in: '{result['tweet']}'")
    print(f"Flagged keywords: {result['flagged_keywords']}\n")
    print(f"Actual Label: {result['actual_label']}\n")
    print(f"Predicted Label: {result['predicted_label']}\n")


train-00000-of-00001.parquet:   0%|          | 0.00/525k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/147k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/58.5k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7208 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2002 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/800 [00:00<?, ? examples/s]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Actual Label: 0

Predicted Label: 1

[FLAGGED] Cyberbullying detected in: 'ye  saali mujh sy baat krna e chhor deti hai if i speak my stomach out so no i'm not lucky ✋'
Flagged keywords: ['saali']

Actual Label: 0

Predicted Label: 1

[FLAGGED] Cyberbullying detected in: 'chutiye halala ki upaz tu hai mein nahin isaliye tujhe zyada pata hoga'
Flagged keywords: ['halala']

Actual Label: 0

Predicted Label: 1

[SAFE] No cyberbullying detected in: 'sabar tou ussi din seekh liya tha jis din ptcl lagwaya tha.'

[FLAGGED] Cyberbullying detected in: 'khottay k bacho dallo haramkhoro kanjro nalaiko jahilo or pata nhi kiya kch niazi teri pen di sirii jahill kanjar yahodi loby k agent haramkhor badzat dalay baigairaton k sardar lakh di lanat tere munh tay itnay jahil ho k ghin ati hai tum jahiloon se badbo ati hai'
Flagged keywords: ['haram', 'hot', 'kanjar', 'lanat', 'dala', 'dalay', 'yahodi']

Actual Label: 0

Predicted Label: 1


# Testing keywords with scores.

In [36]:
import json
import re
from typing import Dict, List, Tuple

from datasets import load_dataset

def load_keywords(filepath: str) -> Dict[str, float]:
    """Parse a UTF-8 JSON file and return its content.

    Args:
        filepath: Path of the JSON file; per the annotation it is expected to
            hold an object mapping keyword -> weight.

    Returns:
        The decoded JSON value, unmodified and unvalidated.
    """
    with open(filepath, mode="r", encoding="utf-8") as source:
        parsed = json.load(source)
    return parsed

def detect_keywords(text: str, hatewords: set, use_regex: bool = False) -> List[str]:
    """Return the entries of ``hatewords`` found in ``text``.

    Matching is case-insensitive in both modes. Empty keywords are ignored
    because they would match every message.

    Args:
        text: Message to scan.
        hatewords: Keywords (or regular expressions when ``use_regex`` is
            True) to look for.
        use_regex: When False (default) each keyword is matched as a plain
            substring. When True each keyword is treated as a regular
            expression and searched for with ``re.search``. This flag was
            previously accepted but silently ignored.

    Returns:
        The matching keywords, in the iteration order of ``hatewords``.

    Raises:
        re.error: If ``use_regex`` is True and a keyword is not a valid
            regular expression.
    """
    text = text.lower()
    flagged_keywords = []

    for word in hatewords:
        if not word:
            continue
        if use_regex:
            # IGNORECASE rather than lower-casing the pattern: lower-casing
            # would corrupt escapes such as \W or \S.
            matched = re.search(word, text, re.IGNORECASE) is not None
        else:
            # The text is already lower-cased, so normalise the keyword too.
            matched = word.lower() in text
        if matched:
            flagged_keywords.append(word)

    return flagged_keywords

def is_cyberbullying(text: str, hatewords: set, threshold: int = 1, use_regex: bool = False) -> Tuple[bool, List[str]]:
    """Classify ``text`` by counting keyword hits.

    The return annotation previously claimed ``bool`` although the function
    has always returned a 2-tuple; the annotation now matches the behaviour.

    Args:
        text: Message to classify.
        hatewords: Keywords passed on to ``detect_keywords``.
        threshold: Minimum number of matched keywords needed to flag the text.
        use_regex: Forwarded unchanged to ``detect_keywords``.

    Returns:
        ``(flagged, flagged_keywords)`` where ``flagged`` is True when at
        least ``threshold`` keywords matched and ``flagged_keywords`` is the
        list returned by ``detect_keywords``.
    """
    flagged_keywords = detect_keywords(text, hatewords, use_regex)
    return len(flagged_keywords) >= threshold, flagged_keywords

def calculate_score(text: str, keywords_with_weights: Dict[str, float]) -> float:
    """Sum the weights of all keywords that appear in ``text``.

    A keyword counts when its lower-cased form is a substring of the
    lower-cased text; each keyword contributes its weight at most once.

    Args:
        text: Message to score.
        keywords_with_weights: Mapping of keyword -> weight.

    Returns:
        The accumulated weight as a float (``0.0`` when nothing matches).
    """
    haystack = text.lower()
    total_score = 0.0
    for keyword in keywords_with_weights:
        if keyword.lower() not in haystack:
            continue
        total_score += keywords_with_weights[keyword]
    return total_score

def update_fairness_score(user_scores: Dict[str, float], user: str, current_score: float) -> None:
    """Add ``current_score`` to the running total kept for ``user``.

    ``user_scores`` is modified in place; a user seen for the first time
    starts from 0.

    Args:
        user_scores: Mapping of user -> accumulated score (mutated).
        user: Key of the user whose total is updated.
        current_score: Amount to add to that user's total.
    """
    # setdefault registers a first-time user with 0 before the addition.
    user_scores[user] = user_scores.setdefault(user, 0) + current_score

def issue_warning(user_scores: Dict[str, float], warnings: Dict, user: str, threshold: float = 0.5) -> str:
    """Decide which notice to send to ``user`` and update the warning count.

    Decision order:
      1. A user who already has two or more warnings gets the "blocked"
         notice; nothing is modified.
      2. A user whose accumulated score is below ``threshold`` gets the
         "no actions taken" notice; the warning count is not increased.
      3. Otherwise the warning count is incremented and a warning is issued.

    Because step 1 returns early, the former
    ``elif warnings[user] >= 2`` branch could never execute and was removed.

    Args:
        user_scores: Mapping of user -> accumulated score. A user missing
            from it is treated as having a score of 0 instead of raising
            ``KeyError``.
        warnings: Mapping of user -> number of warnings issued so far
            (mutated: missing users are registered with 0, and the count is
            incremented when a warning is issued).
        user: Key of the user being evaluated.
        threshold: Score at or above which a warning is issued.

    Returns:
        The notice text for this user.
    """
    # Register first-time users so the counter lookups below cannot fail.
    warnings.setdefault(user, 0)

    if warnings[user] >= 2:
        return "Warning: You have been temporarily blocked due to inappropriate behaviour and cant use any services until futher action."

    score = user_scores.get(user, 0.0)
    if score < threshold:
        return f"Warning: User {user} has a low fairness score ({score:.2f}). No actions taken."

    warnings[user] += 1
    return f"User {user} has been recieved a warning due to inappropriate behaviour (Score: {score:.2f}) (Total warnings: {warnings[user]})."

In [37]:
# Weighted keyword lexicon plus fresh per-user state for this demo run.
keywords_file = "/content/keywords.json"
keywords_with_weights = load_keywords(keywords_file)
user_scores, warnings = {}, {}

users = ["user1", "user2", "user3"]
messages = [
    "Tum kitne ganda ho!",
    "Woh banda kitna kala hai.",
    "Yeh message bilkul safe hai.",
]

# Pair each user with one message: score it, add the score to the user's
# running total, then look up the resulting notice.
for user, message in zip(users, messages):
    score = calculate_score(message, keywords_with_weights)
    update_fairness_score(user_scores, user, score)
    warning = issue_warning(user_scores, warnings, user)

    print(
        f"Message: {message}",
        f"User: {user}",
        f"Score: {score:.2f}",
        f"Fairness Score: {user_scores[user]:.2f}",
        f"{warning}\n",
        sep="\n",
    )


Message: Tum kitne ganda ho!
User: user1
Score: 0.40
Fairness Score: 0.40

Message: Woh banda kitna kala hai.
User: user2
Score: 0.70
Fairness Score: 0.70

Message: Yeh message bilkul safe hai.
User: user3
Score: 0.00
Fairness Score: 0.00



In [38]:
# A single follow-up message; zip() pairs it with the first entry of `users`,
# so the score is added to that user's existing total.
messages = ["Bhut besharam insan ho tum."]

for user, message in zip(users, messages):
    score = calculate_score(message, keywords_with_weights)
    update_fairness_score(user_scores, user, score)
    warning = issue_warning(user_scores, warnings, user)

    print(
        f"Message: {message}",
        f"User: {user}",
        f"Score: {score:.2f}",
        f"Fairness Score: {user_scores[user]:.2f}",
        f"{warning}\n",
        sep="\n",
    )

Message: Bhut besharam insan ho tum.
User: user1
Score: 0.80
Fairness Score: 1.20



In [40]:
# Another single message; zip() again pairs it with the first entry of
# `users`, whose running total keeps growing across cells.
messages = ["Behra hogaya hai kiya?"]

for user, message in zip(users, messages):
    score = calculate_score(message, keywords_with_weights)
    update_fairness_score(user_scores, user, score)
    warning = issue_warning(user_scores, warnings, user)

    print(
        f"Message: {message}",
        f"User: {user}",
        f"Score: {score:.2f}",
        f"Fairness Score: {user_scores[user]:.2f}",
        f"{warning}\n",
        sep="\n",
    )

Message: Behra hogaya hai kiya?
User: user1
Score: 0.30
Fairness Score: 1.80



# Testing on a custom message

In [None]:
# Placeholder for a custom message: replace the empty string before running.
# zip() pairs the single message with the first entry of `users`.
messages = [""]

for user, message in zip(users, messages):
    score = calculate_score(message, keywords_with_weights)
    update_fairness_score(user_scores, user, score)
    warning = issue_warning(user_scores, warnings, user)

    print(
        f"Message: {message}",
        f"User: {user}",
        f"Score: {score:.2f}",
        f"Fairness Score: {user_scores[user]:.2f}",
        f"{warning}\n",
        sep="\n",
    )