## Imports & Project Setup
✔ Sets the correct project root
✔ Enables imports from src/
✔ Confirms Phase 5 output exists
✔ Prevents silent downstream errors

In [2]:
# ========== BASIC IMPORTS ==========
import sys
from pathlib import Path
import yaml
import pandas as pd
import numpy as np

# ========== PROJECT ROOT ==========
# Resolved relative to the kernel's working directory, which is assumed to sit
# one level below the project root (e.g. a notebooks/ folder) — TODO confirm.
PROJECT_ROOT = Path("..").resolve()

# Guarded so that re-running this cell does not pile up duplicate sys.path
# entries for the same directory.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

print("Project root:", PROJECT_ROOT)
print("Combined signals file exists:",
      (PROJECT_ROOT / "results/combined_signals.csv").exists())


Project root: C:\Project_Final_Year
Combined signals file exists: True


## Load Config & Decision Thresholds
✔ Loads decision logic from config
✔ Keeps decisions configurable & explainable
✔ Allows easy tuning for different datasets

Risk ≥ REJECT_THRESHOLD → ❌ Reject
REVIEW_THRESHOLD ≤ Risk < REJECT_THRESHOLD → 🔍 Review
Risk < REVIEW_THRESHOLD → ✅ Keep

In [4]:
# ========== LOAD CONFIG ==========
# Read explicitly as UTF-8 so that parsing does not depend on the platform's
# default text encoding (which is not UTF-8 on every OS).
with open(PROJECT_ROOT / "config/default.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# ========== DECISION THRESHOLDS ==========
decision_config = config["decision"]
REJECT_THRESHOLD = decision_config["reject_threshold"]

# An optional `review_threshold` key in the config takes precedence. When the
# key is absent (or set to null) the review band starts at half of the reject
# threshold, which is the previous hard-coded behaviour.
REVIEW_THRESHOLD = decision_config.get("review_threshold")
if REVIEW_THRESHOLD is None:
    REVIEW_THRESHOLD = REJECT_THRESHOLD / 2

print("Reject threshold:", REJECT_THRESHOLD)
print("Review threshold:", REVIEW_THRESHOLD)


Reject threshold: 0.7
Review threshold: 0.35


## Load Combined Signals
✔ Loads risk scores for all samples
✔ Prepares data for decision making
✔ Keeps pipeline modular

In [5]:
# ========== LOAD COMBINED SIGNALS ==========
# Read the combined-signals CSV from the project's results folder.
combined_path = PROJECT_ROOT.joinpath("results", "combined_signals.csv")
combined_df = pd.read_csv(combined_path)

# One row per sample, so the row count is the sample count.
print("Total samples:", combined_df.shape[0])
combined_df.head()


Total samples: 6513


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,105,106,label,predicted_label,given_label_confidence,predicted_label_confidence,confidence_flag,confidence_risk,anomaly_risk,combined_risk_score
0,-0.772657,0.138566,1.132737,-0.146613,-0.217898,0.371553,0.0,0.0,0.0,0.0,...,0.0,0.0,<=50K,<=50K,0.91,0.91,False,0,0.28556,0.114224
1,-1.433042,-0.678059,-0.419226,-0.146613,-0.217898,-1.981349,0.0,0.0,0.0,0.0,...,0.0,0.0,<=50K,<=50K,1.0,1.0,False,0,0.188163,0.075265
2,0.621488,0.205797,1.132737,-0.146613,-0.217898,0.371553,0.0,0.0,0.0,0.0,...,0.0,0.0,>50K,>50K,0.91,0.91,False,0,0.067696,0.027079
3,1.135121,-0.501324,2.296709,-0.146613,-0.217898,-0.03412,0.0,0.0,0.0,0.0,...,0.0,0.0,>50K,>50K,0.58,0.58,False,0,0.441452,0.176581
4,0.621488,1.372661,-0.419226,-0.146613,-0.217898,-0.03412,0.0,0.0,0.0,0.0,...,0.0,0.0,<=50K,<=50K,0.97,0.97,False,0,0.251155,0.100462


## Generate Suggestions (CORE DECISION LOGIC)
#### This cell converts the continuous risk score into human-readable actions.
✔ Converts numbers → decisions
✔ Fully rule-based & explainable
✔ No ML black box

Interpretation:

KEEP → trusted sample

REVIEW → human-in-the-loop

REJECT → likely mislabeled / problematic

In [7]:
# ========== SUGGESTION LOGIC ==========
def generate_suggestion(risk, reject_threshold=None, review_threshold=None):
    """Map a risk score to one of "KEEP", "REVIEW" or "REJECT".

    Parameters
    ----------
    risk : float
        Risk score of a single sample.
    reject_threshold : float, optional
        Scores at or above this value yield "REJECT". Defaults to the
        notebook-level ``REJECT_THRESHOLD``.
    review_threshold : float, optional
        Scores at or above this value (and below ``reject_threshold``) yield
        "REVIEW". Defaults to the notebook-level ``REVIEW_THRESHOLD``.

    Returns
    -------
    str
        "REJECT", "REVIEW" or "KEEP". A missing score (NaN / None) yields
        "REVIEW".
    """
    if reject_threshold is None:
        reject_threshold = REJECT_THRESHOLD
    if review_threshold is None:
        review_threshold = REVIEW_THRESHOLD

    # NaN compares False against every threshold and would otherwise fall
    # through to "KEEP"; a sample without a score is sent to a human instead
    # of being silently trusted.
    if pd.isna(risk):
        return "REVIEW"

    if risk >= reject_threshold:
        return "REJECT"
    if risk >= review_threshold:
        return "REVIEW"
    return "KEEP"

# Label every sample by running its combined risk score through the rule.
risk_scores = combined_df["combined_risk_score"]
combined_df["suggestion"] = risk_scores.apply(generate_suggestion)

# Show how many samples received each label.
suggestion_counts = combined_df["suggestion"].value_counts()
print("Suggestion counts:")
print(suggestion_counts)


Suggestion counts:
suggestion
KEEP      5536
REVIEW     627
REJECT     350
Name: count, dtype: int64


## Add Decision Explanation
✔ Makes SLDCE transparent
✔ Enables human trust
✔ Excellent for thesis & demo
✔ Shows why a sample is flagged

In [8]:
# ========== DECISION EXPLANATION ==========
def explain_decision(row, anomaly_threshold=0.5):
    """Build a human-readable explanation of the risk signals for one sample.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys "confidence_flag" (truthy when the sample was
        flagged) and "anomaly_risk" (numeric); e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``.
    anomaly_threshold : float, default 0.5
        An "anomaly_risk" at or above this value is reported as a feature
        anomaly.

    Returns
    -------
    str
        The triggered reasons joined with "; ", or "No strong risk signals"
        when none was triggered.
    """
    reasons = []

    if row["confidence_flag"]:
        reasons.append("Low confidence in given label")

    if row["anomaly_risk"] >= anomaly_threshold:
        reasons.append("Feature anomaly detected")

    # Always return a non-empty explanation, even for clean samples.
    if not reasons:
        return "No strong risk signals"

    return "; ".join(reasons)

# Explain each sample row by row, then preview the decision columns.
decision_reasons = combined_df.apply(explain_decision, axis=1)
combined_df["decision_reason"] = decision_reasons

preview_columns = ["combined_risk_score", "suggestion", "decision_reason"]
combined_df[preview_columns].head()


Unnamed: 0,combined_risk_score,suggestion,decision_reason
0,0.114224,KEEP,No strong risk signals
1,0.075265,KEEP,No strong risk signals
2,0.027079,KEEP,No strong risk signals
3,0.176581,KEEP,No strong risk signals
4,0.100462,KEEP,No strong risk signals


## Save Suggestions Output
results/
└── suggestions.csv
Combined risk score

Final decision (KEEP / REVIEW / REJECT)

Human-readable explanation

In [9]:
# ========== SAVE SUGGESTIONS ==========
# Ensure the results folder exists before writing into it.
results_path = PROJECT_ROOT.joinpath("results")
results_path.mkdir(parents=True, exist_ok=True)

# Write the full frame without the row index.
output_path = results_path.joinpath("suggestions.csv")
combined_df.to_csv(output_path, index=False)

print("Suggestions saved successfully", f"Saved at: {output_path}", sep="\n")


Suggestions saved successfully
Saved at: C:\Project_Final_Year\results\suggestions.csv
