In [2]:
!pip install transformers shap lime xgboost scikit-learn pandas numpy tqdm torch

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)

In [22]:
import shap
import lime
import numpy as np
import pandas as pd
from lime.lime_text import LimeTextExplainer
from transformers import pipeline


class FinancialRiskAnalyzer:
    def __init__(self, batch_size=10):
        """Initialize sentiment, risk models, and classifier."""
        self.sentiment_model = pipeline("sentiment-analysis", model="ProsusAI/finbert")
        self.roberta_model = pipeline("sentiment-analysis", model="soleimanian/financial-roberta-large-sentiment")

        self.batch_size = batch_size

    def analyze_text_batch(self, texts):
        """Analyze a batch of texts and return sentiment, risk, and SHAP/LIME-based explanations."""
        sentiment_results = self.sentiment_model(texts)
        roberta_results = self.roberta_model(texts)

        results = []
        for i, (sentiment, roberta) in enumerate(zip(sentiment_results, roberta_results)):
            sentiment_score = self._map_sentiment_to_risk(sentiment["label"], sentiment["score"])
            roberta_score = self._map_sentiment_to_risk(roberta["label"], roberta["score"])

            # Combine predictions with weighted average
            final_risk_score = self._calculate_final_risk(sentiment_score, roberta_score)
            final_confidence_score = self._calculate_confidence(sentiment_score, roberta_score)

            results.append({
                "Sentiment": sentiment["label"],
                "Sentiment_Confidence": round(sentiment["score"], 2),
                "Confidence_Score": round(final_confidence_score, 2),
                "risk_Score": final_risk_score,
            })
        return results

    def _map_sentiment_to_risk(self, sentiment, score):
        """Map sentiment to risk score."""
        sentiment_map = {"positive": 10, "neutral": 50, "negative": 90}
        return sentiment_map.get(sentiment.lower(), 50)

    def _map_risk_to_score(self, risk_category, score):
        """Map risk category to risk score."""
        risk_map = {
            "Financial Risk": 85,
            "Compliance Violation": 92,
            "Operational Risk": 75,
            "Reputational Risk": 65,
            "No Risk/Neutral": 20
        }
        return risk_map.get(risk_category, 50), score

    def _calculate_final_risk(self, finbert_sentiment_score, roberta_score):
        """Calculate final risk score with weighted average."""
        final_score = (0.45 * finbert_sentiment_score) + (0.55 * roberta_score)
        return round(final_score, 2)

    def _calculate_confidence(self, finbert_confidence, roberta_confidence):
        """Calculate confidence score with weighted average."""
        confidence_score = (0.45 * finbert_confidence) + (0.55 * roberta_confidence)
        return round(confidence_score, 2)

    def _get_risk_explanation(self, risk_category):
        """Provide explanation for risk category."""
        explanations = {
            "Financial Risk": "Potential for financial loss due to irregularities.",
            "Compliance Violation": "Non-compliance with regulations detected.",
            "Operational Risk": "Internal failures or process inefficiencies.",
            "Reputational Risk": "Damage to reputation from adverse events.",
            "No Risk/Neutral": "No significant risk detected."
        }
        return explanations.get(risk_category, "Unknown risk category.")


# Example usage
if __name__ == "__main__":
    analyzer = FinancialRiskAnalyzer(batch_size=5)

    sample_texts = [
        "The company reported a massive financial loss due to internal mismanagement.",
        "The audit found significant compliance violations.",
        "A class-action lawsuit was filed against the corporation for insider trading.",
        "Company XYZ is expanding to Other contries and setting up new manufacturing units",
        "Company had a good quarter and steady growth"
    ]

    results = analyzer.analyze_text_batch(sample_texts)
    df = pd.DataFrame(results)
    print(df.head())


Device set to use cpu
Device set to use cpu


  Sentiment  Sentiment_Confidence  Confidence_Score  risk_Score
0  negative                  0.97              90.0        90.0
1  negative                  0.95              90.0        90.0
2  negative                  0.77              90.0        90.0
3  positive                  0.71              10.0        10.0
4  positive                  0.95              10.0        10.0
