In [3]:
!pip install transformers shap lime xgboost scikit-learn pandas numpy tqdm torch



In [21]:
import shap
import lime
import numpy as np
import pandas as pd
from lime.lime_text import LimeTextExplainer
from transformers import pipeline


class FinancialRiskAnalyzer:
    """Score financial text for risk by blending two sentiment classifiers.

    Each text is classified by FinBERT (``ProsusAI/finbert``) and by
    ``soleimanian/financial-roberta-large-sentiment``. Every predicted label
    is mapped to a fixed risk value, and the two risk values (and the two
    model confidences) are combined with a fixed weighted average.
    """

    # Risk value assigned to each lower-cased sentiment label.
    _SENTIMENT_RISK = {"positive": 10, "neutral": 50, "negative": 90}
    # Risk value used for any label/category missing from the tables.
    _DEFAULT_RISK = 50

    # Risk value assigned to each named risk category.
    _CATEGORY_RISK = {
        "Financial Risk": 85,
        "Compliance Violation": 92,
        "Operational Risk": 75,
        "Reputational Risk": 65,
        "No Risk/Neutral": 20,
    }

    # Human-readable description of each named risk category.
    _CATEGORY_EXPLANATION = {
        "Financial Risk": "Potential for financial loss due to irregularities.",
        "Compliance Violation": "Non-compliance with regulations detected.",
        "Operational Risk": "Internal failures or process inefficiencies.",
        "Reputational Risk": "Damage to reputation from adverse events.",
        "No Risk/Neutral": "No significant risk detected.",
    }

    # Weights of the two models in every weighted average (they sum to 1).
    _FINBERT_WEIGHT = 0.45
    _ROBERTA_WEIGHT = 0.55

    def __init__(self, batch_size=10):
        """Load both sentiment pipelines.

        Args:
            batch_size: Number of texts handed to each pipeline per forward
                pass in ``analyze_text_batch``. Values that are not positive
                integers leave the pipelines on their default batching.
        """
        self.sentiment_model = pipeline("sentiment-analysis", model="ProsusAI/finbert")
        self.roberta_model = pipeline("sentiment-analysis", model="soleimanian/financial-roberta-large-sentiment")

        self.batch_size = batch_size

    def analyze_text_batch(self, texts):
        """Analyze texts and return one result row per text.

        Args:
            texts: A single string or an iterable of strings.

        Returns:
            A list of dicts, in input order, with the keys ``"Text"``,
            ``"Sentiment"`` (the FinBERT label), ``"Confidence_Score"`` and
            ``"risk_Score"`` (both weighted averages rounded to 2 decimals).
            An empty input yields an empty list.
        """
        # A bare string would otherwise be indexed character by character
        # when the rows are assembled; treat it as a one-element batch.
        if isinstance(texts, str):
            texts = [texts]
        else:
            # Materialize so generators and other one-shot iterables can be
            # fed to both models and paired with their results.
            texts = list(texts)

        if not texts:
            return []

        call_kwargs = self._pipeline_call_kwargs()
        finbert_results = self.sentiment_model(texts, **call_kwargs)
        roberta_results = self.roberta_model(texts, **call_kwargs)

        results = []
        for text, finbert, roberta in zip(texts, finbert_results, roberta_results):
            finbert_risk = self._map_sentiment_to_risk(finbert["label"], finbert["score"])
            roberta_risk = self._map_sentiment_to_risk(roberta["label"], roberta["score"])

            results.append({
                "Text": text,
                "Sentiment": finbert["label"],
                # Already rounded to 2 decimals by _calculate_confidence.
                "Confidence_Score": self._calculate_confidence(finbert["score"], roberta["score"]),
                "risk_Score": self._calculate_final_risk(finbert_risk, roberta_risk),
            })
        return results

    def _pipeline_call_kwargs(self):
        """Return the keyword arguments forwarded to each pipeline call."""
        size = self.batch_size
        # bool is an int subclass; exclude it so True is not read as "1".
        if isinstance(size, int) and not isinstance(size, bool) and size > 0:
            return {"batch_size": size}
        return {}

    def _map_sentiment_to_risk(self, sentiment, score):
        """Map a sentiment label to a risk score.

        The lookup is case-insensitive; unknown labels map to 50. ``score``
        is accepted for interface compatibility and is not used.
        """
        return self._SENTIMENT_RISK.get(sentiment.lower(), self._DEFAULT_RISK)

    def _map_risk_to_score(self, risk_category, score):
        """Map a risk category to ``(risk_score, score)``.

        Unknown categories map to a risk score of 50; ``score`` is passed
        through unchanged as the second tuple element.
        """
        return self._CATEGORY_RISK.get(risk_category, self._DEFAULT_RISK), score

    def _calculate_final_risk(self, finbert_sentiment_score, roberta_score):
        """Return the weighted average of both risk scores, rounded to 2 decimals."""
        final_score = (
            self._FINBERT_WEIGHT * finbert_sentiment_score
            + self._ROBERTA_WEIGHT * roberta_score
        )
        return round(final_score, 2)

    def _calculate_confidence(self, finbert_confidence, roberta_confidence):
        """Return the weighted average of both confidences, rounded to 2 decimals."""
        confidence_score = (
            self._FINBERT_WEIGHT * finbert_confidence
            + self._ROBERTA_WEIGHT * roberta_confidence
        )
        return round(confidence_score, 2)

    def _get_risk_explanation(self, risk_category):
        """Return a short description of a risk category.

        Unknown categories yield ``"Unknown risk category."``.
        """
        return self._CATEGORY_EXPLANATION.get(risk_category, "Unknown risk category.")


# Example usage
if __name__ == "__main__":
    # Small demo: load the models, echo three sample headlines, then show
    # the per-text analysis as a table.
    analyzer = FinancialRiskAnalyzer(batch_size=5)

    sample_texts = [
        "The company is under SEC investigation for compliance violations.",
        "The firm faces allegations of insider trading and regulatory breaches.",
        "Company has shown steady profits and stocks are up by 25% since last quarter",
    ]

    # Echo each input on its own line before running the models.
    print(*sample_texts, sep="\n")

    results = analyzer.analyze_text_batch(sample_texts)
    df = pd.DataFrame(results)
    print(df.head())


Device set to use cpu
Device set to use cpu


The company is under SEC investigation for compliance violations.
The firm faces allegations of insider trading and regulatory breaches.
Company has shown steady profits and stocks are up by 25% since last quarter
                                                Text Sentiment  \
0  The company is under SEC investigation for com...  negative   
1  The firm faces allegations of insider trading ...  negative   
2  Company has shown steady profits and stocks ar...  positive   

   Confidence_Score  risk_Score  
0              0.94        90.0  
1              0.97        90.0  
2              0.98        10.0  
