In [None]:
import spacy
import re
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import networkx as nx
from transformers import BertTokenizer, BertModel
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

In [None]:
# Load spaCy's small English pipeline once; the feature extractors below all parse text through this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Sentiment Analyzer
# Shared VADER analyzer instance; extract_sentiment_features() reads its polarity scores.
analyzer = SentimentIntensityAnalyzer()

In [None]:
def extract_pos_features(sentence):
    """Count coarse part-of-speech tags in ``sentence``.

    The sentence is parsed with the module-level ``nlp`` pipeline.

    Returns:
        list[int]: ``[NOUN, VERB, ADJ, ADV, OTHER]`` counts, where OTHER
        collects every token whose tag is not one of the first four.
    """
    tracked_tags = ("NOUN", "VERB", "ADJ", "ADV")
    tally = Counter(
        tok.pos_ if tok.pos_ in tracked_tags else "OTHER"
        for tok in nlp(sentence)
    )
    return [tally[tag] for tag in tracked_tags + ("OTHER",)]

In [None]:
def extract_dependency_features(sentence):
    """Count selected dependency relations in ``sentence``.

    The sentence is parsed with the module-level ``nlp`` pipeline.

    Returns:
        list[int]: ``[nsubj, dobj, ROOT, amod, OTHER]`` counts, where OTHER
        collects every token whose relation is not one of the first four.
    """
    tracked_relations = ("nsubj", "dobj", "ROOT", "amod")
    tally = Counter(
        tok.dep_ if tok.dep_ in tracked_relations else "OTHER"
        for tok in nlp(sentence)
    )
    return [tally[rel] for rel in tracked_relations + ("OTHER",)]

In [None]:
def extract_sentiment_features(sentence):
    """Return the analyzer's polarity scores for ``sentence`` as a flat list.

    Returns:
        list: the ``neg``, ``neu``, ``pos`` and ``compound`` entries of
        ``analyzer.polarity_scores(sentence)``, in that order.
    """
    scores = analyzer.polarity_scores(sentence)
    return [scores[key] for key in ("neg", "neu", "pos", "compound")]

In [None]:
def extract_graphical_features(sentence):
    """Summarise the dependency graph of ``sentence`` as five numbers.

    A directed graph is built with one node per token and one edge from each
    token's syntactic head to the token itself.

    Returns:
        list: ``[num_nodes, num_edges, avg_node_degree, density,
        root_token_length]``. All five are 0 for an empty parse;
        ``root_token_length`` is 0 when no token is labelled ``ROOT``.
    """
    doc = nlp(sentence)
    graph = nx.DiGraph()
    for token in doc:
        # Key nodes by token position, not surface text: keying by text merged
        # repeated words (e.g. two "the") into one node and mis-attached edges.
        graph.add_node(token.i, text=token.text, pos=token.pos_, dep=token.dep_)
        if token.head.i != token.i:  # the root is its own head; skip the self-loop
            graph.add_edge(token.head.i, token.i, label=token.dep_)

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    avg_node_degree = np.mean([deg for _, deg in graph.degree()]) if num_nodes > 0 else 0
    # Density is only evaluated when there are at least two nodes.
    density = nx.density(graph) if num_nodes > 1 else 0

    # Guard the root lookup instead of indexing [0] blindly, which raised
    # IndexError when the parse contained no ROOT token.
    root_tokens = [token for token in doc if token.dep_ == "ROOT"]
    root_token_length = len(root_tokens[0].text) if root_tokens else 0

    return [num_nodes, num_edges, avg_node_degree, density, root_token_length]

In [None]:
def extract_word_embeddings(sentence):
    """Average the spaCy token vectors of ``sentence``.

    Returns:
        The element-wise mean of ``token.vector`` over all tokens, converted
        with ``.tolist()``.
    """
    token_vectors = [tok.vector for tok in nlp(sentence)]
    centroid = np.mean(token_vectors, axis=0)
    return centroid.tolist()

In [None]:
def extract_contextual_embeddings(sentence):
    """Embed ``sentence`` with BERT by mean-pooling the last hidden state.

    Relies on the module-level ``bert_tokenizer`` and ``bert_model`` objects,
    which must be loaded before this function is called.

    Returns:
        list[float]: the mean over the sequence axis of the model's last
        hidden state for this single sentence.
    """
    # truncation=True clips over-long input to the tokenizer's model limit
    # instead of letting the model fail on sequences that are too long.
    inputs = bert_tokenizer(sentence, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = bert_model(**inputs)
    # Mean over the sequence axis, then drop only the batch axis; a bare
    # squeeze() would also drop a size-1 hidden axis.
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze(0).numpy()
    return embeddings.tolist()

In [None]:
# # Function to generate feature vector for each sentence
# def generate_feature_vector(sentence):
#     pos_features = extract_pos_features(sentence)
#     dep_features = extract_dependency_features(sentence)
#     sentiment_features = extract_sentiment_features(sentence)
#     graphical_features = extract_graphical_features(sentence)
#     word_embeddings = extract_word_embeddings(sentence)
#     contextual_embeddings = extract_contextual_embeddings(sentence)
#     return pos_features + dep_features + sentiment_features + graphical_features + word_embeddings + contextual_embeddings

In [None]:
def generate_feature_vector(sentence):
    """Build the flat feature vector for one sentence.

    Concatenates, in order: POS counts, dependency counts, sentiment scores,
    dependency-graph statistics and the averaged spaCy word embedding.
    BERT contextual embeddings are not included in this variant.
    """
    extractors = (
        extract_pos_features,
        extract_dependency_features,
        extract_sentiment_features,
        extract_graphical_features,
        extract_word_embeddings,
    )
    vector = []
    for extract in extractors:
        vector = vector + extract(sentence)
    return vector

In [None]:
# Sample dataset for training the classifier
# Ten short healthcare-themed sentences; the matching labels are defined separately in `labels`.
sentences = [
    "The black patient was prescribed medication.",
    "The white doctor treated the patient.",
    "The nurse assisted the patient during recovery.",
    "The hispanic family was very cooperative.",
    "The caucasian man was given special treatment.",
    "The patient was treated with care.",
    "The doctor was very professional.",
    "The nurse provided excellent support.",
    "The family was supportive throughout the treatment.",
    "The man was given the necessary treatment."
]

In [None]:
# Labels for the dataset (1 for biased, 0 for non-biased)
# Positionally aligned with `sentences`: four positives and six negatives.
labels = [1, 1, 0, 1, 1, 0, 0, 0, 0, 0]

In [None]:
# Generate feature vectors for all sentences
# One row per sentence, stacked into a single NumPy array for scikit-learn.
feature_vectors = np.array([generate_feature_vector(sentence) for sentence in sentences])

In [None]:
# Split the data into training and testing sets
# test_size=0.2 of ten sentences leaves two test samples; random_state fixes the shuffle.
# NOTE(review): the split is not stratified, so the tiny test set may end up with a single class.
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

In [None]:
# Create and train the SVM classifier
# Pipeline: standardise each feature, then fit a linear-kernel SVC.
svm_classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))
svm_classifier.fit(X_train, y_train)

In [None]:
# Evaluate the classifier
# Predict on the held-out split and print per-class precision/recall/F1.
y_pred = svm_classifier.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

In [None]:
def classify_bias(sentence):
    """Predict whether ``sentence`` is biased using the trained classifier.

    Returns:
        tuple: ``(is_biased, biased_phrases)`` where ``is_biased`` is the
        classifier's prediction for the sentence and ``biased_phrases`` is
        always an empty list in this variant (no phrase-level detection).
    """
    features = np.array([generate_feature_vector(sentence)])  # single-row batch
    prediction = svm_classifier.predict(features)[0]
    return prediction, []

In [None]:
# Test the bias classification function
# NOTE: these five sentences also appear in `sentences`, so this is a sanity check
# on seen data rather than a held-out evaluation.
test_sentences = [
    "The black patient was prescribed medication.",
    "The white doctor treated the patient.",
    "The nurse assisted the patient during recovery.",
    "The hispanic family was very cooperative.",
    "The caucasian man was given special treatment."
]

In [None]:
# Run every test sentence through classify_bias and report the verdict.
for sentence in test_sentences:
    is_biased, biased_phrases = classify_bias(sentence)
    if not is_biased:
        print(f"Non-biased Sentence: '{sentence}'")
        continue
    print(f"Biased Sentence: '{sentence}'")
    # Each phrase is expected to be a (text, pos, dep) triple.
    print(f"Biased Phrases Detected: {[(phrase[0], phrase[1], phrase[2]) for phrase in biased_phrases]}")

In [None]:
def plot_dependency_patterns(sentences, labels):
    """Bar-plot how often each dependency relation occurs in biased sentences.

    Only sentences whose label equals 1 are parsed; every token's ``dep_``
    is tallied and the counts are drawn with seaborn.
    """
    biased_docs = (nlp(text) for text, tag in zip(sentences, labels) if tag == 1)
    dep_counts = Counter(tok.dep_ for parsed in biased_docs for tok in parsed)

    dep_types = list(dep_counts.keys())
    frequencies = list(dep_counts.values())

    plt.figure(figsize=(10, 6))
    sns.barplot(x=dep_types, y=frequencies)
    plt.title("Most Observed Dependency Patterns in Biased Sentences")
    plt.xlabel("Dependency Type")
    plt.ylabel("Frequency")
    plt.xticks(rotation=45)
    plt.show()

In [None]:
def plot_linguistic_features_importance(svm_classifier, feature_names, top_n=10):
    """Bar-plot the largest-magnitude coefficients of a linear SVM.

    Args:
        svm_classifier: either a pipeline exposing ``named_steps['svc']`` or
            an estimator that itself exposes ``coef_``.
        feature_names: names paired positionally with the coefficients;
            pairing stops at the shorter of the two sequences.
        top_n: how many features to show (default 10, as before).

    If the estimator exposes no ``coef_`` a message is printed and nothing
    is plotted.
    """
    # The coefficients live on the SVC step, not on the pipeline wrapper.
    # Checking hasattr() on the pipeline was always False, so nothing was drawn.
    steps = getattr(svm_classifier, "named_steps", None)
    estimator = steps["svc"] if steps is not None and "svc" in steps else svm_classifier
    if not hasattr(estimator, "coef_"):
        print("Feature importance unavailable: the classifier exposes no coef_ (linear kernel required).")
        return

    # NOTE(review): flattening assumes a binary problem (one coefficient row) - confirm for multi-class.
    coefs = estimator.coef_.ravel()
    ranked = sorted(zip(coefs, feature_names), key=lambda pair: abs(pair[0]), reverse=True)
    top_features = ranked[:top_n]

    # Plot the top contributing features
    plt.figure(figsize=(10, 6))
    sns.barplot(x=[name for _, name in top_features], y=[coef for coef, _ in top_features])
    plt.title("Top Contributing Linguistic Features")
    plt.xlabel("Feature")
    plt.ylabel("Coefficient Value")
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Define feature names for plotting importance
# Order mirrors generate_feature_vector: 5 POS counts, 5 dependency counts,
# 4 sentiment scores, 5 graph statistics, then the embedding dimensions.
# The widths 96 (spaCy vector) and 768 (BERT hidden size) are hard-coded here.
# NOTE(review): if the classifier was trained without contextual embeddings, the
# contextual_embedding_* names have no matching coefficient - confirm which variant is active.
feature_names = [
    "NOUN_count", "VERB_count", "ADJ_count", "ADV_count", "OTHER_POS_count",
    "nsubj_count", "dobj_count", "ROOT_count", "amod_count", "OTHER_DEP_count",
    "negative_sentiment", "neutral_sentiment", "positive_sentiment", "compound_sentiment",
    "num_nodes", "num_edges", "avg_node_degree", "density", "root_token_length"
] + [f"word_embedding_{i}" for i in range(96)] + [f"contextual_embedding_{i}" for i in range(768)]


In [None]:
# Plot dependency patterns and linguistic feature importance
# Only sentences labelled 1 (biased) contribute to this chart.
plot_dependency_patterns(sentences, labels)

In [None]:
# Intended to chart which features the trained SVM pipeline weights most heavily.
plot_linguistic_features_importance(svm_classifier, feature_names)

In [None]:
def weat_score(target_words_1, target_words_2, attribute_words_1, attribute_words_2):
    """Compute a WEAT-style association difference between two target sets.

    Every word is embedded with ``nlp(word).vector``. For each target word the
    differential association is its mean cosine similarity to
    ``attribute_words_1`` minus its mean cosine similarity to
    ``attribute_words_2``. The result is the mean differential association of
    ``target_words_1`` minus that of ``target_words_2``. It is a raw
    difference of means and is not normalised by any standard deviation.

    NOTE(review): the quality of the score depends on the vectors the loaded
    spaCy model provides - confirm the model ships meaningful word vectors.

    Raises:
        ValueError: if any of the four word lists is empty (the score would
            otherwise silently be NaN).
    """
    named_lists = (
        ("target_words_1", target_words_1),
        ("target_words_2", target_words_2),
        ("attribute_words_1", attribute_words_1),
        ("attribute_words_2", attribute_words_2),
    )
    for list_name, words in named_lists:
        if len(words) == 0:
            raise ValueError(f"{list_name} must contain at least one word")

    def cosine_similarity(vec1, vec2):
        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denominator == 0:
            # A zero vector has no direction; treat it as unrelated rather
            # than dividing by zero and poisoning the mean with NaN.
            return 0.0
        return np.dot(vec1, vec2) / denominator

    def association(word, attribute_words):
        return np.mean([cosine_similarity(word, attr) for attr in attribute_words])

    # Convert words to embeddings using SpaCy
    target_vecs_1 = [nlp(word).vector for word in target_words_1]
    target_vecs_2 = [nlp(word).vector for word in target_words_2]
    attribute_vecs_1 = [nlp(word).vector for word in attribute_words_1]
    attribute_vecs_2 = [nlp(word).vector for word in attribute_words_2]

    def differential_association(target):
        return association(target, attribute_vecs_1) - association(target, attribute_vecs_2)

    # Calculate WEAT score
    mean_diff_1 = np.mean([differential_association(target) for target in target_vecs_1])
    mean_diff_2 = np.mean([differential_association(target) for target in target_vecs_2])
    return mean_diff_1 - mean_diff_2

In [None]:
# Example words for WEAT
# Target words representing different racial groups
target_words_1 = ["black", "african", "hispanic"]
target_words_2 = ["white", "caucasian", "european"]
# Attribute words representing healthcare-related associations
# (set 1 is favourable, set 2 is unfavourable)
attribute_words_1 = ["caring", "supportive", "professional", "competent"]
attribute_words_2 = ["neglectful", "uncooperative", "incompetent", "rude"]

# Calculate and print WEAT score
# A positive value means target set 1 sits closer to attribute set 1 (relative to set 2) than target set 2 does.
weat_result = weat_score(target_words_1, target_words_2, attribute_words_1, attribute_words_2)
print(f"WEAT Score: {weat_result}")

In [None]:
import spacy
import re
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import networkx as nx
from transformers import BertTokenizer, BertModel
import torch

# Load the SpaCy model (English model in this case)
# If the model package is missing, spacy.load raises OSError; it is then downloaded
# once at run time and loaded again.
try:
    nlp = spacy.load("en_core_web_sm")  # Attempt to load the pre-trained SpaCy model
except OSError:
    import spacy.cli
    spacy.cli.download("en_core_web_sm")  # Download the model if not found
    nlp = spacy.load("en_core_web_sm")  # Load the model after downloading

# Load BERT tokenizer and model for contextual embeddings
# Both are module-level globals read by extract_contextual_embeddings().
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")

# List of racial terms and phrases to check for potential bias
# racial_terms = [
#     "black", "white", "hispanic", "asian", "indian", "native", "african",
#     "caucasian", "latino", "arab", "middle eastern"
# ]

# Sentiment Analyzer
# Shared VADER analyzer instance; extract_sentiment_features() reads its polarity scores.
analyzer = SentimentIntensityAnalyzer()

def extract_pos_features(sentence):
    """Count coarse part-of-speech tags in ``sentence``.

    The sentence is parsed with the module-level ``nlp`` pipeline.

    Returns:
        list[int]: ``[NOUN, VERB, ADJ, ADV, OTHER]`` counts, where OTHER
        collects every token whose tag is not one of the first four.
    """
    tag_order = ["NOUN", "VERB", "ADJ", "ADV", "OTHER"]
    counts = dict.fromkeys(tag_order, 0)
    for tok in nlp(sentence):
        bucket = tok.pos_ if tok.pos_ in counts else "OTHER"
        counts[bucket] += 1
    return [counts[tag] for tag in tag_order]

def extract_dependency_features(sentence):
    """Count selected dependency relations in ``sentence``.

    The sentence is parsed with the module-level ``nlp`` pipeline.

    Returns:
        list[int]: ``[nsubj, dobj, ROOT, amod, OTHER]`` counts, where OTHER
        collects every token whose relation is not one of the first four.
    """
    relation_order = ["nsubj", "dobj", "ROOT", "amod", "OTHER"]
    counts = dict.fromkeys(relation_order, 0)
    for tok in nlp(sentence):
        bucket = tok.dep_ if tok.dep_ in counts else "OTHER"
        counts[bucket] += 1
    return [counts[rel] for rel in relation_order]

def extract_sentiment_features(sentence):
    """Return the analyzer's polarity scores for ``sentence`` as a flat list.

    Returns:
        list: the ``neg``, ``neu``, ``pos`` and ``compound`` entries of
        ``analyzer.polarity_scores(sentence)``, in that order.
    """
    polarity = analyzer.polarity_scores(sentence)
    ordered_keys = ("neg", "neu", "pos", "compound")
    return [polarity[key] for key in ordered_keys]

# Function to extract graphical features from the dependency graph
def extract_graphical_features(sentence):
    """Summarise the dependency graph of ``sentence`` as five numbers.

    A directed graph is built with one node per token and one edge from each
    token's syntactic head to the token itself.

    Returns:
        list: ``[num_nodes, num_edges, avg_node_degree, density,
        root_token_length]``. All five are 0 for an empty parse;
        ``root_token_length`` is 0 when no token is labelled ``ROOT``.
    """
    doc = nlp(sentence)
    graph = nx.DiGraph()
    for token in doc:
        # Key nodes by token position, not surface text: keying by text merged
        # repeated words (e.g. two "the") into one node and mis-attached edges.
        graph.add_node(token.i, text=token.text, pos=token.pos_, dep=token.dep_)
        if token.head.i != token.i:  # the root is its own head; skip the self-loop
            graph.add_edge(token.head.i, token.i, label=token.dep_)

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    avg_node_degree = np.mean([deg for _, deg in graph.degree()]) if num_nodes > 0 else 0
    # Density is only evaluated when there are at least two nodes.
    density = nx.density(graph) if num_nodes > 1 else 0

    # Guard the root lookup instead of indexing [0] blindly, which raised
    # IndexError when the parse contained no ROOT token.
    root_tokens = [token for token in doc if token.dep_ == "ROOT"]
    root_token_length = len(root_tokens[0].text) if root_tokens else 0

    return [num_nodes, num_edges, avg_node_degree, density, root_token_length]

def extract_word_embeddings(sentence):
    """Average the spaCy token vectors of ``sentence``.

    Returns:
        The element-wise mean of ``token.vector`` over all tokens, converted
        with ``.tolist()``.
    """
    parsed = nlp(sentence)
    per_token = [tok.vector for tok in parsed]
    return np.mean(per_token, axis=0).tolist()

def extract_contextual_embeddings(sentence):
    """Embed ``sentence`` with BERT by mean-pooling the last hidden state.

    Relies on the module-level ``bert_tokenizer`` and ``bert_model`` objects,
    which must be loaded before this function is called.

    Returns:
        list[float]: the mean over the sequence axis of the model's last
        hidden state for this single sentence.
    """
    # truncation=True clips over-long input to the tokenizer's model limit
    # instead of letting the model fail on sequences that are too long.
    inputs = bert_tokenizer(sentence, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = bert_model(**inputs)
    # Mean over the sequence axis, then drop only the batch axis; a bare
    # squeeze() would also drop a size-1 hidden axis.
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze(0).numpy()
    return embeddings.tolist()

def generate_feature_vector(sentence):
    """Build the flat feature vector for one sentence.

    Concatenates, in order: POS counts, dependency counts, sentiment scores,
    dependency-graph statistics, the averaged spaCy word embedding and the
    BERT contextual embedding.
    """
    feature_groups = [
        extract_pos_features(sentence),
        extract_dependency_features(sentence),
        extract_sentiment_features(sentence),
        extract_graphical_features(sentence),
        extract_word_embeddings(sentence),
        extract_contextual_embeddings(sentence),
    ]
    # sum() with a list start value concatenates the groups left to right.
    return sum(feature_groups, [])

# Sample dataset for training the classifier
# Ten short healthcare-themed sentences, positionally aligned with `labels` below.
sentences = [
    "The black patient was prescribed medication.",
    "The white doctor treated the patient.",
    "The nurse assisted the patient during recovery.",
    "The hispanic family was very cooperative.",
    "The caucasian man was given special treatment.",
    "The patient was treated with care.",
    "The doctor was very professional.",
    "The nurse provided excellent support.",
    "The family was supportive throughout the treatment.",
    "The man was given the necessary treatment."
]

# Labels for the dataset (1 for biased, 0 for non-biased)
labels = [1, 1, 0, 1, 1, 0, 0, 0, 0, 0]

# Generate feature vectors for all sentences
# One row per sentence; each row includes the BERT contextual embedding in this variant.
feature_vectors = np.array([generate_feature_vector(sentence) for sentence in sentences])

# Split the data into training and testing sets
# test_size=0.2 of ten sentences leaves two test samples; random_state fixes the shuffle.
# NOTE(review): the split is not stratified, so the tiny test set may end up with a single class.
X_train, X_test, y_train, y_test = train_test_split(feature_vectors, labels, test_size=0.2, random_state=42)

# Create and train the SVM classifier
# Pipeline: standardise each feature, then fit a linear-kernel SVC.
svm_classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))
svm_classifier.fit(X_train, y_train)

def classify_bias(sentence):
    """Predict whether ``sentence`` is biased and, if so, list flagged phrases.

    Returns:
        tuple: ``(is_biased, biased_phrases)``. ``biased_phrases`` is the
        result of ``identify_biased_phrases(sentence)`` when the prediction
        is truthy, otherwise an empty list.
    """
    feature_row = generate_feature_vector(sentence)
    is_biased = svm_classifier.predict(np.array([feature_row]))[0]  # single-row batch
    if not is_biased:
        return is_biased, []
    return is_biased, identify_biased_phrases(sentence)

# Default vocabulary used by identify_biased_phrases when no terms are supplied.
# Kept next to the function so it never depends on a global that may be undefined.
DEFAULT_RACIAL_TERMS = (
    "black", "white", "hispanic", "asian", "indian", "native", "african",
    "caucasian", "latino", "arab", "middle eastern",
)


# Function to identify biased phrases in a given sentence
def identify_biased_phrases(sentence, terms=None):
    """Find tokens and noun chunks in ``sentence`` that mention a listed term.

    Args:
        sentence: text to parse with the module-level ``nlp`` pipeline.
        terms: optional iterable of lower-case terms to look for; defaults to
            ``DEFAULT_RACIAL_TERMS``.

    Returns:
        list[tuple]: ``(text, pos, dep)`` triples, de-duplicated by text.
        Single tokens come first, then noun chunks (described by their root
        token's tag and relation).
    """
    if terms is None:
        terms = DEFAULT_RACIAL_TERMS
    term_set = {term.lower() for term in terms}
    if not term_set:
        return []

    # Whole-word matching for chunks: a plain substring test flagged
    # "caucasian" for "asian" and "alternative" for "native".
    # Longest terms first so multi-word terms win over their parts.
    alternatives = "|".join(re.escape(term) for term in sorted(term_set, key=len, reverse=True))
    term_pattern = re.compile(r"\b(?:" + alternatives + r")\b")

    doc = nlp(sentence)
    found = {}  # keyed by phrase text so repeated phrases collapse to one entry
    for token in doc:
        if token.text.lower() in term_set:
            found[token.text] = (token.text, token.pos_, token.dep_)
    for chunk in doc.noun_chunks:
        if term_pattern.search(chunk.text.lower()):
            found[chunk.text] = (chunk.text, chunk.root.pos_, chunk.root.dep_)
    return list(found.values())

# Test the bias classification function
# NOTE: these five sentences also appear in `sentences`, so this is a sanity check
# on seen data rather than a held-out evaluation.
test_sentences = [
    "The black patient was prescribed medication.",
    "The white doctor treated the patient.",
    "The nurse assisted the patient during recovery.",
    "The hispanic family was very cooperative.",
    "The caucasian man was given special treatment."
]

# Run every test sentence through classify_bias and report the verdict.
for sentence in test_sentences:
    is_biased, biased_phrases = classify_bias(sentence)
    if not is_biased:
        print(f"Non-biased Sentence: '{sentence}'")
        continue
    print(f"Biased Sentence: '{sentence}'")
    # Each phrase is a (text, pos, dep) triple.
    print(f"Biased Phrases Detected: {[(phrase[0], phrase[1], phrase[2]) for phrase in biased_phrases]}")

# Evaluate the classifier
# Predict on the held-out split and print per-class precision/recall/F1.
y_pred = svm_classifier.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
