In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from joblib import dump

# Load the labelled articles. The columns 'Article Title', 'Content' and
# 'Label' are read below, so the CSV must provide them.
df = pd.read_csv('F:/2025-PROJECTS/fake_news/dataset/fake_news_dataset.csv')

# Combine title and content for better context.
# Missing values are replaced by empty strings first: NaN + str is NaN, and a
# NaN entry would reach the tokenizer as a float instead of text.
df['text'] = df['Article Title'].fillna('') + ' ' + df['Content'].fillna('')
X = df['text']
y = df['Label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization: the vocabulary is fitted on the training texts only, then both
# splits are converted to integer sequences padded/truncated to max_len.
max_words = 10000
max_len = 200
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Component 1: News Characterizer (CNN)
def build_news_characterizer():
    """Build the CNN that turns padded token sequences into feature vectors.

    Uses the module-level ``max_len`` (input length) and ``max_words``
    (embedding vocabulary size). Token ids are embedded into 128 dimensions,
    passed through two parallel Conv1D branches (kernel sizes 3 and 5, 64
    filters each) that are global-max-pooled, concatenated, and projected by
    a 64-unit ReLU Dense layer.

    Returns:
        An uncompiled Keras ``Model`` whose output has 64 features per sample.
    """
    tokens = Input(shape=(max_len,))
    embedded = Embedding(input_dim=max_words, output_dim=128)(tokens)

    # One convolution + pooling branch per n-gram width.
    pooled_branches = []
    for width in (3, 5):
        convolved = Conv1D(filters=64, kernel_size=width, activation='relu')(embedded)
        pooled_branches.append(GlobalMaxPooling1D()(convolved))

    features = Dense(64, activation='relu')(concatenate(pooled_branches))
    return Model(inputs=tokens, outputs=features)

# Component 2: Ensemble Coordinator
def build_ensemble_coordinator(input_shape):
    """Build the dense binary classifier that consumes characterizer features.

    Args:
        input_shape: Number of features per sample (an int, wrapped into a
            one-element shape tuple for the input layer).

    Returns:
        An uncompiled Keras ``Model``: Dense(64, relu) -> Dense(32, relu) ->
        Dense(1, sigmoid).
    """
    features_in = Input(shape=(input_shape,))
    hidden = features_in
    for units in (64, 32):
        hidden = Dense(units, activation='relu')(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=features_in, outputs=probability)

# Component 3: Truth Predictor (using ensemble of traditional models)
class TruthPredictor:
    """Soft-voting pair of classical classifiers (random forest and SVC)."""

    def __init__(self):
        """Create the two unfitted estimators held in ``self.models``."""
        forest = RandomForestClassifier(n_estimators=100)
        # probability=True is required so the SVC exposes predict_proba.
        svm = SVC(probability=True)
        self.models = [forest, svm]

    def fit(self, X, y):
        """Fit every estimator in ``self.models`` on the same data."""
        for estimator in self.models:
            estimator.fit(X, y)

    def predict(self, X):
        """Return the mean second-column (class index 1) probability per sample.

        Despite the name, this returns averaged probabilities, not hard labels.
        """
        positive_probs = [estimator.predict_proba(X)[:, 1] for estimator in self.models]
        return np.mean(positive_probs, axis=0)

# Build and train CKA model
news_characterizer = build_news_characterizer()

# Extract features from News Characterizer.
# NOTE(review): the characterizer is never compiled or fitted in this cell, so
# these features come from its initial weights.
X_train_features = news_characterizer.predict(X_train_pad)
X_test_features = news_characterizer.predict(X_test_pad)

# Size the coordinator from the real feature width. The previous hard-coded
# 128 did not match the characterizer's 64-unit output layer.
ensemble_coordinator = build_ensemble_coordinator(X_train_features.shape[1])

# Train Ensemble Coordinator
ensemble_coordinator.compile(optimizer='adam', loss='binary_crossentropy')
ensemble_coordinator.fit(X_train_features, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Train Truth Predictor
truth_predictor = TruthPredictor()
truth_predictor.fit(X_train_features, y_train)

# Make predictions (probabilities first, hard labels derived from them)
coordinator_probs = ensemble_coordinator.predict(X_test_features).flatten()
truth_probs = truth_predictor.predict(X_test_features)
coordinator_preds = (coordinator_probs > 0.5).astype(int)
truth_preds = (truth_probs > 0.5).astype(int)

# Combine by averaging the two probabilities and thresholding once.
# Averaging the hard 0/1 votes and calling np.round maps a 0.5 tie to 0
# (round-half-to-even), i.e. any disagreement silently became class 0.
final_preds = ((coordinator_probs + truth_probs) / 2 > 0.5).astype(int)

# Calculate metrics
accuracy = accuracy_score(y_test, final_preds)
precision = precision_score(y_test, final_preds)
recall = recall_score(y_test, final_preds)
f1 = f1_score(y_test, final_preds)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Save all model components
save_model(news_characterizer, 'news_characterizer.h5')
save_model(ensemble_coordinator, 'ensemble_coordinator.h5')

with open('truth_predictor.pkl', 'wb') as f:
    pickle.dump(truth_predictor, f)

with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# Preprocessing constants needed to rebuild the input pipeline at inference time.
model_metadata = {
    'max_words': max_words,
    'max_len': max_len
}
with open('model_metadata.pkl', 'wb') as f:
    pickle.dump(model_metadata, f)

print("All model components saved successfully")

In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from joblib import dump

# Load the labelled articles. The columns 'Article Title', 'Content' and
# 'Label' are read below, so the CSV must provide them.
df = pd.read_csv('F:/2025-PROJECTS/fake_news/dataset/fake_news_dataset.csv')

# Combine title and content for better context.
# Missing values are replaced by empty strings first: NaN + str is NaN, and a
# NaN entry would reach the tokenizer as a float instead of text.
df['text'] = df['Article Title'].fillna('') + ' ' + df['Content'].fillna('')
X = df['text']
y = df['Label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization: the vocabulary is fitted on the training texts only, then both
# splits are converted to integer sequences padded/truncated to max_len.
max_words = 10000
max_len = 200
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Component 1: News Characterizer (CNN)
def build_news_characterizer():
    """Build the CNN that turns padded token sequences into feature vectors.

    Uses the module-level ``max_len`` (input length) and ``max_words``
    (embedding vocabulary size). Token ids are embedded into 128 dimensions,
    passed through three parallel Conv1D branches (kernel sizes 3, 5 and 7,
    64 filters each) that are global-max-pooled, concatenated, and projected
    by a 128-unit ReLU Dense layer.

    Returns:
        An uncompiled Keras ``Model`` whose output has 128 features per
        sample, the width the ensemble coordinator in this cell is built for.
    """
    tokens = Input(shape=(max_len,))
    embedded = Embedding(input_dim=max_words, output_dim=128)(tokens)

    # One convolution + pooling branch per n-gram width.
    pooled_branches = []
    for width in (3, 5, 7):
        convolved = Conv1D(filters=64, kernel_size=width, activation='relu')(embedded)
        pooled_branches.append(GlobalMaxPooling1D()(convolved))

    features = Dense(128, activation='relu')(concatenate(pooled_branches))
    return Model(inputs=tokens, outputs=features)

# Component 2: Ensemble Coordinator
def build_ensemble_coordinator(input_shape):
    """Build the dense binary classifier that consumes characterizer features.

    Args:
        input_shape: Number of features per sample (an int, wrapped into a
            one-element shape tuple for the input layer).

    Returns:
        An uncompiled Keras ``Model``: Dense(128, relu) -> Dense(64, relu) ->
        Dense(1, sigmoid).
    """
    features_in = Input(shape=(input_shape,))
    hidden = features_in
    for units in (128, 64):
        hidden = Dense(units, activation='relu')(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=features_in, outputs=probability)

# Component 3: Truth Predictor (using ensemble of traditional models)
class TruthPredictor:
    """Soft-voting pair of classical classifiers (random forest and SVC)."""

    def __init__(self):
        """Create the two unfitted estimators held in ``self.models``."""
        forest = RandomForestClassifier(n_estimators=100)
        # probability=True is required so the SVC exposes predict_proba.
        svm = SVC(probability=True)
        self.models = [forest, svm]

    def fit(self, X, y):
        """Fit every estimator in ``self.models`` on the same data."""
        for estimator in self.models:
            estimator.fit(X, y)

    def predict(self, X):
        """Return the mean second-column (class index 1) probability per sample.

        Despite the name, this returns averaged probabilities, not hard labels.
        """
        positive_probs = [estimator.predict_proba(X)[:, 1] for estimator in self.models]
        return np.mean(positive_probs, axis=0)

# Build and train CKA model
news_characterizer = build_news_characterizer()
ensemble_coordinator = build_ensemble_coordinator(128)  # Matches News Characterizer output

# Extract features from News Characterizer.
# NOTE(review): the characterizer is never compiled or fitted in this cell, so
# these features come from its initial weights.
X_train_features = news_characterizer.predict(X_train_pad)
X_test_features = news_characterizer.predict(X_test_pad)

# Train Ensemble Coordinator
ensemble_coordinator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = ensemble_coordinator.fit(
    X_train_features,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
    verbose=1
)

# Train Truth Predictor
truth_predictor = TruthPredictor()
truth_predictor.fit(X_train_features, y_train)

# Make predictions (probabilities first, hard labels derived from them)
coordinator_probs = ensemble_coordinator.predict(X_test_features).flatten()
truth_probs = truth_predictor.predict(X_test_features)
coordinator_preds = (coordinator_probs > 0.5).astype(int)
truth_preds = (truth_probs > 0.5).astype(int)

# Combine by averaging the two probabilities and thresholding once.
# Averaging the hard 0/1 votes and calling np.round maps a 0.5 tie to 0
# (round-half-to-even), i.e. any disagreement silently became class 0.
final_preds = ((coordinator_probs + truth_probs) / 2 > 0.5).astype(int)

# Calculate metrics
accuracy = accuracy_score(y_test, final_preds)
precision = precision_score(y_test, final_preds)
recall = recall_score(y_test, final_preds)
f1 = f1_score(y_test, final_preds)

print("\nFinal Model Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Save all model components
save_model(news_characterizer, 'news_characterizer.h5')
save_model(ensemble_coordinator, 'ensemble_coordinator.h5')

with open('truth_predictor.pkl', 'wb') as f:
    pickle.dump(truth_predictor, f)

with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

# Preprocessing constants needed to rebuild the input pipeline at inference time.
model_metadata = {
    'max_words': max_words,
    'max_len': max_len
}
with open('model_metadata.pkl', 'wb') as f:
    pickle.dump(model_metadata, f)

# Report the file names exactly as written above (the earlier messages listed
# pluralised names that do not exist on disk).
print("\nAll model components saved successfully:")
print("- news_characterizer.h5")
print("- ensemble_coordinator.h5")
print("- truth_predictor.pkl")
print("- tokenizer.pkl")
print("- model_metadata.pkl")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Final Model Performance:
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000


  save_model(news_characterizer, 'news_characterizer.h5')
  save_model(ensemble_coordinator, 'ensemble_coordinator.h5')



All model components saved successfully:
- news_characterizers.h5
- ensemble_coordinators.h5
- truth_predictors.pkl
- tokenizers.pkl
- model_metadatas.pkl


In [6]:
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

class CKAPredictor:
    """Inference wrapper around the saved CNN, coordinator and truth predictor."""

    def __init__(self, model_dir='./'):
        """Load every saved component from ``model_dir``.

        Args:
            model_dir: Directory holding news_characterizer.h5,
                ensemble_coordinator.h5, truth_predictor.pkl, tokenizer.pkl
                and model_metadata.pkl. A trailing separator is optional.

        NOTE: pickle.load can execute arbitrary code, so ``model_dir`` must
        only point at trusted artifacts. Unpickling also needs the class of
        each pickled object to be defined/importable in this session.
        """
        import os  # local import keeps this cell's import header unchanged

        def artifact(filename):
            # os.path.join copes with model_dir given with or without a
            # trailing separator; plain string concatenation did not.
            return os.path.join(model_dir, filename)

        self.news_characterizer = load_model(artifact('news_characterizer.h5'))
        self.ensemble_coordinator = load_model(artifact('ensemble_coordinator.h5'))

        with open(artifact('truth_predictor.pkl'), 'rb') as f:
            self.truth_predictor = pickle.load(f)

        with open(artifact('tokenizer.pkl'), 'rb') as f:
            self.tokenizer = pickle.load(f)

        with open(artifact('model_metadata.pkl'), 'rb') as f:
            self.metadata = pickle.load(f)

    def preprocess_text(self, text):
        """Convert one raw text into a padded sequence of length metadata['max_len']."""
        seq = self.tokenizer.texts_to_sequences([text])
        return pad_sequences(seq, maxlen=self.metadata['max_len'])

    def predict(self, text, return_proba=False):
        """Classify a single news text.

        Args:
            text: str - the news text to classify.
            return_proba: bool - when True, the result also carries the
                averaged score under 'combined_probability'.

        Returns:
            dict with 'prediction' ('Real' or 'Fake'), 'confidence' (the
            averaged score when Real, its complement when Fake),
            'coordinator_score' and 'truth_predictor_score'.
        """
        padded_seq = self.preprocess_text(text)

        # Features from the News Characterizer feed both downstream components.
        features = self.news_characterizer.predict(padded_seq, verbose=0)
        coord_pred = self.ensemble_coordinator.predict(features, verbose=0)[0][0]
        truth_pred = self.truth_predictor.predict(features)[0]

        # Average the two scores; a value above 0.5 is reported as 'Real'.
        combined_pred = (coord_pred + truth_pred) / 2
        final_pred = int(combined_pred > 0.5)

        result = {
            'prediction': 'Real' if final_pred == 1 else 'Fake',
            'confidence': float(combined_pred if final_pred == 1 else 1 - combined_pred),
            'coordinator_score': float(coord_pred),
            'truth_predictor_score': float(truth_pred)
        }

        if return_proba:
            result = {**result, 'combined_probability': float(combined_pred)}
        return result

# Demo: classify a few sample headlines with the saved models
if __name__ == "__main__":
    # Models are expected in the current directory (the default model_dir).
    predictor = CKAPredictor()

    # Sample news texts
    test_texts = [
        "Breaking News: Market Crash, Experts discuss the implications of the new product",
        "Political Scandal Unveiled, Clickbait! No credible sources support this claim",
        "Tech Company Launches New Product, Details from a press conference held earlier"
    ]

    print("Fake News Detection Results:\n" + "-"*50)
    for text in test_texts:
        result = predictor.predict(text)
        # One report per text, closed by a separator line.
        report_lines = [
            f"Text: {text[:60]}...",
            f"Prediction: {result['prediction']}",
            f"Confidence: {result['confidence']:.2%}",
            f"Coordinator Score: {result['coordinator_score']:.4f}",
            f"Truth Predictor Score: {result['truth_predictor_score']:.4f}",
            "-"*50,
        ]
        print("\n".join(report_lines))



Fake News Detection Results:
--------------------------------------------------








Text: Breaking News: Market Crash, Experts discuss the implication...
Prediction: Real
Confidence: 99.14%
Coordinator Score: 0.9846
Truth Predictor Score: 0.9981
--------------------------------------------------
Text: Political Scandal Unveiled, Clickbait! No credible sources s...
Prediction: Fake
Confidence: 99.10%
Coordinator Score: 0.0160
Truth Predictor Score: 0.0020
--------------------------------------------------
Text: Tech Company Launches New Product, Details from a press conf...
Prediction: Real
Confidence: 99.52%
Coordinator Score: 0.9904
Truth Predictor Score: 1.0000
--------------------------------------------------


In [7]:
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the pickled helpers first, then the two Keras models.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('truth_predictor.pkl', 'rb') as predictor_file:
    truth_predictor = pickle.load(predictor_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

with open('model_metadata.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

news_characterizer = load_model('news_characterizer.h5')
ensemble_coordinator = load_model('ensemble_coordinator.h5')

def predict_news(text):
    """Classify one news text as REAL or FAKE.

    Relies on the module-level tokenizer, metadata, news_characterizer,
    ensemble_coordinator and truth_predictor loaded earlier in this cell.

    Args:
        text: Raw article text.

    Returns:
        dict of display-ready strings: a 100-character snippet under 'text',
        'prediction', 'confidence' (percentage) and the three scores
        formatted to four decimals.
    """
    token_ids = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(token_ids, maxlen=metadata['max_len'])
    embedding = news_characterizer.predict(model_input, verbose=0)

    coord_score = ensemble_coordinator.predict(embedding, verbose=0)[0][0]
    truth_score = truth_predictor.predict(embedding)[0]

    # The averaged score decides the verdict: above 0.5 means REAL.
    combined_score = (coord_score + truth_score) / 2

    if len(text) > 100:
        snippet = text[:100] + '...'
    else:
        snippet = text
    verdict = 'REAL' if combined_score > 0.5 else 'FAKE'

    return {
        'text': snippet,
        'prediction': verdict,
        'confidence': f"{max(combined_score, 1-combined_score)*100:.1f}%",
        'coordinator_score': f"{coord_score:.4f}",
        'truth_predictor_score': f"{truth_score:.4f}",
        'combined_score': f"{combined_score:.4f}"
    }

# Run the classifier on a single sample article and print a short report
sample_news = "Government Announces New Policies, Details from a press conference held earlier"
result = predict_news(sample_news)

report_lines = [
    "Fake News Detection Result:",
    "="*50,
    f"News Snippet: {result['text']}",
    f"Prediction: {result['prediction']}",
    f"Confidence: {result['confidence']}",
    "\nDetailed Scores:",
    f"- Ensemble Coordinator: {result['coordinator_score']}",
    f"- Truth Predictor: {result['truth_predictor_score']}",
    f"- Combined Score: {result['combined_score']}",
    "="*50,
]
for line in report_lines:
    print(line)



Fake News Detection Result:
News Snippet: Government Announces New Policies, Details from a press conference held earlier
Prediction: REAL
Confidence: 99.7%

Detailed Scores:
- Ensemble Coordinator: 0.9931
- Truth Predictor: 1.0000
- Combined Score: 0.9966


In [8]:
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the pickled helpers first, then the two Keras models.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('truth_predictor.pkl', 'rb') as predictor_file:
    truth_predictor = pickle.load(predictor_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

with open('model_metadata.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

news_characterizer = load_model('news_characterizer.h5')
ensemble_coordinator = load_model('ensemble_coordinator.h5')

def predict_single_news(title, content, label=0):
    """Classify one article and report the component scores.

    Relies on the module-level tokenizer, metadata, news_characterizer,
    ensemble_coordinator and truth_predictor loaded earlier in this cell.

    Args:
        title: Article headline.
        content: Article body text.
        label: Known ground-truth label, used only to fill 'actual_label'
            (1 -> 'REAL', anything else -> 'FAKE'). Defaults to 0, which
            reproduces the value that was previously hard-coded.

    Returns:
        dict with the inputs, 'prediction' ('REAL'/'FAKE'), 'confidence'
        (percentage string), the three scores formatted to four decimals,
        and 'actual_label'.
    """
    # Combine title and content as done during training
    full_text = f"{title} {content}"

    # Preprocess the text
    seq = tokenizer.texts_to_sequences([full_text])
    padded = pad_sequences(seq, maxlen=metadata['max_len'])

    # Get features from News Characterizer
    features = news_characterizer.predict(padded, verbose=0)

    # Get predictions from both components
    coord_score = ensemble_coordinator.predict(features, verbose=0)[0][0]
    truth_score = truth_predictor.predict(features)[0]

    # Combine predictions (average); above 0.5 is reported as REAL
    combined_score = (coord_score + truth_score) / 2
    is_real = combined_score > 0.5

    return {
        'title': title,
        'content': content,
        'prediction': 'REAL' if is_real else 'FAKE',
        'confidence': f"{max(combined_score, 1-combined_score)*100:.1f}%",
        'coordinator_score': f"{coord_score:.4f}",
        'truth_predictor_score': f"{truth_score:.4f}",
        'combined_score': f"{combined_score:.4f}",
        # Derived from the caller-supplied label instead of a literal 0.
        'actual_label': 'REAL' if int(label) == 1 else 'FAKE'
    }

# Your specific input
input_title = "Tech Company Launches New Product"
input_content = "Clickbait! No credible sources support this claim."
input_label = 0  # From your dataset

# Make prediction
result = predict_single_news(input_title, input_content)

# Derive the ground-truth text from input_label here: the label is not passed
# to predict_single_news, so result['actual_label'] does not reflect it.
actual_label = 'REAL' if input_label == 1 else 'FAKE'

# Print detailed results
print("Fake News Detection Analysis:")
print("="*70)
print(f"Title: {result['title']}")
print(f"Content: {result['content']}")
print("\nPrediction Results:")
print(f"- Model Prediction: {result['prediction']} (Actual: {actual_label})")
print(f"- Confidence: {result['confidence']}")
print("\nComponent Scores:")
print(f"- Ensemble Coordinator: {result['coordinator_score']}")
print(f"- Truth Predictor: {result['truth_predictor_score']}")
print(f"- Combined Decision Score: {result['combined_score']}")
print("="*70)

# Check if prediction matches actual label (emoji literals repaired from mojibake)
if result['prediction'] == actual_label:
    print("\n✅ Prediction MATCHES the actual label!")
else:
    print("\n❌ Prediction DOES NOT match the actual label!")



Fake News Detection Analysis:
Title: Tech Company Launches New Product
Content: Clickbait! No credible sources support this claim.

Prediction Results:
- Model Prediction: FAKE (Actual: FAKE)
- Confidence: 98.8%

Component Scores:
- Ensemble Coordinator: 0.0230
- Truth Predictor: 0.0020
- Combined Decision Score: 0.0125

✅ Prediction MATCHES the actual label!


In [9]:
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the pickled helpers first, then the two Keras models.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('truth_predictor.pkl', 'rb') as predictor_file:
    truth_predictor = pickle.load(predictor_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

with open('model_metadata.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

news_characterizer = load_model('news_characterizer.h5')
ensemble_coordinator = load_model('ensemble_coordinator.h5')

def predict_single_news(title, content, label=0):
    """Classify one article and report the component scores.

    Relies on the module-level tokenizer, metadata, news_characterizer,
    ensemble_coordinator and truth_predictor loaded earlier in this cell.

    Args:
        title: Article headline.
        content: Article body text.
        label: Known ground-truth label, used only to fill 'actual_label'
            (1 -> 'REAL', anything else -> 'FAKE'). Defaults to 0, which
            reproduces the value that was previously hard-coded.

    Returns:
        dict with the inputs, 'prediction' ('REAL'/'FAKE'), 'confidence'
        (percentage string), the three scores formatted to four decimals,
        and 'actual_label'.
    """
    # Combine title and content as done during training
    full_text = f"{title} {content}"

    # Preprocess the text
    seq = tokenizer.texts_to_sequences([full_text])
    padded = pad_sequences(seq, maxlen=metadata['max_len'])

    # Get features from News Characterizer
    features = news_characterizer.predict(padded, verbose=0)

    # Get predictions from both components
    coord_score = ensemble_coordinator.predict(features, verbose=0)[0][0]
    truth_score = truth_predictor.predict(features)[0]

    # Combine predictions (average); above 0.5 is reported as REAL
    combined_score = (coord_score + truth_score) / 2
    is_real = combined_score > 0.5

    return {
        'title': title,
        'content': content,
        'prediction': 'REAL' if is_real else 'FAKE',
        'confidence': f"{max(combined_score, 1-combined_score)*100:.1f}%",
        'coordinator_score': f"{coord_score:.4f}",
        'truth_predictor_score': f"{truth_score:.4f}",
        'combined_score': f"{combined_score:.4f}",
        # Derived from the caller-supplied label instead of a literal 0.
        'actual_label': 'REAL' if int(label) == 1 else 'FAKE'
    }

# Your specific input
input_title = "Tech Company Launches New Product"
input_content = "Experts discuss the implications of the new product."
# NOTE(review): confirm this label against the dataset row for this article.
input_label = 0  # From your dataset

# Make prediction
result = predict_single_news(input_title, input_content)

# Derive the ground-truth text from input_label here: the label is not passed
# to predict_single_news, so result['actual_label'] does not reflect it.
actual_label = 'REAL' if input_label == 1 else 'FAKE'

# Print detailed results
print("Fake News Detection Analysis:")
print("="*70)
print(f"Title: {result['title']}")
print(f"Content: {result['content']}")
print("\nPrediction Results:")
print(f"- Model Prediction: {result['prediction']} (Actual: {actual_label})")
print(f"- Confidence: {result['confidence']}")
print("\nComponent Scores:")
print(f"- Ensemble Coordinator: {result['coordinator_score']}")
print(f"- Truth Predictor: {result['truth_predictor_score']}")
print(f"- Combined Decision Score: {result['combined_score']}")
print("="*70)

# Check if prediction matches actual label (emoji literals repaired from mojibake)
if result['prediction'] == actual_label:
    print("\n✅ Prediction MATCHES the actual label!")
else:
    print("\n❌ Prediction DOES NOT match the actual label!")



Fake News Detection Analysis:
Title: Tech Company Launches New Product
Content: Experts discuss the implications of the new product.

Prediction Results:
- Model Prediction: REAL (Actual: FAKE)
- Confidence: 99.2%

Component Scores:
- Ensemble Coordinator: 0.9861
- Truth Predictor: 0.9983
- Combined Decision Score: 0.9922

❌ Prediction DOES NOT match the actual label!


In [10]:
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the pickled helpers first, then the two Keras models.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('truth_predictor.pkl', 'rb') as predictor_file:
    truth_predictor = pickle.load(predictor_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

with open('model_metadata.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

news_characterizer = load_model('news_characterizer.h5')
ensemble_coordinator = load_model('ensemble_coordinator.h5')

def predict_news(title, content):
    """Classify a news article and flag a few keyword indicators.

    Relies on the module-level tokenizer, metadata, news_characterizer,
    ensemble_coordinator and truth_predictor loaded earlier in this cell.

    Args:
        title: Article headline.
        content: Article body text.

    Returns:
        dict with the inputs, 'prediction' ('REAL'/'FAKE'), 'confidence'
        (percentage string), the raw numeric 'coordinator_score',
        'truth_predictor_score' and 'combined_score', and a 'keywords' dict
        of booleans from substring checks on ``content``.
    """
    # Combine title and content
    full_text = f"{title} {content}"

    # Preprocess the text
    seq = tokenizer.texts_to_sequences([full_text])
    padded = pad_sequences(seq, maxlen=metadata['max_len'])

    # Get features from News Characterizer
    features = news_characterizer.predict(padded, verbose=0)

    # Get predictions from both components
    coord_score = ensemble_coordinator.predict(features, verbose=0)[0][0]
    truth_score = truth_predictor.predict(features)[0]

    # Combine predictions; above 0.5 is reported as REAL
    combined_score = (coord_score + truth_score) / 2
    is_real = combined_score > 0.5

    # Calculate confidence percentage
    confidence = max(combined_score, 1-combined_score)*100

    # Lower-case once so every keyword check is case-insensitive; the
    # clickbait check used to be the only case-sensitive one.
    lowered = content.lower()

    return {
        'title': title,
        'content': content,
        'prediction': 'REAL' if is_real else 'FAKE',
        'confidence': f"{confidence:.1f}%",
        'coordinator_score': coord_score,
        'truth_predictor_score': truth_score,
        'combined_score': combined_score,
        'keywords': {
            'clickbait': 'clickbait!' in lowered,
            'no_sources': 'no credible sources' in lowered,
            'misinformation': 'misinformation' in lowered,
            'political': 'political' in lowered
        }
    }

# The news article to analyze
news_title = "Political Scandal Unveiled"  # Example title
news_content = "Misinformation spreading for political gains."

# Make prediction
result = predict_news(news_title, news_content)

# Print results with analysis (emoji literals repaired from mojibake)
print("🔍 Fake News Detection Report")
print("="*60)
print(f"Title: {result['title']}")
print(f"Content: {result['content']}\n")
print(f"Final Verdict: {result['prediction']} (Confidence: {result['confidence']})")
print(f"Combined Score: {result['combined_score']:.4f}\n")

print("Component Scores:")
print(f"- Ensemble Coordinator: {result['coordinator_score']:.4f}")
print(f"- Truth Predictor: {result['truth_predictor_score']:.4f}\n")

print("Detected Keywords:")
for kw, detected in result['keywords'].items():
    print(f"- {kw.capitalize()}: {'✅' if detected else '❌'}")
print("="*60)

# Interpretation: every stated reason is checked before it is printed.
if result['prediction'] == 'FAKE':
    print("\nAnalysis: This article was classified as FAKE because:")
    if result['keywords']['misinformation']:
        print("- Contains the term 'misinformation' which is common in fake news")
    if result['keywords']['political']:
        print("- Mentions political context, which often appears in fabricated stories")
    if result['combined_score'] < 0.3:
        # A low combined score means high confidence in FAKE, so the message
        # names the score rather than calling it low confidence.
        print("- Extremely low combined score indicates strong fake news signals")
else:
    print("\nAnalysis: This article was classified as REAL because:")
    if not any(result['keywords'].values()):
        print("- The content lacks common fake news indicators")
    if result['coordinator_score'] > 0.5 and result['truth_predictor_score'] > 0.5:
        print("- Scores from both components agree on its authenticity")



🔍 Fake News Detection Report
Title: Political Scandal Unveiled
Content: Misinformation spreading for political gains.

Final Verdict: FAKE (Confidence: 99.7%)
Combined Score: 0.0025

Component Scores:
- Ensemble Coordinator: 0.0049
- Truth Predictor: 0.0002

Detected Keywords:
- Clickbait: ❌
- No_sources: ❌
- Misinformation: ✅
- Political: ✅

Analysis: This article was classified as FAKE because:
- Contains the term 'misinformation' which is common in fake news
- Mentions political context, which often appears in fabricated stories
- Extremely low confidence score indicates strong fake news signals


In [11]:
import pickle
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the pickled helpers first, then the two Keras models.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('truth_predictor.pkl', 'rb') as predictor_file:
    truth_predictor = pickle.load(predictor_file)

with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

with open('model_metadata.pkl', 'rb') as metadata_file:
    metadata = pickle.load(metadata_file)

news_characterizer = load_model('news_characterizer.h5')
ensemble_coordinator = load_model('ensemble_coordinator.h5')

def predict_news(title, content):
    """Classify a news article and flag a few authenticity indicators.

    Relies on the module-level tokenizer, metadata, news_characterizer,
    ensemble_coordinator and truth_predictor loaded earlier in this cell.

    Args:
        title: Article headline.
        content: Article body text.

    Returns:
        dict with the inputs, 'prediction' ('REAL'/'FAKE'), 'confidence'
        (percentage string), the three scores formatted as four-decimal
        strings, and an 'indicators' dict of booleans from case-insensitive
        substring checks on ``content``.
    """
    article = f"{title} {content}"
    token_ids = tokenizer.texts_to_sequences([article])
    model_input = pad_sequences(token_ids, maxlen=metadata['max_len'])
    embedding = news_characterizer.predict(model_input, verbose=0)

    coord_score = ensemble_coordinator.predict(embedding, verbose=0)[0][0]
    truth_score = truth_predictor.predict(embedding)[0]

    # The averaged score decides the verdict: above 0.5 means REAL.
    combined_score = (coord_score + truth_score) / 2
    verdict = 'REAL' if combined_score > 0.5 else 'FAKE'

    indicators = {
        'official_statement': 'official statement' in content.lower(),
        'government_source': 'government' in content.lower(),
        'negative_phrases': any(phrase in content.lower() for phrase in ['fake', 'fabricated', 'misinformation'])
    }

    return {
        'title': title,
        'content': content,
        'prediction': verdict,
        'confidence': f"{max(combined_score, 1-combined_score)*100:.1f}%",
        'coordinator_score': f"{coord_score:.4f}",
        'truth_predictor_score': f"{truth_score:.4f}",
        'combined_score': f"{combined_score:.4f}",
        'indicators': indicators
    }

# The news article to analyze
news_title = "Government Announces New Policy"
news_content = "The government released an official statement."

# Make prediction
result = predict_news(news_title, news_content)

# Print results (emoji literals repaired from mojibake)
print("📰 News Authenticity Report")
print("="*60)
print(f"Title: {result['title']}")
print(f"Content: {result['content']}\n")
print(f"Prediction: {result['prediction']} news")
print(f"Confidence: {result['confidence']}")
print(f"Combined Score: {result['combined_score']}\n")

print("Component Scores:")
print(f"- Ensemble Coordinator: {result['coordinator_score']}")
print(f"- Truth Predictor: {result['truth_predictor_score']}\n")

print("Key Indicators:")
for indicator, present in result['indicators'].items():
    print(f"- {'✅' if present else '❌'} {indicator.replace('_', ' ')}")
print("="*60)

# The scores arrive as formatted strings; convert before comparing, since
# comparing a str with a float raises TypeError.
combined_value = float(result['combined_score'])
coordinator_value = float(result['coordinator_score'])
truth_value = float(result['truth_predictor_score'])

# Explanation: every stated reason is checked before it is printed.
if result['prediction'] == 'REAL':
    print("\nAnalysis: This article was classified as REAL because:")
    if result['indicators']['official_statement']:
        print("- Contains reference to an official government statement")
    if not result['indicators']['negative_phrases']:
        print("- No negative phrases suggesting fabrication")
    if coordinator_value > 0.5 and truth_value > 0.5:
        print("- Both model components agreed on authenticity")
else:
    print("\nAnalysis: This article was classified as FAKE because:")
    if result['indicators']['negative_phrases']:
        print("- Contains phrases commonly found in fake news")
    if combined_value < 0.3:
        # A low combined score means high confidence in FAKE, so the message
        # names the score rather than calling it low confidence.
        print("- Extremely low combined score indicates suspicious content")



📰 News Authenticity Report
Title: Government Announces New Policy
Content: The government released an official statement.

Prediction: REAL news
Confidence: 87.2%
Combined Score: 0.8723

Component Scores:
- Ensemble Coordinator: 0.9124
- Truth Predictor: 0.8322

Key Indicators:
- ✅ official statement
- ✅ government source
- ❌ negative phrases

Analysis: This article was classified as REAL because:
- Contains reference to an official government statement
- No negative phrases suggesting fabrication
- Both model components agreed on authenticity


In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.linear_model import LogisticRegression

# Load the labelled articles. The columns 'Article Title', 'Content' and
# 'Label' are read below, so the CSV must provide them.
df = pd.read_csv('D:/dataset/fake_news_dataset.csv')
# Missing values are replaced by empty strings first: NaN + str is NaN, and a
# NaN entry would reach the tokenizer as a float instead of text.
df['text'] = df['Article Title'].fillna('') + ' ' + df['Content'].fillna('')
X = df['text']
y = df['Label']

# Train-test split (20% held out, fixed seed for a stable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization: the vocabulary is fitted on the training texts only, then both
# splits are converted to integer sequences padded/truncated to max_len.
max_words = 8000
max_len = 150
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Simple News Characterizer (CNN)
def build_simple_characterizer():
    """Assemble the single-branch CNN that turns padded token ids into a 64-d vector.

    Reads the module-level ``max_len`` (input sequence length) and ``max_words``
    (embedding vocabulary size). The returned Keras ``Model`` is neither
    compiled nor fitted here.
    """
    tokens_in = Input(shape=(max_len,))
    # Embedding -> width-5 convolution -> global max pool -> dense projection.
    features = Embedding(input_dim=max_words, output_dim=64)(tokens_in)
    features = Conv1D(filters=32, kernel_size=5, activation='relu')(features)
    features = GlobalMaxPooling1D()(features)
    features = Dense(64, activation='relu')(features)
    return Model(inputs=tokens_in, outputs=features)

# Truth Predictor: Logistic Regression
class SimpleTruthPredictor:
    """Logistic-regression head that scores feature vectors.

    Wraps ``LogisticRegression(max_iter=1000)``. ``predict`` returns
    probabilities rather than hard labels, so callers apply their own threshold.
    """

    def __init__(self):
        self.model = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        """Fit the wrapped classifier on features ``X`` and labels ``y``.

        Returns:
            This predictor, so construction and fitting can be chained.
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return column 1 of the wrapped model's ``predict_proba`` for each row of ``X``.

        NOTE(review): column 1 is the positive class only when the labels are
        0/1 -- confirm against the dataset.
        """
        return self.model.predict_proba(X)[:, 1]

# Build the FNED feature extractor.
# NOTE(review): the CNN is never fitted in this cell, so the features below come from
# its initial random weights; only the logistic-regression head learns from the labels.
# Seeding pins those weights so the reported metrics are repeatable across runs.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)
characterizer = build_simple_characterizer()

# Extract features
X_train_features = characterizer.predict(X_train_pad)
X_test_features = characterizer.predict(X_test_pad)

# Train truth predictor
truth_predictor = SimpleTruthPredictor()
truth_predictor.fit(X_train_features, y_train)

# Predict and evaluate
probs = truth_predictor.predict(X_test_features)
preds = (probs > 0.5).astype(int)  # label 1 when the predicted probability exceeds 0.5

# Evaluation
accuracy = accuracy_score(y_test, preds)
precision = precision_score(y_test, preds)
recall = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)

print("\nSimplified FNED Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


25/25 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step
7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step

Simplified FNED Performance:
Accuracy: 0.8150
Precision: 0.7661
Recall: 0.9223
F1 Score: 0.8370


In [16]:
#real
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.linear_model import LogisticRegression

# Load dataset
df = pd.read_csv('D:/dataset/fake_news_dataset.csv')
# A missing title or body would turn the whole concatenation into NaN (a float),
# which the tokenizer cannot process, so blanks are substituted first.
df['text'] = df['Article Title'].fillna('') + ' ' + df['Content'].fillna('')
X = df['text']
y = df['Label']

# Split data (80/20; fixed random_state keeps the partition repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization
max_words = 8000  # vocabulary cap shared by the tokenizer and the embedding layer
max_len = 150     # every sequence is padded/truncated to this many token ids
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)  # vocabulary is learned from the training split only
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Define News Characterizer (CNN)
def build_characterizer():
    """Create the one-branch CNN feature extractor (token ids in, 64 features out).

    Sequence length and vocabulary size come from the module-level ``max_len``
    and ``max_words``. The model is returned as built; this function does not
    compile or train it.
    """
    sequence_input = Input(shape=(max_len,))

    net = Embedding(input_dim=max_words, output_dim=64)(sequence_input)
    net = Conv1D(filters=32, kernel_size=5, activation='relu')(net)
    net = GlobalMaxPooling1D()(net)
    net = Dense(64, activation='relu')(net)

    return Model(inputs=sequence_input, outputs=net)

# Truth Predictor
class TruthPredictor:
    """Logistic-regression classifier applied to extracted feature vectors.

    Wraps ``LogisticRegression(max_iter=1000)``. ``predict`` yields
    probabilities, not labels; thresholding is left to the caller.
    """

    def __init__(self):
        self.model = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        """Fit the wrapped classifier on features ``X`` and labels ``y``.

        Returns:
            This predictor, so construction and fitting can be chained.
        """
        self.model.fit(X, y)
        return self

    def predict(self, X):
        """Return column 1 of the wrapped model's ``predict_proba`` for each row of ``X``.

        NOTE(review): column 1 is the positive class only when the labels are
        0/1 -- confirm against the dataset.
        """
        return self.model.predict_proba(X)[:, 1]

# Build the feature extractor.
# NOTE(review): no fit() call follows, so the CNN keeps its initial random weights and
# only the logistic-regression head is trained. Seeding pins those weights; without it
# the metrics printed below change from run to run.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)
characterizer = build_characterizer()

# Compile manually if needed (in case you use .h5 model in future)
characterizer.compile(optimizer='adam', loss='binary_crossentropy')

# Feature extraction
X_train_feat = characterizer.predict(X_train_pad)
X_test_feat = characterizer.predict(X_test_pad)

# Train and predict
truth_predictor = TruthPredictor()
truth_predictor.fit(X_train_feat, y_train)
probs = truth_predictor.predict(X_test_feat)
final_preds = (probs > 0.5).astype(int)  # label 1 when the probability exceeds 0.5

# Evaluation
accuracy = accuracy_score(y_test, final_preds)
precision = precision_score(y_test, final_preds)
recall = recall_score(y_test, final_preds)
f1 = f1_score(y_test, final_preds)

# Output
print("\nüì∞ News Authenticity Report")
print("=" * 60)
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")


25/25 ━━━━━━━━━━━━━━━━━━━━ 1s 15ms/step
7/7 ━━━━━━━━━━━━━━━━━━━━ 0s 57ms/step

📰 News Authenticity Report
Accuracy : 0.6950
Precision: 0.6419
Recall   : 0.9223
F1 Score : 0.7570


In [None]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, concatenate, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load dataset
df = pd.read_csv('D:/dataset/fake_news_dataset.csv')
# Replace missing titles/bodies with '' so the joined text is always a string;
# str + NaN would otherwise produce NaN, which the tokenizer cannot process.
df['text'] = df['Article Title'].fillna('') + ' ' + df['Content'].fillna('')
X = df['text']
y = df['Label']

# Split data (80/20; fixed random_state keeps the partition repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization
max_words = 10000  # vocabulary cap shared by the tokenizer and the embedding layer
max_len = 250      # every sequence is padded/truncated to this many token ids
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)  # vocabulary is learned from the training split only
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Improved CNN Feature Extractor
def build_characterizer():
    """Assemble the three-branch CNN feature extractor (token ids in, 128 features out).

    A shared embedding feeds parallel convolutions of width 3, 5 and 7; each is
    max-pooled, the results are concatenated, passed through dropout and
    projected by a dense layer. Reads the module-level ``max_len`` and
    ``max_words``. The model is returned uncompiled and unfitted.
    """
    token_ids = Input(shape=(max_len,))
    embedded = Embedding(input_dim=max_words, output_dim=128)(token_ids)

    # One convolution + global-max-pool branch per kernel width, built in
    # ascending width order.
    branch_outputs = []
    for width in (3, 5, 7):
        convolved = Conv1D(filters=64, kernel_size=width, activation='relu')(embedded)
        branch_outputs.append(GlobalMaxPooling1D()(convolved))

    combined = concatenate(branch_outputs)
    regularized = Dropout(0.5)(combined)
    projected = Dense(128, activation='relu')(regularized)

    return Model(inputs=token_ids, outputs=projected)

# Truth Predictor with scaling
class TruthPredictor:
    """Standardise features, then score them with logistic regression.

    Holds a ``StandardScaler`` and a ``LogisticRegression(max_iter=1000)``.
    The scaler is fitted in ``fit`` and only applied in ``predict``.
    ``predict`` returns probabilities, not labels.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        self.model = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        """Fit the scaler on ``X``, then the classifier on the scaled features.

        Returns:
            This predictor, so construction and fitting can be chained.
        """
        X_scaled = self.scaler.fit_transform(X)
        self.model.fit(X_scaled, y)
        return self

    def predict(self, X):
        """Scale ``X`` with the fitted scaler and return column 1 of ``predict_proba``.

        NOTE(review): column 1 is the positive class only when the labels are
        0/1 -- confirm against the dataset.
        """
        X_scaled = self.scaler.transform(X)
        return self.model.predict_proba(X_scaled)[:, 1]

# Build the feature extractor.
# NOTE(review): compile() only configures training and no fit() call follows, so the
# CNN keeps its initial random weights; only the scaler + logistic-regression head is
# trained. Seeding pins those weights so the metrics below are repeatable across runs.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)
characterizer = build_characterizer()
characterizer.compile(optimizer='adam', loss='binary_crossentropy')

# Feature extraction
X_train_feat = characterizer.predict(X_train_pad, verbose=0)
X_test_feat = characterizer.predict(X_test_pad, verbose=0)

# Train and predict
truth_predictor = TruthPredictor()
truth_predictor.fit(X_train_feat, y_train)
probs = truth_predictor.predict(X_test_feat)
final_preds = (probs > 0.5).astype(int)  # label 1 when the probability exceeds 0.5

# Evaluation
accuracy = accuracy_score(y_test, final_preds)
precision = precision_score(y_test, final_preds)
recall = recall_score(y_test, final_preds)
f1 = f1_score(y_test, final_preds)

# Output
print("\nüì∞ News Authenticity Report")
print("=" * 60)
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")



📰 News Authenticity Report
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000


In [None]:
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings

# import pandas as pd
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# from sklearn.ensemble import VotingClassifier, RandomForestClassifier
# from sklearn.svm import SVC
# import matplotlib.pyplot as plt
# import seaborn as sns
# from transformers import BertTokenizer, TFBertModel
# import tensorflow as tf
# from tensorflow.keras.preprocessing.sequence import pad_sequences

# # Load dataset
# df = pd.read_csv('D:/dataset/fake_news_dataset.csv')
# df['text'] = df['Article Title'] + ' ' + df['Content']
# X = df['text']
# y = df['Label']

# # Split data
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Load BERT tokenizer and model
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# bert_model = TFBertModel.from_pretrained('bert-base-uncased')

# # Tokenization (BERT-specific)
# max_len = 512  # Max length for BERT
# X_train_tokens = tokenizer(list(X_train), padding=True, truncation=True, max_length=max_len, return_tensors="tf")
# X_test_tokens = tokenizer(list(X_test), padding=True, truncation=True, max_length=max_len, return_tensors="tf")

# # BERT Model for Feature Extraction
# def build_bert_model():
#     input_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32)
#     attention_mask = tf.keras.Input(shape=(max_len,), dtype=tf.int32)

#     bert_output = bert_model(input_ids, attention_mask=attention_mask)[1]  # [1] is pooled output
#     output = tf.keras.layers.Dense(128, activation='relu')(bert_output)
#     output = tf.keras.layers.Dense(1, activation='sigmoid')(output)  # Binary classification
#     model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=output)

#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#     return model

# # Initialize and train the BERT model
# print("üîß Building and training BERT model...")
# bert_model = build_bert_model()

# # Train the model
# bert_model.fit(
#     [X_train_tokens['input_ids'], X_train_tokens['attention_mask']], 
#     y_train, 
#     epochs=3, 
#     batch_size=8, 
#     validation_split=0.1,
#     verbose=1
# )

# # Feature extraction from BERT model
# train_features = bert_model.predict([X_train_tokens['input_ids'], X_train_tokens['attention_mask']])
# test_features = bert_model.predict([X_test_tokens['input_ids'], X_test_tokens['attention_mask']])

# # Ensemble classifier (Voting Classifier with Random Forest + SVM)
# print("üß† Training Voting Classifier...")
# ensemble_model = VotingClassifier(
#     estimators=[
#         ('rf', RandomForestClassifier(n_estimators=100)),
#         ('svc', SVC(probability=True))
#     ], voting='soft'
# )

# ensemble_model.fit(train_features, y_train)

# # Predictions
# print("üîé Making predictions...")
# final_preds = ensemble_model.predict(test_features)

# # Evaluate
# accuracy = accuracy_score(y_test, final_preds)
# precision = precision_score(y_test, final_preds)
# recall = recall_score(y_test, final_preds)
# f1 = f1_score(y_test, final_preds)

# # Print results
# print("\nüì∞ News Authenticity Report")
# print("=" * 60)
# print(f"Accuracy : {accuracy:.4f}")
# print(f"Precision: {precision:.4f}")
# print(f"Recall   : {recall:.4f}")
# print(f"F1 Score : {f1:.4f}")

# # Confusion Matrix
# conf_matrix = confusion_matrix(y_test, final_preds)
# plt.figure(figsize=(6, 6))
# sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=['Real', 'Fake'], yticklabels=['Real', 'Fake'])
# plt.xlabel('Predicted')
# plt.ylabel('True')
# plt.title('Confusion Matrix')
# plt.show()


ModuleNotFoundError: No module named 'transformers'

In [22]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D, Dense, concatenate, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load and preprocess dataset
df = pd.read_csv('D:/dataset/fake_news_dataset.csv')
# Substitute '' for missing titles/bodies: str + NaN yields NaN, and a NaN "text"
# is a float that the tokenizer below cannot process.
df['text'] = df['Article Title'].fillna('') + ' ' + df['Content'].fillna('')
X = df['text']
y = df['Label']

# Split data with stratification (class proportions of y are kept in both splits)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Tokenization
max_words = 10000  # vocabulary cap shared by the tokenizer and the embedding layer
max_len = 250      # every sequence is padded/truncated to this many token ids
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)  # vocabulary is learned from the training split only
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# CNN Feature Extractor
def build_characterizer():
    """Assemble the regularised three-branch CNN (token ids in, 128 features out).

    The embedding output passes through dropout (rate 0.3) and then feeds
    parallel convolutions of width 3, 5 and 7. Each branch is max-pooled; the
    pooled vectors are concatenated, passed through dropout (rate 0.5) and
    projected by a dense layer. Reads the module-level ``max_len`` and
    ``max_words``. The model is returned uncompiled and unfitted.
    """
    token_ids = Input(shape=(max_len,))
    embedded = Embedding(input_dim=max_words, output_dim=128)(token_ids)
    embedded = Dropout(0.3)(embedded)  # Extra regularization

    # One convolution + global-max-pool branch per kernel width, built in
    # ascending width order.
    pooled_branches = []
    for kernel_width in (3, 5, 7):
        feature_map = Conv1D(filters=64, kernel_size=kernel_width, activation='relu')(embedded)
        pooled_branches.append(GlobalMaxPooling1D()(feature_map))

    stacked = concatenate(pooled_branches)
    stacked = Dropout(0.5)(stacked)
    features = Dense(128, activation='relu')(stacked)

    return Model(inputs=token_ids, outputs=features)

# Logistic Regression classifier
class TruthPredictor:
    """Logistic-regression classifier over standardised feature vectors.

    Holds a ``StandardScaler`` and a ``LogisticRegression(max_iter=1000)``.
    The scaler is fitted in ``fit`` and only applied in ``predict``.
    ``predict`` returns probabilities, not labels.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        self.model = LogisticRegression(max_iter=1000)

    def fit(self, X, y):
        """Fit the scaler on ``X``, then the classifier on the scaled features.

        Returns:
            This predictor, so construction and fitting can be chained.
        """
        X_scaled = self.scaler.fit_transform(X)
        self.model.fit(X_scaled, y)
        return self

    def predict(self, X):
        """Scale ``X`` with the fitted scaler and return column 1 of ``predict_proba``.

        NOTE(review): column 1 is the positive class only when the labels are
        0/1 -- confirm against the dataset.
        """
        X_scaled = self.scaler.transform(X)
        return self.model.predict_proba(X_scaled)[:, 1]

# Build model and extract features.
# NOTE(review): compile() only configures training and no fit() call follows, so the
# CNN keeps its initial random weights; only the scaler + logistic-regression head is
# trained. Seeding pins those weights so the metrics below are repeatable across runs.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)
characterizer = build_characterizer()
characterizer.compile(optimizer='adam', loss='binary_crossentropy')

X_train_feat = characterizer.predict(X_train_pad, verbose=0)
X_test_feat = characterizer.predict(X_test_pad, verbose=0)

# Train and evaluate
truth_predictor = TruthPredictor()
truth_predictor.fit(X_train_feat, y_train)
probs = truth_predictor.predict(X_test_feat)
final_preds = (probs > 0.5).astype(int)  # label 1 when the probability exceeds 0.5

# Metrics
accuracy = accuracy_score(y_test, final_preds)
precision = precision_score(y_test, final_preds)
recall = recall_score(y_test, final_preds)
f1 = f1_score(y_test, final_preds)

# Output
print("\nüì∞ News Authenticity Report")
print("=" * 60)
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")



📰 News Authenticity Report
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1 Score : 1.0000
