In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the news dataset from the processed CSV.
# The CSV's first column is read as the index and then immediately turned back
# into an ordinary column, which leaves a fresh default integer index on the frame.
# NOTE(review): that restored first column stays in the frame, so it will end up
# among the predictors unless it is 'title' or 'label' -- confirm this is intended.
news = pd.read_csv('./dataset_processed/fakeNews_titleStemming_Sampled.csv', index_col=0).reset_index()

In [4]:
# Rebuild the train/test predictor and target variables from the loaded frame.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
train_news, test_news = train_test_split(news, random_state=7, test_size=0.2)

# 'title' and 'label' are excluded from the model inputs; 'label' is the target.
non_feature_columns = ['title', 'label']

train_news_predictors = train_news.drop(columns=non_feature_columns)
test_news_predictors = test_news.drop(columns=non_feature_columns)

train_news_target = train_news['label']
test_news_target = test_news['label']

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

## Stacked ensemble of heterogeneous base learners (logistic regression,
## decision tree, k-NN). Their predicted probabilities are aggregated by a
## shallow decision tree acting as the final estimator.

## Create list of models in the ensemble
# NOTE(review): penalty='none' is the older scikit-learn spelling; it was
# deprecated in scikit-learn 1.2 in favour of penalty=None, and the resulting
# warning is hidden by the global warnings filter at the top of the notebook.
# Switch to penalty=None once the environment is on scikit-learn >= 1.2.
#
# random_state is pinned on the trees (same seed as the train/test split):
# DecisionTreeClassifier permutes the features at every split, so equally good
# splits can otherwise be resolved differently from one run to the next.
base_models = [('logreg', LogisticRegression(penalty='none')),
               ('tree', DecisionTreeClassifier(max_depth=5, random_state=7)),
               ('knn', KNeighborsClassifier(n_neighbors=20))]

## Final model used to aggregate predicted probs
my_final_estimator = DecisionTreeClassifier(max_depth=3, random_state=7)

## Create stack
my_stack = StackingClassifier(estimators=base_models,
                              final_estimator=my_final_estimator,
                              stack_method='predict_proba', cv=5)

## Fit and Evaluate (cv is done internally in StackingClassifier)
fitted_stack = my_stack.fit(train_news_predictors, train_news_target)
train_news_target_predicted = fitted_stack.predict(train_news_predictors)
test_news_target_predicted = fitted_stack.predict(test_news_predictors)

# Accuracy
# The `_rnn` suffix on the metric variables is a leftover name (this model is a
# stacking ensemble, not an RNN); the names are kept unchanged in case later
# cells reference them. The printed labels below name the model correctly.
train_accuracy_rnn = accuracy_score(train_news_target, train_news_target_predicted)
test_accuracy_rnn = accuracy_score(test_news_target, test_news_target_predicted)
print(f'Train Accuracy for stacking (logreg + tree + knn) model is {train_accuracy_rnn}')
print(f'Test Accuracy for stacking (logreg + tree + knn) model is {test_accuracy_rnn}')

# TODO: confusion matrix

# F1 Score (macro average: unweighted mean of the per-class F1 scores)
train_f1_rnn = f1_score(train_news_target,
                        train_news_target_predicted, average='macro')
test_f1_rnn = f1_score(test_news_target,
                       test_news_target_predicted, average='macro')
print(f'Train F1 Score for stacking (logreg + tree + knn) model is {train_f1_rnn}')
print(f'Test F1 Score for stacking (logreg + tree + knn) model is {test_f1_rnn}')





Train Accuracy for RNN model is 0.8975
Test Accuracy for RNN model is 0.82
Train F1 Score for RNN model is 0.8955247650899825
Test F1 Score for RNN model is 0.8139727159983463


In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

## Stacked ensemble of three decision trees of increasing depth (3, 4, 5).
## Their predicted probabilities are aggregated by another shallow decision
## tree acting as the final estimator.
## NOTE: this cell reuses the variable names of the previous stacking cell
## (base_models, my_stack, fitted_stack, the metric variables, ...), so running
## it overwrites that cell's results.

## Create list of models in the ensemble
# random_state is pinned on every tree (same seed as the train/test split):
# DecisionTreeClassifier permutes the features at every split, so equally good
# splits can otherwise be resolved differently from one run to the next.
base_models = [('tree1', DecisionTreeClassifier(max_depth=3, random_state=7)),
               ('tree2', DecisionTreeClassifier(max_depth=4, random_state=7)),
               ('tree3', DecisionTreeClassifier(max_depth=5, random_state=7))]

## Final model used to aggregate predicted probs
my_final_estimator = DecisionTreeClassifier(max_depth=3, random_state=7)

## Create stack
my_stack = StackingClassifier(estimators=base_models,
                              final_estimator=my_final_estimator,
                              stack_method='predict_proba', cv=5)

## Fit and Evaluate (cv is done internally in StackingClassifier)
fitted_stack = my_stack.fit(train_news_predictors, train_news_target)
train_news_target_predicted = fitted_stack.predict(train_news_predictors)
test_news_target_predicted = fitted_stack.predict(test_news_predictors)

# Accuracy
# The `_rnn` suffix on the metric variables is a leftover name (this model is a
# stacking ensemble, not an RNN); the names are kept unchanged in case later
# cells reference them. The printed labels below name the model correctly.
train_accuracy_rnn = accuracy_score(train_news_target, train_news_target_predicted)
test_accuracy_rnn = accuracy_score(test_news_target, test_news_target_predicted)
print(f'Train Accuracy for stacking (three trees) model is {train_accuracy_rnn}')
print(f'Test Accuracy for stacking (three trees) model is {test_accuracy_rnn}')

# TODO: confusion matrix

# F1 Score (macro average: unweighted mean of the per-class F1 scores)
train_f1_rnn = f1_score(train_news_target,
                        train_news_target_predicted, average='macro')
test_f1_rnn = f1_score(test_news_target,
                       test_news_target_predicted, average='macro')
print(f'Train F1 Score for stacking (three trees) model is {train_f1_rnn}')
print(f'Test F1 Score for stacking (three trees) model is {test_f1_rnn}')





Train Accuracy for RNN model is 0.91
Test Accuracy for RNN model is 0.9
Train F1 Score for RNN model is 0.9094561711278366
Test F1 Score for RNN model is 0.8996386993175431
