In [1]:
import pandas as pd
import re

# Loading datasets

In [None]:

# Read the three raw CSV sources in one pass; the order of the paths matches
# the order of the names being bound on the left-hand side.
df_fake, df_truth, df_extra = map(
    pd.read_csv,
    (
        "../dataset/raw/Fake.csv",
        "../dataset/raw/True.csv",
        "../dataset/raw/extra.csv",
    ),
)

In [6]:
# Preview the first five rows of the fake-news source.
df_fake.head(n=5)

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [4]:
# Preview the first five rows of the true-news source.
df_truth.head(n=5)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


In [10]:
# In this source, label 1 means unreliable and label 0 means reliable.
df_extra.head(n=5)

Unnamed: 0,id,title,author,text,label
0,0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1


# Processing and Merging Datasets

"(Reuters)" seems like a bias in df_truth, thus should be removed

In [5]:
# Remove "(Reuters)" Bias from Truth News 
def remove_reuters(text):
    """Strip the "(Reuters)" source tag from an article body.

    The tag is removed together with any whitespace directly in front of it,
    so "WASHINGTON (Reuters) - ..." becomes "WASHINGTON - ..." instead of
    leaving a double space behind. Leading and trailing whitespace of the
    result is trimmed.

    Args:
        text: The article text. Non-string values (for example the NaN that
            pandas uses for an empty cell) are returned unchanged instead of
            raising a TypeError inside ``re.sub``.

    Returns:
        The cleaned string, or the original value if it was not a string.
    """
    # Missing cells arrive as float NaN when applied over a Series; pass them
    # through so a single empty row does not abort the whole column transform.
    if not isinstance(text, str):
        return text
    # `\s*` consumes the gap before the tag so no doubled space is left behind.
    return re.sub(r"\s*\(Reuters\)", "", text).strip()

# Clean every article body element-wise, then preview the result.
df_truth["text"] = df_truth["text"].map(remove_reuters)
df_truth.head(n=5)

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON - The head of a conservative Repub...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON - Transgender people will be allow...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON - The special counsel investigatio...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON - Trump campaign adviser George Pa...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON - President Donald Trump c...,politicsNews,"December 29, 2017"


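To combine the three datasets, we keep only the columns they need to share: title, text, author, and the reliability label. `Fake.csv` and `True.csv` have no author column, so their author is set to "Unknown".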

In [11]:
# Keep only the columns needed for the merged dataset.
# `.copy()` makes each result an independent frame: new columns are assigned
# to df_fake / df_truth afterwards, and assigning into a column slice of the
# raw frame triggers pandas' SettingWithCopyWarning.
df_fake = df_fake[["title", "text"]].copy()
df_truth = df_truth[["title", "text"]].copy()
df_extra = df_extra[["title", "text", "author", "label"]].copy()

In [15]:
# Tag each row by source (1 = unreliable, 0 = reliable). Neither of these two
# frames has an author column at this point, so a placeholder is used.
for frame, flag in ((df_fake, 1), (df_truth, 0)):
    frame["unreliable"] = flag
    frame["author"] = "Unknown"

# df_extra already has both fields; only its label column name needs aligning.
df_extra = df_extra.rename(columns={"label": "unreliable"})

In [16]:
# Check the fake-news frame after adding the label and author columns.
df_fake.head(n=5)

Unnamed: 0,title,text,unreliable,author
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,1,Unknown
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,1,Unknown
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",1,Unknown
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",1,Unknown
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,1,Unknown


In [17]:
# Check the true-news frame after adding the label and author columns.
df_truth.head(n=5)

Unnamed: 0,title,text,unreliable,author
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON - The head of a conservative Repub...,0,Unknown
1,U.S. military to accept transgender recruits o...,WASHINGTON - Transgender people will be allow...,0,Unknown
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON - The special counsel investigatio...,0,Unknown
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON - Trump campaign adviser George Pa...,0,Unknown
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON - President Donald Trump c...,0,Unknown


In [18]:
# Check the extra frame after renaming its label column.
df_extra.head(n=5)

Unnamed: 0,title,text,author,unreliable
0,House Dem Aide: We Didn’t Even See Comey’s Let...,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,1
1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Ever get the feeling your life circles the rou...,Daniel J. Flynn,0
2,Why the Truth Might Get You Fired,"Why the Truth Might Get You Fired October 29, ...",Consortiumnews.com,1
3,15 Civilians Killed In Single US Airstrike Hav...,Videos 15 Civilians Killed In Single US Airstr...,Jessica Purkiss,1
4,Iranian woman jailed for fictional unpublished...,Print \nAn Iranian woman has been sentenced to...,Howard Portnoy,1


In [19]:
# Merge all datasets: stack the three sources row-wise and renumber the index
# so that row labels from the individual frames do not repeat.
source_frames = [df_fake, df_truth, df_extra]
merged_df = pd.concat(source_frames, ignore_index=True)
merged_df

Unnamed: 0,title,text,unreliable,author
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,1,Unknown
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,1,Unknown
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",1,Unknown
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",1,Unknown
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,1,Unknown
...,...,...,...,...
65693,Rapper T.I.: Trump a ’Poster Child For White S...,Rapper T. I. unloaded on black celebrities who...,0,Jerome Hudson
65694,"N.F.L. Playoffs: Schedule, Matchups and Odds -...",When the Green Bay Packers lost to the Washing...,0,Benjamin Hoffman
65695,Macy’s Is Said to Receive Takeover Approach by...,The Macy’s of today grew from the union of sev...,0,Michael J. de la Merced and Rachel Abrams
65696,"NATO, Russia To Hold Parallel Exercises In Bal...","NATO, Russia To Hold Parallel Exercises In Bal...",1,Alex Ansary


In [20]:
# Persist the merged frame; the row index is not written to the file.
merged_df.to_csv(
    path_or_buf="../dataset/processed/final_datasets.csv",
    index=False,
)

In [21]:
# Read the saved file back and display it to confirm the export.
final_df = pd.read_csv(
    filepath_or_buffer="../dataset/processed/final_datasets.csv"
)
final_df

Unnamed: 0,title,text,unreliable,author
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,1,Unknown
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,1,Unknown
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",1,Unknown
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",1,Unknown
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,1,Unknown
...,...,...,...,...
65693,Rapper T.I.: Trump a ’Poster Child For White S...,Rapper T. I. unloaded on black celebrities who...,0,Jerome Hudson
65694,"N.F.L. Playoffs: Schedule, Matchups and Odds -...",When the Green Bay Packers lost to the Washing...,0,Benjamin Hoffman
65695,Macy’s Is Said to Receive Takeover Approach by...,The Macy’s of today grew from the union of sev...,0,Michael J. de la Merced and Rachel Abrams
65696,"NATO, Russia To Hold Parallel Exercises In Bal...","NATO, Russia To Hold Parallel Exercises In Bal...",1,Alex Ansary


The three datasets are now merged and saved to `../dataset/processed/final_datasets.csv`.