### Setup
Importing necessary libraries

In [19]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

## Splitting data for Test/Train
Sampling 1000 rows from the dataset: 700 for training and 300 for testing

In [13]:
num_trn, num_tst = 700, 300
SEED = 42  # fixed seed so "Restart & Run All" reproduces the same split

# Load the dataset and drop rows with missing values.
# Chaining .dropna() (instead of inplace=True) keeps this cell idempotent.
data = pd.read_csv("fake_and_real_news.csv").dropna()


# Separating data from main dataset
# Draw the training rows first, then draw the test rows only from the rows
# that were NOT picked for training. Sampling both sets independently from
# `data` would let the same article land in both, leaking training data into
# the evaluation and inflating the measured accuracy.
train = data.sample(n=num_trn, random_state=SEED)
test = data.drop(index=train.index).sample(n=num_tst, random_state=SEED)

# Guard against accidental overlap between the two splits
assert train.index.intersection(test.index).empty, "train and test sets overlap"

# Printing the data
print(f"Train shape: {train.shape}")
print(f"Test shape: {test.shape}\n")

print(f"Train data: \n{train.head()}\n")
print(f"Test data: \n{test.head()}")

Train shape: (700, 2)
Test shape: (300, 2)

Train data: 
                                                   Text label
1574   Trump’s ‘Major Meeting’ On Veterans Affairs N...  Fake
2955   Arizona GOP Poised To Ban Social Justice Clas...  Fake
4744  Trump to address Senate Republicans on Tuesday...  Real
8847  Chicago mayor unveils 'gimmick-free' budget fo...  Real
3949   Chuck Schumer: Republicans Have ‘Real Problem...  Fake

Test data: 
                                                   Text label
122    Pastor Decides Straight Couple Can’t Even SUP...  Fake
6398  Trump arrives in Philippines for Asian leaders...  Real
145    Hawaii Attorney General HUMILIATES Jeff Sessi...  Fake
6972   Member Of Trump Team Resigns In Disgrace To S...  Fake
3135   Racists DISGUSTINGLY Attack And Blame Preside...  Fake


### Separating X and Y values from data

In [17]:
# Feature column ("Text") and target column ("label") for each split
x_train = train["Text"]
y_train = train["label"]
x_test = test["Text"]
y_test = test["label"]

# Previewing the data: first the shape of every series ...
print(
    f"x_train: {x_train.shape}",
    f"y_train: {y_train.shape}",
    f"x_test: {x_test.shape}",
    f"y_test: {y_test.shape}",
    sep="\n",
)

# ... then the first few rows of each one
print(
    f"x_train: {x_train.head()}",
    f"y_train: {y_train.head()}",
    f"x_test: {x_test.head()}",
    f"y_test: {y_test.head()}",
    sep="\n",
)


x_train: (700,)
y_train: (700,)
x_test: (300,)
y_test: (300,)
x_train: 1574     Trump’s ‘Major Meeting’ On Veterans Affairs N...
2955     Arizona GOP Poised To Ban Social Justice Clas...
4744    Trump to address Senate Republicans on Tuesday...
8847    Chicago mayor unveils 'gimmick-free' budget fo...
3949     Chuck Schumer: Republicans Have ‘Real Problem...
Name: Text, dtype: object
y_train: 1574    Fake
2955    Fake
4744    Real
8847    Real
3949    Fake
Name: label, dtype: object
x_test: 122      Pastor Decides Straight Couple Can’t Even SUP...
6398    Trump arrives in Philippines for Asian leaders...
145      Hawaii Attorney General HUMILIATES Jeff Sessi...
6972     Member Of Trump Team Resigns In Disgrace To S...
3135     Racists DISGUSTINGLY Attack And Blame Preside...
Name: Text, dtype: object
y_test: 122     Fake
6398    Real
145     Fake
6972    Fake
3135    Fake
Name: label, dtype: object


### Using a TF-IDF vectorizer to convert text into numeric features weighted by term frequency and inverse document frequency

In [None]:
# Drop English stop words and ignore terms whose document frequency in the
# training set is above 70% (too common to help separate the classes)
vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)

# Fit and transform the training data
# The raw text is read from `train` / `test` rather than from x_train / x_test:
# this cell rebinds x_train / x_test to the TF-IDF matrices, so feeding those
# names back in would break when the cell is run a second time.
# The vocabulary is learned from the training text only; the test text is
# transformed with that same vocabulary.
x_train = vectorizer.fit_transform(train["Text"])
x_test = vectorizer.transform(test["Text"])

#### Initializing and training the model

In [None]:
# Create the classifier with scikit-learn's default hyperparameters
model = LogisticRegression()

# Train it on the TF-IDF features and their Fake/Real labels
model.fit(X=x_train, y=y_train)

## Make predictions and evaluate the accuracy of the model

In [None]:
# Predict a label for every held-out test article
y_pred = model.predict(x_test)

# Overall fraction of test articles classified correctly
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}\n")

# Per-class precision, recall and F1 score
print(classification_report(y_test, y_pred))