In [None]:
import pandas as pd
import numpy as np
import re

# scikit-learn
# -----------------------
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score

In [None]:
# Read the two source corpora; which file a row came from decides its class label later on.
fake_df, true_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/Fake.csv', '../data/True.csv')
)

In [None]:
# Preview the first five rows of the fake-news frame (five is head()'s default).
fake_df.head(n=5)

In [None]:
# DataFrame.info() prints its report and returns None. Calling it as two
# statements avoids the cell echoing a meaningless "(None, None)" tuple.
fake_df.info()
true_df.info()

In [None]:
# Keep only the headline and body; 'subject' and 'date' are not used as features.
# The drop is in place, so re-running this cell once the columns are gone raises KeyError.
fake_df.drop(columns=['subject', 'date'], inplace=True)
true_df.drop(columns=['subject', 'date'], inplace=True)

In [None]:
# Target encoding used everywhere below: 0 = fake article, 1 = true article.
fake_df['class'], true_df['class'] = 0, 1

In [None]:
# Stack both labelled frames into one corpus. ignore_index renumbers the rows
# so index values from the two source files do not repeat.
news_df = pd.concat(
    [fake_df, true_df],
    ignore_index=True,
    sort=False,
)

In [None]:
# Merge headline and body into a single document per article.
# The explicit space keeps the last word of the title from fusing with the
# first word of the body (e.g. "... jets" + "JAKARTA ..." -> "jetsJAKARTA"),
# which would otherwise produce a bogus token for the vectorizer.
news_df['text'] = news_df['title'] + ' ' + news_df['text']
news_df.drop(['title'], axis=1, inplace=True)

In [None]:
def wordopt(text):
    """Normalise raw article text into lowercase, space-separated word tokens.

    Steps, in order: lowercase, remove URLs, replace every non-word character
    (punctuation, newlines, tabs, ...) with a space, collapse runs of spaces,
    and trim leading/trailing spaces. Digits and underscores are word
    characters for the regex engine and are therefore kept.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    str
        The cleaned text; an empty string if nothing but separators remains.

    Raises
    ------
    AttributeError
        If ``text`` is not a string (it must provide ``.lower()``).
    """
    text = text.lower()
    # URLs must go first, while '/' and '.' are still present for the pattern to match.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    # Newlines are non-word characters too, so this pass already turns them
    # into spaces; a separate newline-removal pass would never match anything.
    text = re.sub(r'\W', ' ', text)
    # Only plain spaces remain as separators now: squeeze runs, then trim the ends.
    return re.sub(r' +', ' ', text).strip(' ')

# Clean every document element-wise with wordopt, overwriting the raw text column.
news_df['text'] = news_df['text'].map(wordopt)

In [None]:
# Features are the cleaned article text; targets are the 0/1 class labels.
X, y = news_df['text'], news_df['class']

# Hold out 20% of the articles for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn text into TF-IDF features. The vectorizer is fitted on the training
# split only; the test split is transformed with the already-fitted vectorizer
# so no information from the held-out articles enters the fit.
vectorization = TfidfVectorizer()
vect_X_train = vectorization.fit_transform(X_train)
vect_X_test = vectorization.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier (default constructor arguments) on the
# TF-IDF training matrix. fit() is the last expression, so the cell shows the estimator.
lr = LogisticRegression()
lr.fit(vect_X_train,y_train)

In [None]:
# Predicted class labels for the held-out articles; reused by the metric cells below.
lr_y_pred = lr.predict(vect_X_test)


In [None]:
# lr.score(X, y) predicts on X and returns the mean accuracy against y in one call,
# so there is no need to compare predicted and true labels by hand.
# It should report the same value as accuracy_score(y_test, lr_y_pred) in the next cell.

lr.score(vect_X_test, y_test)

In [None]:
# Accuracy computed from the stored predictions; cross-checks the lr.score value above.
accuracy_score(y_test, lr_y_pred)

In [None]:
# Confusion matrix: rows are the true classes, columns the predicted classes,
# both in sorted label order (0 = fake, 1 = true).
confusion_matrix(y_test, lr_y_pred)

In [None]:
from sklearn.metrics import classification_report
# The report is returned as one multi-line string, so print() is needed to render its layout.
print(classification_report(y_test, lr_y_pred))

In [None]:
def test(text):
    """Label one raw news string using the in-memory vectorizer and classifier.

    The input is cleaned with ``wordopt``, vectorised with the notebook-level
    ``vectorization`` object and classified with ``lr``.

    Returns
    -------
    str or None
        'Fake' for class 0, 'True as fuck' for class 1, None for any other label.
    """
    # NOTE(review): the class-1 label is profane and this function duplicates
    # test2 (which returns 'Fake News' for class 0); worth unifying once
    # callers of the exact strings are confirmed.
    cleaned = wordopt(text)
    features = vectorization.transform(pd.Series(cleaned))
    label = lr.predict(features)[0]  # one document in, one label out

    if label == 0:
        return 'Fake'
    if label == 1:
        return 'True as fuck'
    return None

### Testing the model with a single input

In [None]:
# Read one article from the prompt and classify it with the in-memory model.
# input() already returns a str, so no conversion is needed.
# This cell blocks on user input during "Run All".
news = input()
test(news)

# Save the model

In [None]:
# Import the pickle module
import pickle

# Persist the fitted logistic-regression classifier (lr).
# The file is called "LinearRegressor.pkl" although the model is a
# LogisticRegression; the path is kept because a later cell loads from it.
# The with-blocks guarantee each file is flushed and closed before the next step.
with open('../models/LinearRegressor.pkl', 'wb') as model_file:
    pickle.dump(lr, model_file)

# Persist the fitted TF-IDF vectorizer; inference needs the same vocabulary
# and weights that the classifier was trained with.
with open('../models/vectorization.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorization, vectorizer_file)

# Round-trip check: read the vectorizer back from disk.
# Despite its name, pickle_model holds the vectorizer, not the classifier.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open('../models/vectorization.pkl', 'rb') as vectorizer_file:
    pickle_model = pickle.load(vectorizer_file)

In [None]:
# Sample article used to smoke-test the saved vectorizer and model below.
# The headline and body are joined without a separator (note "jetsJAKARTA").
# NOTE(review): presumably copied from the true-news data - confirm the expected label is 1.
text = "Indonesia to buy $1.14 billion worth of Russian jetsJAKARTA (Reuters) - Indonesia will buy 11 Sukhoi fighter jets worth $1.14 billion from Russia in exchange for cash and Indonesian commodities, two cabinet ministers said on Tuesday. The Southeast Asian country has pledged to ship up to $570 million worth of commodities in addition to cash to pay for the Suhkoi SU-35 fighter jets, which are expected to be delivered in stages starting in two years. Indonesian Trade Minister Enggartiasto Lukita said in a joint statement with Defence Minister Ryamizard Ryacudu that details of the type and volume of commodities were  still being negotiated . Previously he had said the exports could include palm oil, tea, and coffee. The deal is expected to be finalised soon between Indonesian state trading company PT Perusahaan Perdangangan Indonesia and Russian state conglomerate Rostec. Russia is currently facing a new round of U.S.-imposed trade sanctions. Meanwhile, Southeast Asia s largest economy is trying to promote its palm oil products amid threats of a cut in consumption by European Union countries. Indonesia is also trying to modernize its ageing air force after a string of military aviation accidents. Indonesia, which had a $411 million trade surplus with Russia in 2016, wants to expand bilateral cooperation in tourism, education, energy, technology and aviation among others."

In [None]:
# Apply the same cleaning used on the training corpus, then display the result.
# text is overwritten with its cleaned form; later cells rely on that.
text = wordopt(text)
text

In [None]:
# Import the pickle module
import pickle

In [None]:
# Save the fitted TF-IDF vectorizer (vectorization) to ../models/vectorization.pkl.
# The with-block closes the file, so the data is on disk before it is read back.
with open('../models/vectorization.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorization, vectorizer_file)

In [None]:
# Load the fitted TF-IDF vectorizer back from disk; the with-block closes the file handle.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open('../models/vectorization.pkl', 'rb') as vectorizer_file:
    vec_model = pickle.load(vectorizer_file)

In [None]:
# Vectorise the cleaned sample with the reloaded vectorizer, wrapped as a one-document Series.
textvec = vec_model.transform(pd.Series([text]))


In [None]:
# Display the transformed sample produced by the reloaded vectorizer.
textvec

In [None]:
# Same transform with the in-memory vectorizer, for comparison with textvec from the reloaded copy.
vectorization.transform(pd.Series(text))

In [None]:
# Reload the persisted logistic-regression classifier; the with-block closes the file handle.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open('../models/LinearRegressor.pkl', 'rb') as model_file:
    pred_model = pickle.load(model_file)

In [None]:
# textvec is already the feature matrix returned by the vectorizer's transform
# (one row per document), so it goes to predict() unchanged. Wrapping it in a
# list and reading `.array` raised AttributeError, because lists have no such attribute.
pred_model.predict(textvec)

In [None]:
def test2(text):
    """Label one raw news string using the artifacts reloaded from disk.

    The input is cleaned with ``wordopt``, vectorised with ``vec_model`` and
    classified with ``pred_model`` (both loaded from pickle files).

    Returns
    -------
    str or None
        'Fake News' for class 0, 'True as fuck' for class 1, None for any other label.
    """
    # NOTE(review): the class-1 label is profane, and the class-0 label differs
    # from the one returned by test ('Fake'); worth unifying once callers of
    # the exact strings are confirmed.
    cleaned = wordopt(text)
    features = vec_model.transform(pd.Series(cleaned))
    label = pred_model.predict(features)[0]  # one document in, one label out

    if label == 0:
        return 'Fake News'
    if label == 1:
        return 'True as fuck'
    return None

In [None]:
# Read one article from the prompt and classify it with the reloaded artifacts.
# input() already returns a str, so no conversion is needed.
# This cell blocks on user input during "Run All".
news = input()
test2(news)