In [24]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

In [25]:
#Loading Dataset
df=pd.read_csv('news_dataset.csv')


In [26]:
#Checking and removing null values
print(df.isnull().sum())

Unnamed: 0      0
title         558
text           39
label           0
dtype: int64


In [27]:
df=df.dropna()
df=df.dropna(axis=1)

In [28]:
#Splitting into features and target
x=df['text']
y=df['label']

In [29]:
# Train_Test_Split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [30]:
#TF-IDF Vectorizer
vectorizer = TfidfVectorizer(stop_words='english', max_features=10000)
x_train_tfidf= vectorizer.fit_transform(x_train)
x_test_tfidf=vectorizer.transform(x_test)

In [31]:
# Model Training
model=LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(x_train_tfidf, y_train)

#Prediction
pred=model.predict(x_test_tfidf)
print(f'Prediction is: {pred}')

Prediction is: [1 0 0 ... 1 0 1]


In [32]:
# Model Evaluation
accuracy= accuracy_score(y_test,pred)
print(f'Accuracy of the model is: {accuracy}')

f1= f1_score(y_test,pred)
print(f'F1 Score of the given model is: {f1}')

Accuracy of the model is: 0.9408023483365949
F1 Score of the given model is: 0.9418548774627583


In [33]:
# Saving the model
import joblib

joblib.dump(model, 'model.pkl')



['model.pkl']

In [34]:
joblib.dump(vectorizer, 'vectorizer.pkl')

['vectorizer.pkl']