## Importing necessary libraries

In [1]:
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import re
import nltk
from nltk.corpus import stopwords

## Loading the trained model and TF-IDF vectorizer

In [2]:
# Global artifacts used by recommend_articles for category prediction.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load artifacts that come from a trusted source.

# Classifier used to predict an article's category (see lr.predict below).
lr = joblib.load("./global_models_and_dataset/news_category_model.pkl")
print("Article category prediction model loaded.")

# Fitted TF-IDF vectorizer that turns cleaned text into the classifier's input.
tfidf = joblib.load("./global_models_and_dataset/tfidf_vectorizer.pkl")
print("TF-IDF vectorizer loaded.")

Article category prediction model loaded.
TF-IDF vectorizer loaded.


# Prediction and Recommendation

In [3]:
# Make sure the NLTK stopword corpus is present locally, then keep the
# English list as a set so preprocess_text gets fast membership checks.
nltk.download('stopwords')
stop_words = {word for word in stopwords.words('english')}

def preprocess_text(text):
    """Clean raw text for TF-IDF vectorization.

    Steps, in order: lowercase, delete every character that is not an ASCII
    letter or whitespace, collapse whitespace runs to single spaces, and
    drop tokens found in the module-level ``stop_words`` set.

    Returns the surviving tokens joined by single spaces (possibly ``""``).
    """
    # Lowercase first, then strip digits/punctuation (removed, not replaced by a space)
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    # Normalise whitespace so the later split/join round-trips cleanly
    collapsed = re.sub(r'\s+', ' ', letters_only).strip()
    # Keep only tokens that are not stopwords
    kept_tokens = [token for token in collapsed.split() if token not in stop_words]
    return ' '.join(kept_tokens)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\rehma\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
def recommend_articles(input_text, top_n=3):
    """Predict the category of ``input_text`` and recommend similar articles.

    The text is cleaned with ``preprocess_text``, vectorized with the global
    ``tfidf`` vectorizer and classified with the global model ``lr``. The
    cleaned text is then re-vectorized with the predicted category's own
    vectorizer and ranked by cosine similarity against that category's
    pre-saved TF-IDF matrix.

    Parameters
    ----------
    input_text : str
        Raw article text to classify and match.
    top_n : int, default 3
        Maximum number of articles to return. ``0`` returns an empty frame.

    Returns
    -------
    tuple
        ``(recommended_articles, predicted_category)``: a DataFrame with up
        to ``top_n`` rows of the category dataset, ordered from most to
        least similar, and the label returned by the classifier.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    # Validate before any work: a negative count would silently slice the
    # ranking from the wrong end instead of failing.
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Preprocess the input text
    cleaned_input_text = preprocess_text(input_text)

    # Transform the input text using the global TF-IDF vectorizer
    input_vector = tfidf.transform([cleaned_input_text])

    # Predict the category using the trained model
    predicted_category = lr.predict(input_vector)[0]
    print("Prdicted category:", predicted_category)

    # Per-category artifacts are looked up by the predicted label.
    # NOTE: joblib.load unpickles its input; only load trusted files.
    category_df = pd.read_csv(f'./categorized_models_and_datasets/cat_datasets/category_{predicted_category}.csv')
    category_tfidf_matrix = joblib.load(f'./categorized_models_and_datasets/cat_tfidf_matrix/category_{predicted_category}_tfidf.pkl')
    category_tfidf_vector = joblib.load(f'./categorized_models_and_datasets/cat_tfidf_vect/category_{predicted_category}_tfidf_vectorizer.pkl')

    # Re-vectorize with the category-specific vectorizer so the input matches
    # the feature space of the category matrix.
    input_vector_new = category_tfidf_vector.transform([cleaned_input_text])

    # Calculate cosine similarity between the input vector and the category's TF-IDF matrix
    similarities = cosine_similarity(input_vector_new, category_tfidf_matrix)

    # Rank descending and take the head. Slicing the ascending order with
    # [-top_n:] would return *every* article when top_n == 0, because -0 == 0.
    ranked_indices = np.argsort(similarities.flatten())[::-1]
    top_indices = ranked_indices[:top_n]

    # Assumes row i of the saved matrix corresponds to row i of the CSV --
    # TODO confirm against the code that produced these artifacts.
    recommended_articles = category_df.iloc[top_indices]

    return recommended_articles, predicted_category

### Example usage of recommendation

In [5]:
# Finance-themed sample input used to demonstrate the recommender.
sample_text = "The stock market has seen volatility, but experts predict growth in tech stocks and sustainable investing. Investors are advised to diversify their portfolios."


In [6]:
# Run the pipeline on the sample with the default top_n; the function also
# prints the predicted category as a side effect.
recommendations, predicted_category = recommend_articles(sample_text)

Prdicted category: personal finance


In [7]:
# Bare expression: shows the predicted label as the cell's output.
predicted_category

'personal finance'

In [8]:
# Print each recommended article with its 1-based rank, its category label
# ('target' column) and its full text ('text' column).
for rank, (_, article) in enumerate(recommendations.iterrows(), start=1):
    print(f"Recommendation {rank}: {article['target']}")
    print(f"{article['text']}\n")

Recommendation 1: personal finance
junk bonds learn day trading learn how to invest learn to invest learning to invest learn to invest money list of stocks listed companies longterm investments losers macd madoff market data market data and information market data history market information market maker market news market reports market research market statistics market stock trading market summary markets and investments money magazine money to invest money trading stocks money 101 mutual funds invest online brokers online stock trading information online swing trading paper trading personal finance piercing candlestick pattern portfolio theory pre-market analysis price history price volatility prices of stocks profitability public companies public company public company research publicly traded stocks purchase stocks quote server quote summary real time quotes reversal rich save money save your money selecting stocks shorting stocks short selling stocks split stock stochastics stock 