In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load the dataset.
# Only the three columns kept by this notebook are parsed, so the remaining
# columns of the CSV are never materialised in memory.
columns_needed = ['TITLE', 'URL', 'CATEGORY']
df = pd.read_csv("uci-news-aggregator.csv", usecols=columns_needed)
# `usecols` preserves the file's own column order, so re-select explicitly to
# guarantee the TITLE, URL, CATEGORY order regardless of the CSV layout.
df = df[columns_needed]

In [4]:
# TF-IDF features for the article titles (English stop words removed).
# Missing titles are replaced with an empty string before vectorising:
# a NaN document makes fit_transform raise, and filling (rather than dropping)
# keeps exactly one matrix row per row of `df`, which the recommendation
# function relies on when it applies the same row mask to both.
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['TITLE'].fillna(''))

In [5]:
# Group the TF-IDF rows into a fixed number of K-Means clusters; random_state
# is pinned so the initialisation does not vary between runs.
num_clusters = 5
kmeans = KMeans(random_state=42, n_clusters=num_clusters)
kmeans.fit(tfidf_matrix)
# One label per row of `tfidf_matrix`, stored next to the article it belongs to.
df['Cluster'] = kmeans.labels_

In [6]:
# Recommendation function
def recommend_articles_cluster(user_input, top_n=5):
    """Recommend the articles whose titles best match a free-text query.

    The query is vectorised with the notebook-level ``vectorizer``, assigned to
    a cluster by the notebook-level ``kmeans`` model, and then compared (cosine
    similarity) only against the rows of ``tfidf_matrix`` that belong to that
    cluster. Rows of ``df`` and rows of ``tfidf_matrix`` are matched by
    position.

    Parameters
    ----------
    user_input : str
        Query text.
    top_n : int, default 5
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``TITLE``, ``URL`` and ``Similarity``, ordered from most to
        least similar. Articles with zero similarity are never returned, so
        the frame has fewer than ``top_n`` rows (possibly none) when the query
        shares no vocabulary with enough titles in its cluster.
    """
    user_vec = vectorizer.transform([user_input])
    user_cluster = kmeans.predict(user_vec)[0]

    # Build the membership mask once, as a plain boolean array, so exactly the
    # same rows are picked from the DataFrame and from the sparse matrix.
    in_cluster = (df['Cluster'] == user_cluster).to_numpy()
    similarity = cosine_similarity(user_vec, tfidf_matrix[in_cluster]).ravel()

    candidates = df.loc[in_cluster, ['TITLE', 'URL']].assign(Similarity=similarity)

    # A similarity of 0 means the title has no term in common with the query
    # (always the case for an out-of-vocabulary query); such rows are not
    # meaningful recommendations, so drop them instead of padding the result.
    candidates = candidates[candidates['Similarity'] > 0]

    # Stable sort: ties keep their original dataset order, run after run.
    ranked = candidates.sort_values(by='Similarity', ascending=False, kind='stable')
    return ranked.head(top_n)


In [7]:
# Example query
user_query = "artificial intelligence and machine learning"
results = recommend_articles_cluster(user_query)

# Print each recommended title followed by its link
for title, url in zip(results['TITLE'], results['URL']):
    print(f"• {title}\n  🔗 {url}\n")

• Artificial intelligence
  🔗 http://missoulanews.bigskypress.com/missoula/artificial-intelligence/Content\?oid=1986651

• Transcendence is an artificial intelligence missing the intelligence
  🔗 http://www.tri-cityherald.com/2014/04/17/2930279/transcendence-is-an-artificial.html\?sp=/99/1191/

• REVIEW: Transcendence Has Only Artificial Intelligence
  🔗 http://time.com/64808/transcendence-movie-review/

• Film questions artificial intelligence
  🔗 http://dailytrojan.com/2014/04/16/film-questions-artificial-intelligence/

• Transcendence Review: Artificial Intelligence Takes Over
  🔗 http://www.moviefanatic.com/2014/04/transcendence-review-artificial-intelligence-takes-over/

