In [2]:
!pip install chromadb sentence-transformers



In [3]:
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer
from datetime import datetime

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Initialize ChromaDB Client
# Step 1: Initialize ChromaDB Client - No need for Settings anymore
client = chromadb.Client() # This is the updated way to initialize

# Set up the ChromaDB Collection
collection_name = "news_articles"
if collection_name not in client.list_collections():
    collection = client.create_collection(name=collection_name)
else:
    collection = client.get_collection(name=collection_name)

# Load Pre-trained Embedding Model
model = SentenceTransformer("all-MiniLM-L6-v2")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
def add_articles():
    """Function to add news articles with metadata to the database."""
    print("Enter news articles (type 'done' to finish):")
    articles = []
    categories = []
    dates = []

    while True:
        article = input("Article: ")
        if article.lower() == "done":
            break

        category = input("Category (e.g., technology, science): ")
        date = input("Date (YYYY-MM-DD): ")

        # Validate date
        try:
            date = datetime.strptime(date, "%Y-%m-%d").strftime("%Y-%m-%d")
        except ValueError:
            print("Invalid date format. Try again.")
            continue

        articles.append(article)
        categories.append(category)
        dates.append(date)

    if articles:
        embeddings = model.encode(articles).tolist()
        ids = [f"article_{i}" for i in range(len(articles))]
        metadata = [{"category": cat, "date": date} for cat, date in zip(categories, dates)]
        collection.add(documents=articles, embeddings=embeddings, metadatas=metadata, ids=ids)
        print(f"{len(articles)} articles added to the collection.")
    else:
        print("No articles were added.")

In [9]:
def recommend_articles():
    """Function to recommend articles based on user preferences and filters."""
    preference = input("Describe the type of news you're interested in: ")
    category_filter = input("Filter by category (or leave blank): ").lower()
    date_filter = input("Filter by date range (YYYY-MM-DD to YYYY-MM-DD, or leave blank): ")

    # Parse date range if provided
    start_date, end_date = None, None
    if date_filter:
        try:
            start_date, end_date = date_filter.split(" to ")
            start_date = datetime.strptime(start_date, "%Y-%m-%d")
            end_date = datetime.strptime(end_date, "%Y-%m-%d")
        except ValueError:
            print("Invalid date range format. Ignoring date filter.")

    query_embedding = model.encode([preference]).tolist()[0]
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=5,
        where={
            "$and": [
                {"category": category_filter} if category_filter else {},
                {"date": {"$gte": start_date.strftime("%Y-%m-%d")}} if start_date else {},
                {"date": {"$lte": end_date.strftime("%Y-%m-%d")}} if end_date else {},
            ]
        }
    )

    print("\nRecommended Articles:")
    for i, (doc, score, meta) in enumerate(zip(results["documents"][0], results["distances"][0], results["metadatas"][0])):
        print(f"{i + 1}. {doc} | Relevance Score: {1 - score:.4f} | Category: {meta['category']} | Date: {meta['date']}")
    print()


In [10]:
def main():
    """Main function for the news recommendation system."""
    print("Welcome to the Enhanced News Recommendation System!")
    while True:
        print("\nOptions:")
        print("1. Add news articles")
        print("2. Get news recommendations")
        print("3. Exit")
        choice = input("Choose an option: ")

        if choice == "1":
            add_articles()
        elif choice == "2":
            recommend_articles()
        elif choice == "3":
            print("Goodbye!")
            break
        else:
            print("Invalid option. Please try again.")

# Run the application
if __name__ == "__main__":
    main()

Welcome to the Enhanced News Recommendation System!

Options:
1. Add news articles
2. Get news recommendations
3. Exit
Choose an option: 1
Enter news articles (type 'done' to finish):
Article: SpaceX launches new satellites for Starlink.
Category (e.g., technology, science): technology
Date (YYYY-MM-DD): 2023-11-15
Article: 2
Category (e.g., technology, science): 
Date (YYYY-MM-DD): 
Invalid date format. Try again.
Article: 2023-11-15
Category (e.g., technology, science): 
Date (YYYY-MM-DD): 
Invalid date format. Try again.
Article: 3
Category (e.g., technology, science): 
Date (YYYY-MM-DD): 
Invalid date format. Try again.
Article: fgfhf
Category (e.g., technology, science): science
Date (YYYY-MM-DD): 2023-11-15


KeyboardInterrupt: Interrupted by user

Welcome to the Enhanced News Recommendation System!

Options:
1. Add news articles
2. Get news recommendations
3. Exit
Choose an option: 1
Enter news articles (type 'done' to finish):
Article: SpaceX launches new satellites for Starlink.
Category (e.g., technology, science): technology
Date (YYYY-MM-DD): 2023-11-15
Article: Climate change report highlights urgent action needed.
Category (e.g., technology, science): science
Date (YYYY-MM-DD): 2023-10-10
Article: done
2 articles added to the collection.

Options:
1. Add news articles
2. Get news recommendations
3. Exit
Choose an option: 2
Describe the type of news you're interested in: space exploration
Filter by category (or leave blank): technology
Filter by date range (YYYY-MM-DD to YYYY-MM-DD, or leave blank): 2023-01-01 to 2023-12-31

Recommended Articles:
1. SpaceX launches new satellites for Starlink. | Relevance Score: 0.9123 | Category: technology | Date: 2023-11-15
