# Real-Time Personalized News Recommendation
A complete Colab notebook with an interactive UI sidebar for recommended news articles.

In [32]:
%%capture
!pip install newsapi-python sentence-transformers pandas ipywidgets voila

In [33]:
import os
import pandas as pd
import math
import numpy as np
from datetime import datetime, timezone
from newsapi import NewsApiClient
from sentence_transformers import SentenceTransformer

## Configuration
Set your NewsAPI key as an environment variable or directly below.

In [34]:
import os
from newsapi import NewsApiClient
from sentence_transformers import SentenceTransformer

# Option 1: set via environment variable in Colab
# os.environ['NEWSAPI_KEY'] = 'YOUR_KEY_HERE'

# Option 2: directly assign (less secure)
NEWSAPI_KEY = os.getenv('NEWSAPI_KEY', '5dc3d16fe871435685839cc0e4a6da3f')
newsapi = NewsApiClient(api_key=NEWSAPI_KEY)
embedder = SentenceTransformer('all-MiniLM-L6-v2')


In [35]:
# Configuration
# Option 1: set via environment variable in Colab
# import os
# os.environ['NEWSAPI_KEY'] = 'YOUR_KEY_HERE'

# Option 2: directly assign (less secure)
NEWSAPI_KEY = os.getenv('NEWSAPI_KEY', '5dc3d16fe871435685839cc0e4a6da3f')
newsapi = NewsApiClient(api_key=NEWSAPI_KEY)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

In [36]:
import os
import pandas as pd
import math
import numpy as np
from datetime import datetime, timezone
from newsapi import NewsApiClient
from sentence_transformers import SentenceTransformer

# Configuration
NEWSAPI_KEY = os.getenv('NEWSAPI_KEY', '5dc3d16fe871435685839cc0e4a6da3f')
newsapi = NewsApiClient(api_key=NEWSAPI_KEY)
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Fetch articles
articles = []

# Top-headlines for sports & entertainment
for cat in ['sports', 'entertainment']:
    resp = newsapi.get_top_headlines(category=cat, language='en', page_size=50)
    for art in resp.get('articles', []):
        text = art.get('content') or art.get('description') or ''
        articles.append({
            'title': art.get('title'),
            'url': art.get('url'),
            'category': cat,
            'published_at': art.get('publishedAt'),
            'text': text
        })

# Politics via everything endpoint
resp = newsapi.get_everything(
    q='politics',
    language='en',
    page_size=50,
    sort_by='publishedAt'
)
for art in resp.get('articles', []):
    text = art.get('content') or art.get('description') or ''
    articles.append({
        'title': art.get('title'),
        'url': art.get('url'),
        'category': 'politics',
        'published_at': art.get('publishedAt'),
        'text': text
    })

# Create DataFrame
df = pd.DataFrame(articles)
df['published_at'] = pd.to_datetime(df['published_at'])
df['age_hours'] = (
    datetime.now(timezone.utc)
    - df['published_at']
).dt.total_seconds() / 3600

# Preview
df.head()


Unnamed: 0,title,url,category,published_at,text,age_hours
0,Still the queen: Katie Ledecky wins 800 meters...,https://www.washingtonpost.com/sports/olympics...,sports,2025-08-02 22:24:03+00:00,While the swimming world has buzzed about the ...,25.349725
1,2025 Wyndham Championship leaderboard: Cameron...,https://www.cbssports.com/golf/news/2025-wyndh...,sports,2025-08-02 22:21:07+00:00,Will it finally be his time? Following a third...,25.398614
2,Jerry Jones dismisses Micah Parsons trade requ...,https://www.nbcsports.com/nfl/profootballtalk/...,sports,2025-08-02 21:38:52+00:00,Micah Parsons says he wants out. Jerry Jones s...,26.102781
3,Lewis Hamilton makes startling claim after F1 ...,https://nypost.com/2025/08/02/sports/lewis-ham...,sports,2025-08-02 19:54:00+00:00,Lewis Hamilton’s suggestion for Ferrari after ...,27.850559
4,"Antonio Gates makes history, becoming Hall of ...",https://www.nbcsports.com/nfl/profootballtalk/...,sports,2025-08-02 19:26:44+00:00,"Antonio Gates made history Saturday, becoming ...",28.305003


## Fetch Articles
- Sports & Entertainment via top-headlines
- Politics via the everything endpoint

## Compute Embeddings

In [37]:
# Compute text embeddings
df['embedding'] = embedder.encode(
    df['text'].tolist(),
    normalize_embeddings=True
).tolist()

  return forward_call(*args, **kwargs)


## Build User Profile Embedding

In [38]:
# Define user category weights
user_weights = {'sports': 0.5, 'politics': 0.3, 'entertainment': 0.2}

# Compute centroids
centroids = {}
for cat, w in user_weights.items():
    embs = [emb for emb, c in zip(df['embedding'], df['category']) if c == cat]
    if embs:
        centroids[cat] = np.mean(embs, axis=0)

# Weighted average profile
user_profile = sum(w * centroids[cat] for cat, w in user_weights.items() if cat in centroids)

## Score and Select Top Articles

In [39]:
# Score articles
scores = []
for _, row in df.iterrows():
    cat_score = user_weights.get(row['category'], 0)
    freshness = math.exp(-0.1 * row['age_hours'])
    sim = np.dot(user_profile, row['embedding'])
    score = 0.5 * cat_score + 0.3 * freshness + 0.2 * sim
    scores.append(score)

df['score'] = scores
topk = df.sort_values('score', ascending=False).head(20).reset_index(drop=True)
topk[['title','category','published_at','score']]

Unnamed: 0,title,category,published_at,score
0,2025 Wyndham Championship leaderboard: Cameron...,sports,2025-08-02 22:21:07+00:00,0.30442
1,Jerry Jones dismisses Micah Parsons trade requ...,sports,2025-08-02 21:38:52+00:00,0.303056
2,Still the queen: Katie Ledecky wins 800 meters...,sports,2025-08-02 22:24:03+00:00,0.298099
3,Sterling Sharpe inducted into Pro Football HOF...,sports,2025-08-02 18:17:07+00:00,0.295827
4,"For the First Time, Bears Training Camp Feels ...",sports,2025-08-02 15:42:43+00:00,0.293567
5,Hall of Fame Enshrinement: 2025 class takes it...,sports,2025-08-02 17:53:00+00:00,0.29212
6,Chiefs' Rashee Rice says he has 'completely ch...,sports,2025-08-02 18:02:00+00:00,0.291712
7,The Zach Allen extension is further proof of t...,sports,2025-08-02 15:20:39+00:00,0.290968
8,Marlins announcer dunks on sad Yankees fan dur...,sports,2025-08-02 15:04:00+00:00,0.29004
9,Connecticut Sun will reportedly be sold for $3...,sports,2025-08-02 17:42:47+00:00,0.289264


## Interactive UI Sidebar

In [40]:
import ipywidgets as widgets
from IPython.display import display, HTML

# Create buttons for each recommended article
buttons = []
for idx, row in topk.iterrows():
    btn = widgets.Button(description=row['title'], layout=widgets.Layout(width='auto'))
    buttons.append(btn)

# Sidebar and content panes
sidebar = widgets.VBox(buttons, layout=widgets.Layout(width='30%', height='600px', overflow='auto'))
content = widgets.Output(layout=widgets.Layout(width='70%', padding='10px'))

# Button click handler
def on_button_click(btn):
    idx = buttons.index(btn)
    art = topk.iloc[idx]
    content.clear_output()
    with content:
        display(HTML(f"<h2>{art['title']}</h2>"))
        display(HTML(f"<p><em>{art['category']} | {art['published_at']}</em></p>"))
        display(HTML(f"<p>{art['text']}</p>"))
        display(HTML(f"<p><a href='{art['url']}' target='_blank'>Read full article</a></p>"))

for btn in buttons:
    btn.on_click(on_button_click)

# Display the UI
display(widgets.HBox([sidebar, content]))

HBox(children=(VBox(children=(Button(description='2025 Wyndham Championship leaderboard: Cameron Young in posi…

In [41]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Share via Binder + Voilà
1. Push this notebook to GitHub in a public repo.
2. Include `requirements.txt` with:
```
newsapi-python
sentence-transformers
pandas
ipywidgets
```
3. Use the Binder URL:
```
https://mybinder.org/v2/gh/<YourUser>/<YourRepo>/main?urlpath=voila/render/<NotebookName>.ipynb
```

In [42]:
import os
print(os.getcwd())

/content/repo


In [43]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [44]:
# List what’s under /content/drive
!ls /content/drive

MyDrive  Othercomputers


In [45]:
# List what’s under /content/drive/MyDrive
!ls /content/drive/MyDrive

'Activity Exemplar: Course 3 Automatidata Executive Summary.gslides'
'Activity Exemplar: Course 6 Automatidata Executive Summary.gslides'
'Activity Exemplar: Customer personas.gdoc'
'Activity Exemplar: Hootsuite Tweets and Planner.gdoc'
'Activity Exemplar: Job search project plan.gsheet'
'Activity Exemplar: Product Backlog.gsheet'
'Activity Exemplar: Respond to customer comments.gdoc'
'Activity Exemplar: Responsive search ad organizer.gsheet'
'Activity Exemplar: Social listening insights.gdoc'
'Activity Exemplar: Social media calendar .gsheet'
'Activity Exemplar: TikTok Course 4 executive summary.gslides'
'Activity Exemplar: Waze Course 4 executive summary.gslides'
'Activity Exemplar: Webpage titles and meta descriptions.gdoc'
'Activity Template: Course 4 PACE strategy document.gdoc'
'Activity Template: Course 6 PACE strategy document.gdoc'
'Activity Template: Customer personas.gdoc'
'Activity Template: Job search project plan.gsheet'
'Activity Template: RACI Matrix.gdoc'
'Activity Tem

In [46]:
# Finally list your Colab Notebooks folder
!ls "/content/drive/MyDrive/Colab Notebooks"

 00-Large-Language-Models.ipynb
 01_importing_tensorflow.ipynb
'02_02 (1).ipynb'
'02_02 (2).ipynb'
'02_02 (3).ipynb'
 02_02.ipynb
'02_02selfcheck (1).ipynb'
'02_02selfcheck (2).ipynb'
'02_02selfcheck (3).ipynb'
 02_02selfcheck.ipynb
'02_03 (1).ipynb'
'02_03 (2).ipynb'
 02_03.ipynb
 02_03selfcheck.ipynb
'02_04 (1).ipynb'
 02_04.ipynb
 02_04selfcheck.ipynb
'02_05 (1).ipynb'
 02_05.ipynb
 02_05selfcheck.ipynb
'02_06 (1).ipynb'
 02_06.ipynb
 02_06selfcheck.ipynb
 02_07.ipynb
 02_08.ipynb
'02_09 (1).ipynb'
 02_09.ipynb
 02_09selfcheck.ipynb
 02_defining_tensors.ipynb
 03_03selfcheck.ipynb
'03_05 (1).ipynb'
 03_05.ipynb
 03_05selfcheck.ipynb
 03_06.ipynb
 03_06selfcheck.ipynb
'03_07 (1).ipynb'
 03_07.ipynb
'03_07selfcheck (1).ipynb'
 03_07selfcheck.ipynb
'03_08 (1).ipynb'
 03_08.ipynb
'03_08selfcheck (1).ipynb'
 03_08selfcheck.ipynb
'03_09 (1).ipynb'
 03_09.ipynb
'03_09selfcheck (1).ipynb'
'03_09selfcheck (2).ipynb'
 03_09selfcheck.ipynb
'03_10selfcheck (1).ipynb'
 03_10selfcheck.ipynb
'03_1

In [47]:
%cd "/content/drive/MyDrive/Colab Notebooks"

/content/drive/MyDrive/Colab Notebooks


In [18]:
%cd /content
!git clone https://github.com/aiserhucui/news-recommendation-demo.git repo


/content
Cloning into 'repo'...
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 7 (delta 2), reused 6 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (7/7), 27.81 KiB | 1.54 MiB/s, done.
Resolving deltas: 100% (2/2), done.


In [48]:
# 1. Enter your repo directory
%cd /content/repo

/content/repo


In [49]:
# 2. Configure Git (only needed once per Colab session)
!git config user.email "aiserhucui@gmail.com"
!git config user.name  "Aiser Hucui"

In [50]:
# 3. Copy your updated notebook from Drive into the repo
!cp "/content/drive/MyDrive/Colab Notebooks/news_recommendation_complete_colab.ipynb" .


In [58]:
# 4. Commit & push your change
!git add news_recommendation_complete_colab.ipynb
!git commit -m "Update notebook from Colab"
!git push origin main


On branch main
Your branch is ahead of 'origin/main' by 1 commit.
  (use "git push" to publish your local commits)

nothing to commit, working tree clean
remote: Invalid username or token. Password authentication is not supported for Git operations.
fatal: Authentication failed for 'https://github.com/aiserhucui/news-recommendation-demo.git/'


In [59]:
# 1. Prompt for your GitHub PAT (keep it secret!)
from getpass import getpass
token = getpass("Enter your GitHub Personal Access Token: ")

Enter your GitHub Personal Access Token: ··········


In [61]:
!git remote set-url origin https://aiserhucui:$token@github.com/aiserhucui/news-recommendation-demo.git
!git push origin main


Enumerating objects: 5, done.
Counting objects:  20% (1/5)Counting objects:  40% (2/5)Counting objects:  60% (3/5)Counting objects:  80% (4/5)Counting objects: 100% (5/5)Counting objects: 100% (5/5), done.
Delta compression using up to 2 threads
Compressing objects:  33% (1/3)Compressing objects:  66% (2/3)Compressing objects: 100% (3/3)Compressing objects: 100% (3/3), done.
Writing objects:  33% (1/3)Writing objects:  66% (2/3)Writing objects: 100% (3/3)Writing objects: 100% (3/3), 8.05 KiB | 2.01 MiB/s, done.
Total 3 (delta 1), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas:   0% (0/1)[Kremote: Resolving deltas: 100% (1/1)[Kremote: Resolving deltas: 100% (1/1), completed with 1 local object.[K
remote: [1;31merror[m: GH013: Repository rule violations found for refs/heads/main.[K
remote: 
remote: - GITHUB PUSH PROTECTION[K
remote:   —————————————————————————————————————————[K
remote:     Resolve the following violations before pushing again[K
remote: 
