In [11]:
import hopsworks
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import os

In [12]:
import hsfs

# 1. Login
project = hopsworks.login()

# 2. Get the Feature Store (This triggers the metadata check)
try:
    fs = project.get_feature_store("A1ID2223")
    print(f"Successfully connected to Feature Store: {fs.name}")
except Exception as e:
    print(f"Feature Store Connection Error: {e}")

# 3. Check versions
print(f"HSFS Version: {hsfs.__version__}")

2026-01-08 23:20:43,273 INFO: Closing external client and cleaning up certificates.
Connection closed.
2026-01-08 23:20:43,276 INFO: Initializing external client
2026-01-08 23:20:43,276 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-01-08 23:20:44,728 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1267871
Successfully connected to Feature Store: a1id2223_featurestore
HSFS Version: 4.2.10


In [13]:
# Get feature groups
sentiment_fg = fs.get_feature_group("sentiments", version=2)
opening_price_fg = fs.get_feature_group("opening_prices", version=2)

In [14]:
# Get yahoo ticker for news and stock price
ticker = yf.Ticker("AAPL")
sentiments = ticker.news
price = ticker.history(period="1d")

In [15]:
cleaned_sentiments = []
# Process sentiment -> [(title, summary)]
for sentiment in sentiments:
    content = sentiment["content"]
    title = content["title"]
    summary = content["summary"]
    cleaned_sentiments.append((title, summary))

In [16]:
import torch
from transformers import pipeline

# Quick check for device
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using device: {device}")

# Simple FinBERT sentiment pipeline
classifier = pipeline("text-classification", model="ProsusAI/finbert")

# Optional: alias for compatibility with other cells
sentiment_nlp = classifier

# Initialise the sentiment scores (if needed later)
sentiment_neg, sentiment_pos, sentiment_neu = 0, 0, 0

Using device: mps


Device set to use mps:0


In [17]:
# Load FinBERT sentiment pipeline
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import pipeline as hf_pipeline

model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
sentiment_nlp = hf_pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True
)

# Try to move model to Apple Silicon MPS if available
if torch.backends.mps.is_available():
    try:
        model.to("mps")
        print("FinBERT model moved to MPS device")
    except Exception as e:
        print(f"Could not move model to mps: {e}")

Device set to use mps:0



FinBERT model moved to MPS device


In [18]:
import pandas as pd
from datetime import timezone

rows = []
# Score each article with FinBERT and collect per-article probabilities
for item in sentiments or []:
    content = item.get("content", {})
    title = content.get("title") or ""
    summary = content.get("summary") or ""
    text = f"{title}. {summary}".strip()
    print(title)
    if not text:
        continue

    # Derive publish date from providerPublishTime (unix seconds)
    ts = item.get("providerPublishTime")
    if ts is None:
        dt = pd.Timestamp.utcnow().normalize()
    else:
        # Convert to UTC, drop timezone, normalize to date
        dt = pd.to_datetime(ts, unit="s", utc=True).tz_convert(None).normalize()

    # Run FinBERT and get probabilities for all classes
    all_scores = sentiment_nlp(text)[0]  # [{label: 'positive'|'negative'|'neutral', score: float}, ...]
    score_map = {s["label"].lower(): s["score"] for s in all_scores}

    pos = score_map.get("positive", 0.0)
    neg = score_map.get("negative", 0.0)
    neu = score_map.get("neutral", 0.0)
    polarity = pos - neg

    rows.append({
        "date": dt,
        "sentiment_pos": pos,
        "sentiment_neg": neg,
        "sentiment_neu": neu,
        "sentiment_polarity": polarity,
    })

article_df = pd.DataFrame(rows)
print(f"Scored {len(article_df)} articles")

Alphabet overtakes Apple as world's second-most-valuable company behind Nvidia
JPMorgan takes over Apple card, Paramount reaffirms WBD bid
The Apple Card headache for Goldman Sachs is almost over
Equities End Mixed Ahead of Friday's Nonfarm Payrolls Report; Defense Stocks Rise
Stock Market Today: Dow, Small Caps, Oil Move Higher; Defense Firms Gain As Valero Clears Base (Live Coverage)
If You'd Invested $1,000 in the Invesco QQQ ETF 27 Years Ago, Here's How Much You'd Have Today
Telecom Stocks 2026: As Wireless Price War Fears Grow, Is AT&T, T-Mobile Or Verizon The Best Buy?
These Stocks Are Moving the Most Today: Lockheed, Northrop, Alphabet, Applied Digital, Bloom Energy, Neogen, and More
Apple's Credit Card Is Switching Banks. Here's What That Means for Your Money
Alphabet Dethrones Apple Amid Tech Stock Shuffle
Scored 10 articles


In [19]:
# Aggregate to daily means to match backfill features
if not article_df.empty:
    article_df["date"] = pd.to_datetime(article_df["date"]).dt.normalize()
    sentiment_daily = (
        article_df.groupby("date").agg({
            "sentiment_polarity": "mean",
            "sentiment_neg": "mean",
            "sentiment_neu": "mean",
            "sentiment_pos": "mean",
        })
    )
    # Ensure timezone-naive index named 'date'
    sentiment_daily.index = sentiment_daily.index.tz_localize(None)
    sentiment_daily.index.name = "date"
    print(sentiment_daily.head())
else:
    sentiment_daily = pd.DataFrame(
        columns=["sentiment_polarity", "sentiment_neg", "sentiment_neu", "sentiment_pos"]
    )
    sentiment_daily.index.name = "date"
    print("No articles found for daily aggregation")

            sentiment_polarity  sentiment_neg  sentiment_neu  sentiment_pos
date                                                                       
2026-01-08            0.207758       0.190224       0.411793       0.397982


In [20]:
# Insert aggregated sentiments into Hopsworks Feature Store
if sentiment_daily is not None and not sentiment_daily.empty:
    df_to_insert = sentiment_daily.reset_index()
    df_to_insert.columns = df_to_insert.columns.str.lower()

    # Ensure feature group alignment with backfill
    fg = fs.get_or_create_feature_group(
        name="sentiments",
        description="AAPL stock sentiments",
        version=2,
        primary_key=["date"],
        event_time="date",
    )
    fg.insert(df_to_insert, wait=True)
    print("Inserted daily sentiments into feature store")
else:
    print("No sentiment data to insert today")

Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: sentiments_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1267871/jobs/named/sentiments_2_offline_fg_materialization/executions
2026-01-08 23:21:16,588 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-08 23:21:26,303 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-08 23:23:26,033 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-08 23:23:26,192 INFO: Waiting for log aggregation to finish.
2026-01-08 23:23:48,369 INFO: Execution finished successfully.
Inserted daily sentiments into feature store


In [21]:
# Read all data from sentiments feature group
sentiments_fg = fs.get_feature_group("sentiments", version=2)
sentiments_df = sentiments_fg.read()

# Sort by date and show the most recent entries
latest = sentiments_df.sort_values('date', ascending=False).head(5)
print("Latest sentiment entries:")
print(latest)

# Verify today's date is present
today = pd.Timestamp.utcnow().normalize()
today_data = sentiments_df[sentiments_df['date'] == today]
print(f"\nToday's sentiment ({today.date()}):")
print(today_data)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.00s) 
Latest sentiment entries:
                          date  sentiment_polarity  sentiment_neg  \
1582 2026-01-08 00:00:00+00:00            0.207758       0.190224   
1581 2026-01-07 00:00:00+00:00           -0.011626       0.390821   
1580 2026-01-06 00:00:00+00:00            0.168251       0.277924   
1579 2026-01-05 00:00:00+00:00            0.262844       0.159274   
1578 2026-01-02 00:00:00+00:00            0.069390       0.317900   

      sentiment_neu  sentiment_pos  
1582       0.411793       0.397982  
1581       0.229984       0.379195  
1580       0.275902       0.446175  
1579       0.418608       0.422118  
1578       0.294810       0.387290  

Today's sentiment (2026-01-08):
                          date  sentiment_polarity  sentiment_neg  \
1582 2026-01-08 00:00:00+00:00            0.207758       0.190224   

      sentiment_neu  sentiment_pos  
1582       0.411793       0.397982  


In [22]:
# # Insert today's stock price into Hopsworks Feature Store
# if not price.empty:
#     # Prepare stock data to match backfill format
#     stock_today = price[['Open']].copy()
    
#     # Remove timezone and normalize to date only
#     stock_today.index = stock_today.index.tz_convert(None).normalize()
#     stock_today.index.name = 'date'
    
#     # Reset index to get date as column and lowercase column names
#     stock_insert = stock_today.reset_index()
#     stock_insert.columns = stock_insert.columns.str.lower()
    
#     print("Stock data to insert:")
#     print(stock_insert)
    
#     # Get or create the feature group (should already exist from backfill)
#     opening_fg = fs.get_or_create_feature_group(
#         name="opening_prices",
#         description="AAPL opening prices",
#         version=1,
#         primary_key=["date"],
#         event_time="date",
#     )
#     opening_fg.insert(stock_insert, wait=True)
#     print("Inserted today's opening price into feature store")
# else:
#     print("No stock price data available for today")

In [None]:
import numpy as np

print(price)

# Insert today's stock price into opening_prices v2 (with placeholder target)
if not price.empty:
    # Refresh feature store connection to avoid timeouts
    fs = project.get_feature_store("A1ID2223")
    
    stock_today = price[["Open"]].copy()
    stock_today.index = stock_today.index.tz_convert(None).normalize()
    stock_today.index.name = 'date'

    stock_insert = stock_today.reset_index()
    stock_insert.columns = stock_insert.columns.str.lower()  # ['date','open']

    # Match FG schema: include target_open as unknown for today
    # Use np.nan instead of pd.NA for better compatibility with float columns
    stock_insert['target_open'] = np.nan

    opening_fg = fs.get_or_create_feature_group(
        name="opening_prices",
        description="AAPL opening prices with next-day target",
        version=2,
        primary_key=["date"],
        event_time="date",
    )
    opening_fg.insert(stock_insert, wait=True)
    print("Inserted today's opening price (with target_open NA) into feature store")
    print(stock_insert)
else:
    print("No stock price data available for today")

                                 Open        High         Low       Close  \
Date                                                                        
2026-01-08 00:00:00-05:00  256.994995  259.279999  255.699997  259.040009   

                             Volume  Dividends  Stock Splits  
Date                                                          
2026-01-08 00:00:00-05:00  50211734        0.0           0.0  


Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: opening_prices_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1267871/jobs/named/opening_prices_2_offline_fg_materialization/executions
2026-01-08 23:24:19,039 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-08 23:24:22,277 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-08 23:24:25,490 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-08 23:26:29,008 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-08 23:26:29,173 INFO: Waiting for log aggregation to finish.
2026-01-08 23:26:51,380 INFO: Execution finished successfully.
Inserted today's opening price (with target_open NA) into feature store
        date        open  target_open
0 2026-01-08  256.994995          NaN
