In [1]:
import hopsworks
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import os

In [2]:
import hsfs

# 1. Log in to Hopsworks. `project` is reused further down to (re)fetch the
#    feature store handle.
project = hopsworks.login()

# 2. Get the Feature Store (this triggers the metadata check).
try:
    fs = project.get_feature_store("A1ID2223")
    print(f"Successfully connected to Feature Store: {fs.name}")
except Exception as e:
    print(f"Feature Store Connection Error: {e}")
    # Every later cell dereferences `fs`. Re-raise so a failed connection stops
    # the run here instead of resurfacing later as an unrelated NameError.
    raise

# 3. Report the installed HSFS client version (useful when debugging
#    client/backend mismatches).
print(f"HSFS Version: {hsfs.__version__}")

2026-01-02 16:15:18,245 INFO: Initializing external client
2026-01-02 16:15:18,246 INFO: Base URL: https://c.app.hopsworks.ai:443
2026-01-02 16:15:20,003 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1267871
Successfully connected to Feature Store: a1id2223_featurestore
HSFS Version: 4.2.10


In [3]:
# Fetch handles to the two version-2 feature groups, in the same order as
# before: "sentiments" first, then "opening_prices".
sentiment_fg, opening_price_fg = (
    fs.get_feature_group(fg_name, version=2)
    for fg_name in ("sentiments", "opening_prices")
)

In [4]:
# Build the Yahoo Finance ticker for AAPL, then pull its news items
# (`sentiments`) and the one-day price history (`price`) from it.
ticker = yf.Ticker("AAPL")
sentiments, price = ticker.news, ticker.history(period="1d")

In [5]:
# Reduce each news item to a (title, summary) pair.
# Uses strict indexing, so an item lacking "content", "title" or "summary"
# raises KeyError.
cleaned_sentiments = [
    (article["content"]["title"], article["content"]["summary"])
    for article in sentiments
]

In [6]:
import torch
from transformers import pipeline

# Report which device is available: Apple-Silicon MPS when present, else CPU.
# NOTE(review): `device` is only printed here; it is not passed to the pipeline
# below, which picks its own device.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

# Simple FinBERT text-classification pipeline. `sentiment_nlp` is bound to the
# same object as an alias for compatibility with other cells.
sentiment_nlp = classifier = pipeline("text-classification", model="ProsusAI/finbert")

# Zero-initialise the sentiment score accumulators (if needed later).
sentiment_neg = sentiment_pos = sentiment_neu = 0

Using device: mps


Device set to use mps:0


In [7]:
# Load FinBERT sentiment pipeline
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import pipeline as hf_pipeline

# Load the FinBERT checkpoint explicitly so the model object stays accessible.
model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Rebind `sentiment_nlp` to a pipeline that returns a score for every class.
# NOTE(review): `return_all_scores` may be deprecated in favour of `top_k=None`
# in newer transformers releases; switching could change the output nesting
# that the scoring cell's `[0]` indexing relies on - confirm before changing.
sentiment_nlp = hf_pipeline(
    "sentiment-analysis",
    tokenizer=tokenizer,
    model=model,
    return_all_scores=True,
)

# Best-effort move of the model to Apple Silicon MPS; a failure is reported
# but does not stop the notebook.
if torch.backends.mps.is_available():
    try:
        model.to("mps")
    except Exception as err:
        print(f"Could not move model to mps: {err}")
    else:
        print("FinBERT model moved to MPS device")

Device set to use mps:0



FinBERT model moved to MPS device


In [8]:
import pandas as pd
from datetime import timezone

rows = []

# Fallback publish date for items without a timestamp: today's UTC date as a
# tz-naive, midnight-normalized Timestamp. It is tz-naive on purpose so it
# matches the parsed-timestamp branch below; mixing tz-aware and tz-naive
# values in one "date" column breaks the later pd.to_datetime/groupby step.
today_utc = pd.Timestamp.now(tz="UTC").tz_convert(None).normalize()

# Score each article with FinBERT and collect per-article probabilities.
for item in sentiments or []:
    # `or {}` also covers a "content" key that is present but None.
    content = item.get("content") or {}
    title = content.get("title") or ""
    summary = content.get("summary") or ""
    print(title)

    # Skip items with neither title nor summary. The combined text below is
    # never empty (it would be "." for a blank item), so test the parts.
    if not (title.strip() or summary.strip()):
        continue
    text = f"{title}. {summary}".strip()

    # Derive publish date from providerPublishTime (unix seconds).
    # NOTE(review): items are read via item["content"][...]; with that nested
    # layout the publish time may live inside `content` (e.g. a "pubDate"
    # field) rather than at top level - confirm. If so, this lookup is always
    # None and every article is dated today.
    ts = item.get("providerPublishTime")
    if ts is None:
        dt = today_utc
    else:
        # Convert to UTC, drop timezone, normalize to date.
        dt = pd.to_datetime(ts, unit="s", utc=True).tz_convert(None).normalize()

    # Run FinBERT and get probabilities for all classes:
    # [{label: 'positive'|'negative'|'neutral', score: float}, ...]
    all_scores = sentiment_nlp(text)[0]
    score_map = {s["label"].lower(): s["score"] for s in all_scores}

    pos = score_map.get("positive", 0.0)
    neg = score_map.get("negative", 0.0)
    neu = score_map.get("neutral", 0.0)
    # Polarity is the positive probability minus the negative probability.
    polarity = pos - neg

    rows.append({
        "date": dt,
        "sentiment_pos": pos,
        "sentiment_neg": neg,
        "sentiment_neu": neu,
        "sentiment_polarity": polarity,
    })

# One row per scored article; an empty frame when nothing was scored.
article_df = pd.DataFrame(rows)
print(f"Scored {len(article_df)} articles")

CES 2026: What to expect from the tech industry’s biggest show of the year
Apple Cuts Vision Pro Output, Marketing as Demand Weakens
Apple Cuts Vision Pro Production and Marketing After Weak Consumer Demand
Apple Stock Initiated At Neutral. Here's Why.
Equities to Get Boost From Robust Earnings in 2026 But Near-Term Fed Path Uncertain, Analysts Say
Apple Stock’s Growth Trajectory Is Challenged in 2026. Why This Analyst Is Staying on the Sidelines.
Jim Cramer Shares Very Important Insight About Corning (GLW) & Data Centers
Billionaire Peter Thiel Sold Nvidia and Tesla to Buy This Other AI Stock
Strong Performance Lifted Apple (APPL) in Q3
Stocks Rise Pre-Bell Ahead of First Trading Session of 2026
Scored 10 articles


In [9]:
# Collapse per-article scores into one row of daily means per date, so the
# result matches the backfill features.
if article_df.empty:
    # Nothing was scored: build an empty frame with the same columns and an
    # index named 'date'.
    sentiment_daily = pd.DataFrame(
        columns=["sentiment_polarity", "sentiment_neg", "sentiment_neu", "sentiment_pos"]
    )
    sentiment_daily.index.name = "date"
    print("No articles found for daily aggregation")
else:
    # Normalize the date column in place to midnight timestamps.
    article_df["date"] = pd.to_datetime(article_df["date"]).dt.normalize()
    sentiment_daily = article_df.groupby("date")[
        ["sentiment_polarity", "sentiment_neg", "sentiment_neu", "sentiment_pos"]
    ].mean()
    # Ensure a timezone-naive index named 'date'.
    sentiment_daily.index = sentiment_daily.index.tz_localize(None).rename("date")
    print(sentiment_daily.head())

            sentiment_polarity  sentiment_neg  sentiment_neu  sentiment_pos
date                                                                       
2026-01-02             0.06939         0.3179        0.29481        0.38729


In [10]:
# Push the daily sentiment aggregates into the "sentiments" feature group.
if sentiment_daily is None or sentiment_daily.empty:
    print("No sentiment data to insert today")
else:
    # Turn the 'date' index into a column and lowercase every column name.
    df_to_insert = sentiment_daily.reset_index().rename(columns=str.lower)

    # Get (or create) version 2 of the feature group, keyed and event-timed
    # on 'date', so it lines up with the backfill.
    fg = fs.get_or_create_feature_group(
        name="sentiments",
        version=2,
        description="AAPL stock sentiments",
        primary_key=["date"],
        event_time="date",
    )
    fg.insert(df_to_insert, wait=True)
    print("Inserted daily sentiments into feature store")

Uploading Dataframe: 0.00% |          | Rows 0/1 | Elapsed Time: 00:00 | Remaining Time: ?huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: sentiments_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1267871/jobs/named/sentiments_2_offline_fg_materialization/executions
2026-01-02 16:16:19,033 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-02 16:16:22,255 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-02 16:16:25,470 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-02 16:18:02,618 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-02 16:18:02,794 INFO: Waiting for log aggregation to finish.
2026-01-02 16:18:11,475 INFO: Execution finished successfully.
Inserted daily sentiments into feature store


In [11]:
# Read all data back from the sentiments feature group to verify the insert.
sentiments_fg = fs.get_feature_group("sentiments", version=2)
sentiments_df = sentiments_fg.read()

# Sort by date and show the most recent entries.
latest = sentiments_df.sort_values("date", ascending=False).head(5)
print("Latest sentiment entries:")
print(latest)

# Verify today's date is present.
# Timestamp.now(tz="UTC") gives the same tz-aware UTC value as the deprecated
# Timestamp.utcnow().
# NOTE(review): the comparison assumes the 'date' column read back is tz-aware
# UTC, as in the captured output - confirm for other read engines.
today = pd.Timestamp.now(tz="UTC").normalize()
today_data = sentiments_df[sentiments_df["date"] == today]
print(f"\nToday's sentiment ({today.date()}):")
print(today_data)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.88s) 
Latest sentiment entries:
                          date  sentiment_polarity  sentiment_neg  \
1578 2026-01-02 00:00:00+00:00            0.069390       0.317900   
1577 2025-12-31 00:00:00+00:00            0.238841       0.181459   
1576 2025-12-30 00:00:00+00:00            0.118052       0.183541   
1    2025-12-29 00:00:00+00:00           -0.028438       0.218143   
0    2025-12-23 00:00:00+00:00            0.298535       0.131626   

      sentiment_neu  sentiment_pos  
1578       0.294810       0.387290  
1577       0.398241       0.420300  
1576       0.514867       0.301592  
1          0.592153       0.189705  
0          0.438214       0.430160  

Today's sentiment (2026-01-02):
                          date  sentiment_polarity  sentiment_neg  \
1578 2026-01-02 00:00:00+00:00             0.06939         0.3179   

      sentiment_neu  sentiment_pos  
1578        0.29481        0.38729  


In [12]:
# # (Disabled: superseded by the opening_prices version=2 insert in the next cell.) Insert today's stock price into Hopsworks Feature Store
# if not price.empty:
#     # Prepare stock data to match backfill format
#     stock_today = price[['Open']].copy()
    
#     # Remove timezone and normalize to date only
#     stock_today.index = stock_today.index.tz_convert(None).normalize()
#     stock_today.index.name = 'date'
    
#     # Reset index to get date as column and lowercase column names
#     stock_insert = stock_today.reset_index()
#     stock_insert.columns = stock_insert.columns.str.lower()
    
#     print("Stock data to insert:")
#     print(stock_insert)
    
#     # Get or create the feature group (should already exist from backfill)
#     opening_fg = fs.get_or_create_feature_group(
#         name="opening_prices",
#         description="AAPL opening prices",
#         version=1,
#         primary_key=["date"],
#         event_time="date",
#     )
#     opening_fg.insert(stock_insert, wait=True)
#     print("Inserted today's opening price into feature store")
# else:
#     print("No stock price data available for today")

In [13]:
import numpy as np

# Show the raw one-day price frame that the insert below is derived from.
print(price)

# Insert today's stock price into opening_prices v2 (with placeholder target).
if price.empty:
    print("No stock price data available for today")
else:
    # Refresh feature store connection to avoid timeouts.
    fs = project.get_feature_store("A1ID2223")

    # Keep only the opening price. The index is converted to tz-naive,
    # normalized to midnight and renamed to 'date'.
    stock_today = price[["Open"]].copy()
    stock_today.index = stock_today.index.tz_convert(None).normalize().rename("date")

    # Flatten to columns ['date', 'open'] and add `target_open` as NaN to match
    # the feature group schema (today's target is not known yet). np.nan is
    # used rather than pd.NA for compatibility with float columns.
    stock_insert = (
        stock_today.reset_index()
        .rename(columns=str.lower)
        .assign(target_open=np.nan)
    )

    opening_fg = fs.get_or_create_feature_group(
        name="opening_prices",
        version=2,
        description="AAPL opening prices with next-day target",
        primary_key=["date"],
        event_time="date",
    )
    opening_fg.insert(stock_insert, wait=True)
    print("Inserted today's opening price (with target_open NA) into feature store")
    print(stock_insert)

                                 Open        High         Low       Close  \
Date                                                                        
2026-01-02 00:00:00-05:00  272.049988  277.824799  271.950012  275.200012   

                            Volume  Dividends  Stock Splits  
Date                                                         
2026-01-02 00:00:00-05:00  8891473        0.0           0.0  


%3|1767367097.787|FAIL|rdkafka#consumer-4| [thrd:ssl://51.161.81.188:9093/1]: ssl://51.161.81.188:9093/1: SSL handshake failed: Disconnected: connection reset by peer: connecting to a PLAINTEXT broker listener? (after 117ms in state SSL_HANDSHAKE)
Uploading Dataframe: 100.00% |██████████| Rows 1/1 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: opening_prices_2_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1267871/jobs/named/opening_prices_2_offline_fg_materialization/executions
2026-01-02 16:18:32,795 INFO: Waiting for execution to finish. Current state: INITIALIZING. Final status: UNDEFINED
2026-01-02 16:18:36,048 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2026-01-02 16:18:39,268 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2026-01-02 16:20:19,592 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2026-01-02 16:20:19,768 INFO: Waiting for log aggregation to finish.
2026-01-02 16:20:28,451 INFO: Execution finished successfully.
Inserted today's opening price (with target_open NA) into feature store
        date        open  target_open
0 2026-01-02  272.049988          NaN
