In [26]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime, timezone
import os
from dotenv import load_dotenv

# Load environment variables (from a .env file, if present) and connect to the DB
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    # os.getenv returns None when the variable is unset; stop here with a clear
    # message instead of handing an unusable URL to create_engine.
    raise RuntimeError("DATABASE_URL is not set; define it in the environment or in a .env file.")
engine = create_engine(DATABASE_URL)

# Load primary data: both tables are required, so a failed read is allowed to raise.
# The "date" column is parsed to datetime on load.
technical_df = pd.read_sql("SELECT * FROM technical_features", engine, parse_dates=["date"])
sentiment_df = pd.read_sql("SELECT * FROM sentiment_data", engine, parse_dates=["date"])

In [27]:

# Optional data load: macro_data may be unavailable, so start from an empty
# frame and replace it only when the read succeeds.
macro_df = pd.DataFrame()
try:
    macro_df = pd.read_sql(
        "SELECT * FROM macro_data",
        engine,
        parse_dates=["date"],
    )
except Exception as load_error:
    # Best effort: report why the table was skipped and keep the empty frame.
    print(f"⚠️ Skipping macro_data: {load_error}")

In [28]:
# Display the loaded macro table as the cell output for a quick visual check
# (the frame is empty if the optional load above was skipped).
macro_df

Unnamed: 0,date,interest_rate,cpi,unemployment,fetched_at
0,1913-01-01,,9.800,,2025-06-17 05:30:07.372368
1,1913-02-01,,9.800,,2025-06-17 05:30:07.372368
2,1913-03-01,,9.800,,2025-06-17 05:30:07.372368
3,1913-04-01,,9.800,,2025-06-17 05:30:07.372368
4,1913-05-01,,9.700,,2025-06-17 05:30:07.372368
...,...,...,...,...,...
1344,2025-01-01,4.33,317.671,4.0,2025-06-17 05:30:07.372368
1345,2025-02-01,4.33,319.082,4.1,2025-06-17 05:30:07.372368
1346,2025-03-01,4.33,319.799,4.2,2025-06-17 05:30:07.372368
1347,2025-04-01,4.33,320.795,4.2,2025-06-17 05:30:07.372368


In [29]:

# fundamental_data is optional: default to an empty frame and overwrite it
# only when the table can actually be read.
fundamentals_df = pd.DataFrame()
try:
    fundamentals_df = pd.read_sql(
        "SELECT * FROM fundamental_data",
        engine,
        parse_dates=["date"],
    )
except Exception as load_error:
    # Best effort: report why the table was skipped and keep the empty frame.
    print(f"⚠️ Skipping fundamental_data: {load_error}")

In [30]:
# Display the loaded fundamentals table as the cell output for a quick visual check
# (the frame is empty if the optional load above was skipped).
fundamentals_df

Unnamed: 0,symbol,date,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,...,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare,ticker,fetched_at
0,NVDA,2025-01-26,2025,FY,5.314478,2.968031,2.610018,2.478233,1.759723,3.230584,...,64.512786,70.564356,112.724042,5.657793,5.172583,3.237996,0.918729,0.131786,NVDA,2025-06-17 05:29:57.895563
1,NVDA,2024-01-28,2024,FY,2.467477,1.205346,1.137708,1.094411,1.05241,1.740705,...,59.906684,59.270501,115.993623,6.092809,6.158207,3.146725,0.692447,0.043297,NVDA,2025-06-17 05:29:57.895563
2,NVDA,2023-01-29,2023,FY,1.0846,0.175633,0.226819,0.153116,0.53462,0.888661,...,51.785238,37.480203,162.079101,7.048341,9.738474,2.251987,0.197638,0.073703,NVDA,2025-06-17 05:29:57.895563
3,NVDA,2022-01-30,2022,FY,1.078285,0.390705,0.364904,0.325801,0.849679,1.066186,...,63.061975,68.947452,100.733658,5.787957,5.293887,3.623417,0.366451,0.039103,NVDA,2025-06-17 05:29:57.895563
4,NVDA,2021-01-31,2021,FY,0.675648,0.175527,0.2359,0.190194,0.468436,0.684481,...,53.168516,66.791687,106.145883,6.864965,5.464752,3.438664,0.256438,0.045705,NVDA,2025-06-17 05:29:57.895563


In [31]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime, timezone
import os
from dotenv import load_dotenv

# Build the merged_features table: technical features joined with daily
# sentiment aggregates, plus the latest available fundamentals (per ticker)
# and macro readings as of each row's date. The result replaces the
# merged_features table in the database.

# Load .env and connect
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    # os.getenv returns None when the variable is unset; stop here with a clear
    # message instead of handing an unusable URL to create_engine.
    raise RuntimeError("DATABASE_URL is not set; define it in the environment or in a .env file.")
engine = create_engine(DATABASE_URL)


def _read_optional_table(table_name, db_engine):
    """Read every row of `table_name`, parsing its "date" column.

    Returns an empty DataFrame (after printing the reason) when the read
    fails, so callers can test `.empty` to decide whether to use the table.
    """
    try:
        return pd.read_sql(f"SELECT * FROM {table_name}", db_engine, parse_dates=["date"])
    except Exception as e:  # deliberate best effort: any failure means "skip this table"
        print(f"⚠️ Skipping {table_name}: {e}")
        return pd.DataFrame()


# Load technical and sentiment (required: failures here are allowed to raise)
technical_df = pd.read_sql("SELECT * FROM technical_features", engine, parse_dates=["date"])
sentiment_df = pd.read_sql("SELECT * FROM sentiment_data", engine, parse_dates=["date"])

# Optional macro + fundamentals
macro_df = _read_optional_table("macro_data", engine)
fundamentals_df = _read_optional_table("fundamental_data", engine)

# Aggregate sentiment to one row per (ticker, date)
sentiment_agg = sentiment_df.groupby(["ticker", "date"]).agg(
    sentiment_avg=("sentiment_score", "mean"),
    sentiment_std=("sentiment_score", "std"),
    sentiment_count=("sentiment_score", "count")
).reset_index()

# Merge technical + sentiment; rows without sentiment keep NaN aggregates (left join)
merged = pd.merge(technical_df, sentiment_agg, on=["ticker", "date"], how="left")

# pd.merge_asof requires the `on` key ("date") to be sorted in both frames.
# Sorting by ticker first only satisfies that when a single ticker is present,
# so order by date first here and restore the (ticker, date) order at the end.
merged = merged.sort_values(["date", "ticker"])

# 🔁 Merge most recent fundamental data per ticker
# NOTE(review): fundamentals "date" looks like the fiscal period end rather than
# the publication date; a backward as-of join on it may expose figures before
# they were public — confirm before using these features for backtests.
if not fundamentals_df.empty:
    # Normalise the key column before sorting, so a table that only has
    # "symbol" (or only has "ticker") is handled either way.
    if "ticker" not in fundamentals_df.columns:
        fundamentals_df = fundamentals_df.rename(columns={"symbol": "ticker"})
    fundamentals_df = fundamentals_df.sort_values(["date", "ticker"])

    fundamentals_df.columns.name = None  # flatten any pivoted column names
    merged = pd.merge_asof(
        merged,
        fundamentals_df,
        by="ticker",
        on="date",
        direction="backward",
        allow_exact_matches=True
    )

# 🔁 Merge most recent macro data
# NOTE(review): non-key columns present on both sides (e.g. fetched_at appears in
# both optional tables) are disambiguated by pandas with _x/_y suffixes — confirm
# downstream consumers expect those names.
if not macro_df.empty:
    macro_df = macro_df.sort_values("date")
    merged = pd.merge_asof(
        merged,
        macro_df,
        on="date",
        direction="backward",
        allow_exact_matches=True
    )

# Restore the per-ticker chronological order for the stored table
merged = merged.sort_values(["ticker", "date"]).reset_index(drop=True)

# Final metadata: UTC timestamp of this merge run, identical on every row
merged["merged_at"] = datetime.now(timezone.utc)

# Save to DB (replaces any existing merged_features table)
merged.to_sql("merged_features", engine, if_exists="replace", index=False)
print(f"✅ Merged features table created with {len(merged)} rows.")


✅ Merged features table created with 6641 rows.
