<a href="https://colab.research.google.com/github/Tongthebread/AIequity/blob/main/SwedishEquity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing Libraries

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

import yfinance as yf


# For time stamps
from datetime import datetime



Load Data


In [2]:


tickers = [
    "ABB.ST","ADDT-B.ST","ALFA.ST","ASSA-B.ST","AZN.ST","ATCO-A.ST","BOL.ST",
    "EPI-A.ST","EQT.ST","ERIC-B.ST","ESSITY-B.ST","EVO.ST","HM-B.ST","HEXA-B.ST",
    "INDU-C.ST","INVE-B.ST","LIFCO-B.ST","NIBE-B.ST","NDA-SE.ST","SAAB-B.ST",
    "SAND.ST","SEB-A.ST","SKA-B.ST","SKF-B.ST","SCA-B.ST","SHB-A.ST","SWED-A.ST",
    "TEL2-B.ST","TELIA.ST","VOLV-B.ST"
]

# Fetch data directly
data = yf.download(
    tickers,
    interval="1d",
    auto_adjust=True,
    group_by="ticker",
    progress=False
)

print(data.head())


Ticker       TEL2-B.ST                                               \
Price             Open        High         Low       Close   Volume   
Date                                                                  
2025-11-10  151.449997  151.449997  150.100006  151.100006  1071008   
2025-11-11  151.250000  151.850006  149.449997  149.750000  1458617   
2025-11-12  150.050003  150.750000  149.399994  150.750000  1261841   
2025-11-13  150.350006  150.449997  145.050003  145.050003  2804267   
2025-11-14  145.000000  146.750000  143.050003  146.750000  2136251   

Ticker       TELIA.ST                                            ...  \
Price            Open       High        Low      Close   Volume  ...   
Date                                                             ...   
2025-11-10  37.730000  37.990002  37.669998  37.770000  5305612  ...   
2025-11-11  37.810001  37.930000  37.549999  37.750000  5788137  ...   
2025-11-12  37.869999  38.180000  37.750000  38.130001  5344177  ...   

Flatten

In [3]:
frames = []
for t in tickers:
    d = data[t].copy()
    d.columns = [c.lower() for c in d.columns]       # open, high, low, close, volume
    d["ticker"] = t
    frames.append(d)

px = (pd.concat(frames)
        .reset_index()
        .sort_values(["ticker","Date"])
        .reset_index(drop=True))

# make timezone explicit
px["Date"] = pd.to_datetime(px["Date"])
if px["Date"].dt.tz is None:
    px["Date"] = px["Date"].dt.tz_localize("UTC")

# 3) Basic cleaning
px = px.dropna(subset=["close"])
px = px[px["volume"].fillna(0) >= 0]  # keep zeros, drop negatives if any

# 4) Features + target (quarter-ahead log return)
HORIZON  = 63   # ≈ one quarter
LOOKBACK = 60   # you’ll use this in the LSTM windowing

px["ret_1d"]   = px.groupby("ticker")["close"].transform(lambda s: np.log(s/s.shift(1)))
px["vol_roll"] = px.groupby("ticker")["ret_1d"].transform(lambda s: s.rolling(20).std())
px["mom_20"]   = px.groupby("ticker")["close"].transform(lambda s: s.pct_change(20))

px["target"] = np.log(px.groupby("ticker")["close"].shift(-HORIZON) / px["close"])

# Drop rows without target or features
px = px.dropna(subset=["target","ret_1d","vol_roll","mom_20"]).reset_index(drop=True)

# 5) Save for the LSTM pipeline
px.to_csv("data_omxs30.csv", index=False)
print("Saved data_omxs30.csv with columns:", list(px.columns))

Saved data_omxs30.csv with columns: ['Date', 'open', 'high', 'low', 'close', 'volume', 'ticker', 'ret_1d', 'vol_roll', 'mom_20', 'target']


LSTM construction