<a href="https://colab.research.google.com/github/anamitra-tech/anamitra-tech/blob/main/World_stock_price_predictions_usingLSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
kagglehub.login()


In [4]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error

# =============================
# 1. LOAD & PREPROCESS
# =============================
df = pd.read_csv("/World-Stock-Prices-Dataset.csv")

df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values(["Ticker", "Date"]).reset_index(drop=True)
df = df.dropna(subset=["Open","High","Low","Close","Volume"])

# scale prices
for col in ["Open","High","Low","Close"]:
    df[col] = df[col].astype(float) / 100

# encode categoricals
for col in ["Ticker","Industry_Tag","Country"]:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# =============================
# 2. FEATURE ENGINEERING (PER TICKER)
# =============================
def create_features(group):
    group = group.copy()
    group["return_1d"] = group["Close"].pct_change()
    group["return_5d"] = group["Close"].pct_change(5)
    group["sma_10"] = group["Close"].rolling(10, min_periods=1).mean()
    group["sma_20"] = group["Close"].rolling(20, min_periods=1).mean()
    group["volatility_10"] = group["return_1d"].rolling(10, min_periods=1).std()
    group["target_return"] = group["Close"].pct_change().shift(-1)
    return group.dropna(subset=["target_return"])

df = df.groupby("Ticker", group_keys=False).apply(create_features)

# log-scale target
df["target_return"] = np.sign(df["target_return"]) * np.log1p(np.abs(df["target_return"]))

# =============================
# 3. FEATURES
# =============================
features = [
    "Open","High","Low","Close","Volume",
    "return_1d","return_5d","sma_10","sma_20","volatility_10",
    "Industry_Tag","Country"
]

# =============================
# 4. COMPANY-WISE MODELS
# =============================
models = {}
results = []

for ticker, group in df.groupby("Ticker"):
    group = group.sort_values("Date")
    n = len(group)

    # skip tiny companies
    if n < 200:
        continue

    split = int(0.8 * n)
    train = group.iloc[:split]
    test  = group.iloc[split:]

    X_train = train[features]
    y_train = train["target_return"]
    X_test  = test[features]
    y_test  = test["target_return"]

    model = XGBRegressor(
        n_estimators=300,
        max_depth=6,
        learning_rate=0.05,
        subsample=0.8,
        colsample_bytree=0.8,
        objective="reg:squarederror",
        n_jobs=-1,
        random_state=42
    )

    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, preds))
    mae  = mean_absolute_error(y_test, preds)

    models[ticker] = model
    results.append((ticker, rmse, mae, len(test)))

# =============================
# 5. RESULTS SUMMARY
# =============================
results_df = pd.DataFrame(
    results,
    columns=["Ticker", "RMSE", "MAE", "Test_Size"]
)

print("Best companies:")
print(results_df.sort_values("RMSE").head(10))

print("\nWorst companies:")
print(results_df.sort_values("RMSE").tail(10))

print("\nAverage RMSE:", results_df["RMSE"].mean())
print("Average MAE :", results_df["MAE"].mean())


  df["Date"] = pd.to_datetime(df["Date"])
  df = df.groupby("Ticker", group_keys=False).apply(create_features)


Best companies:
    Ticker      RMSE       MAE  Test_Size
28      28  0.011607  0.008578       1294
25      25  0.012138  0.009233       1294
9        9  0.012924  0.009736       1294
41      41  0.012932  0.009846       1294
34      34  0.012989  0.009966       1294
57      58  0.013861  0.009834       1294
58      59  0.014341  0.010612        882
12      12  0.014987  0.010908       1294
15      15  0.015375  0.010996       1294
21      21  0.016623  0.012548       1294

Worst companies:
    Ticker      RMSE       MAE  Test_Size
44      44  0.035849  0.024312        655
47      47  0.036020  0.026266        228
27      27  0.036660  0.024991       1287
19      19  0.037338  0.023044       1294
50      50  0.038457  0.031417        376
40      40  0.039323  0.026815       1294
54      54  0.039388  0.028662        767
59      60  0.041716  0.030512        265
46      46  0.050293  0.035159        301
11      11  0.053662  0.037382        224

Average RMSE: 0.024792080069072908
Averag