In [2]:
# =====================================================
# 1. IMPORT LIBRARIES
# =====================================================
import pandas as pd
import numpy as np
import glob
from sklearn.ensemble import RandomForestClassifier


# =====================================================
# 2. LOAD NIFTY DATASET
# =====================================================
# glob returns matches in arbitrary order, so sort them to make the
# "first CSV" choice deterministic from run to run.
csv_files = sorted(glob.glob("/kaggle/input/**/*.csv", recursive=True))
if not csv_files:
    # Fail with a clear message instead of an IndexError on csv_files[0].
    raise FileNotFoundError("No CSV files found under /kaggle/input")

df = pd.read_csv(csv_files[0])

# Rename the source columns to the names used in the rest of the notebook.
df = df.rename(columns={
    "Symbol": "Company",
    "LTP": "Close"
})

# Keep only the identifier and the four price columns; .copy() gives the
# later column assignments an independent frame to write into.
df = df[["Company", "Open", "High", "Low", "Close"]].copy()


# =====================================================
# 3. CLEAN PRICE COLUMNS
# =====================================================
price_cols = ["Open", "High", "Low", "Close"]

# Currency markers to strip before the float conversion:
#   "\u20b9"             - the rupee sign itself
#   "\u00e2\u201a\u00b9" - the three characters the rupee sign turns into when
#                          its UTF-8 bytes are mis-decoded as cp1252
# Written as escapes so the literals survive further encoding round-trips.
currency_markers = ("\u20b9", "\u00e2\u201a\u00b9")

for col in price_cols:
    # Work on the text form so thousands separators and symbols can be removed.
    cleaned = df[col].astype(str).str.replace(",", "", regex=False)
    for marker in currency_markers:
        cleaned = cleaned.str.replace(marker, "", regex=False)
    # astype(float) still raises on anything that is not a plain number,
    # so unexpected cell contents are not silently hidden.
    df[col] = cleaned.str.strip().astype(float)



# =====================================================
# 4. BASIC PRICE FEATURES
# =====================================================
open_price = df["Open"]

# Absolute and percentage move from open to close.
df["PL"] = df["Close"] - open_price
df["PL_percent"] = df["PL"].div(open_price).mul(100)

# High-low spread expressed as a percentage of the open.
df["Intraday_range_percent"] = (df["High"] - df["Low"]).div(open_price).mul(100)


# =====================================================
# 5. ADVANCED FEATURES
# =====================================================
eps = 1e-6  # added to every denominator below to avoid dividing by exactly zero

day_range = df["High"] - df["Low"]
body_size = (df["Close"] - df["Open"]).abs()

# Where the close sits inside the high-low range, and how much of that
# range the open-to-close body covers.
df["Close_position"] = (df["Close"] - df["Low"]) / (day_range + eps)
df["Body_strength"] = body_size / (day_range + eps)

# NOTE(review): shift(1) takes the Close of the previous ROW. The displayed
# tables show a different company on each row, so this looks like another
# stock's close rather than this stock's previous session - confirm before
# relying on Gap_percent.
df["Prev_close"] = df["Close"].shift(1)
prev_close = df["Prev_close"]
# The first row has no previous row, so its gap is filled with 0.
df["Gap_percent"] = ((df["Open"] - prev_close) / (prev_close + eps) * 100).fillna(0)

# Mean open-to-close return over all rows, used as the "market" benchmark.
market_return = df["PL_percent"].mean()
df["Relative_return"] = df["PL_percent"] - market_return

df["Range_to_body"] = day_range / (body_size + eps)
df["Return_zscore"] = (df["PL_percent"] - market_return) / (df["PL_percent"].std() + eps)

# market_return is a single number, so every row receives the same value.
df["Market_mood"] = np.where(market_return > 0, 1, -1)


# =====================================================
# 6. AI TARGET LABELS
# =====================================================
def make_label(x, threshold=1.0):
    """Turn a percentage return into a three-class trading label.

    Parameters
    ----------
    x : float
        Percentage return for one row.
    threshold : float, default 1.0
        Non-negative cut-off. Returns at or above ``+threshold`` are Buy,
        returns at or below ``-threshold`` are Sell.

    Returns
    -------
    int
        2 for Buy, 0 for Sell, 1 for Hold. NaN fails both comparisons and
        is therefore labelled Hold.

    Raises
    ------
    ValueError
        If ``threshold`` is negative.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")
    if x >= threshold:
        return 2   # Buy
    if x <= -threshold:
        return 0   # Sell
    return 1       # Hold

# Label every row from its open-to-close percentage return.
df["Target"] = df["PL_percent"].map(make_label)


# =====================================================
# 7. AI DATA PREP
# =====================================================
# NOTE(review): Target is a thresholded PL_percent, and PL_percent is fully
# determined by Open and Close. Relative_return and Return_zscore are
# PL_percent shifted/scaled by constants. The model is therefore given the
# label's own inputs as features. Market_mood holds one value for all rows
# and cannot separate them.
raw_price_features = ["Open", "High", "Low", "Close"]
engineered_features = [
    "Intraday_range_percent",
    "Close_position",
    "Body_strength",
    "Gap_percent",
    "Relative_return",
    "Range_to_body",
    "Return_zscore",
    "Market_mood",
]
features = raw_price_features + engineered_features

# Feature matrix and label vector, in the column order listed above.
X = df.loc[:, features]
y = df.loc[:, "Target"]


# =====================================================
# 8. TRAIN AI MODEL
# =====================================================
# Forest settings collected in one place; random_state pins the result.
forest_params = {
    "n_estimators": 200,
    "max_depth": 6,
    "random_state": 42,
}

model = RandomForestClassifier(**forest_params)
# Fitted on every row - no rows are held out for evaluation.
model.fit(X, y)


# =====================================================
# 9. AI PREDICTIONS + CONFIDENCE
# =====================================================
# NOTE(review): X is the same matrix the model was fitted on, so the signals
# and confidences below are in-sample and say nothing about unseen data.
signal_map = {2: "Buy", 1: "Hold", 0: "Sell"}

# Predicted class codes, translated straight to their text labels.
predicted_codes = pd.Series(model.predict(X), index=df.index)
df["ai signal"] = predicted_codes.map(signal_map)

# Confidence is the largest class probability in each row.
class_probabilities = model.predict_proba(X)
df["confidence rate"] = class_probabilities.max(axis=1)



# =====================================================
# 10. OUTPUTS
# =====================================================
# Sort once, best performer first, and reuse that ordering for every table.
ranked = df.sort_values("PL_percent", ascending=False)
signal_cols = ["Company", "PL_percent", "ai signal", "confidence rate"]

print("OPEN vs CLOSE PROFIT / LOSS")
display(ranked[["Company", "Open", "Close", "PL", "PL_percent"]])

# Top five movers with their signal, followed by the full ranked list.
display(ranked.head(5)[signal_cols])
display(ranked[signal_cols])

# Number of rows per predicted signal.
print("MARKET SUMMARY")
for prefix, label in (("Buy :", "Buy"), ("Hold:", "Hold"), ("Sell:", "Sell")):
    print(prefix, df["ai signal"].eq(label).sum())


OPEN vs CLOSE PROFIT / LOSS


Unnamed: 0,Company,Open,Close,PL,PL_percent
9,CIPLA,892.0,965.0,73.0,8.183857
12,DRREDDY,4580.0,4750.0,170.0,3.71179
11,DIVISLAB,4770.0,4940.0,170.0,3.563941
1,ASIANPAINT,3101.0,3138.0,37.0,1.193163
32,NESTLEIND,19148.85,19250.0,101.15,0.52823
44,TCS,3425.0,3439.2,14.2,0.414599
8,BRITANNIA,3560.0,3566.6,6.6,0.185393
21,HINDUNILVR,2344.0,2340.9,-3.1,-0.132253
17,HDFCBANK,1500.0,1489.5,-10.5,-0.7
15,HCLTECH,1120.0,1111.65,-8.35,-0.745536


Unnamed: 0,Company,PL_percent,ai signal,confidence rate
9,CIPLA,8.183857,Buy,0.88
12,DRREDDY,3.71179,Buy,0.88
11,DIVISLAB,3.563941,Buy,0.9
1,ASIANPAINT,1.193163,Buy,0.69
32,NESTLEIND,0.52823,Hold,0.905


Unnamed: 0,Company,PL_percent,ai signal,confidence rate
9,CIPLA,8.183857,Buy,0.88
12,DRREDDY,3.71179,Buy,0.88
11,DIVISLAB,3.563941,Buy,0.9
1,ASIANPAINT,1.193163,Buy,0.69
32,NESTLEIND,0.52823,Hold,0.905
44,TCS,0.414599,Hold,0.97
8,BRITANNIA,0.185393,Hold,0.91
21,HINDUNILVR,-0.132253,Hold,0.995
17,HDFCBANK,-0.7,Hold,0.785
15,HCLTECH,-0.745536,Hold,0.95


MARKET SUMMARY
Buy : 4
Hold: 8
Sell: 38
