<Font size =5> Creating a dataset for the past 365 days using a Nepse API

In [None]:
import requests
import pandas as pd
import datetime
import time

from nepse_scraper import Nepse_scraper

scraper = Nepse_scraper()
end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=365)

all_data = []

# loop day by day
current = start_date
while current <= end_date:
    try:
        # fetch one day's snapshot
        daily_response = scraper.get_today_price(current.strftime("%Y-%m-%d"))
        
        # NEW: handle JSON structure with "content" key
        companies = daily_response.get("content", [])
        
        for c in companies:
            all_data.append({
                "date": c.get("businessDate"),
                "symbol": c.get("symbol"),
                "securityId": c.get("securityId"),
                "securityName": c.get("securityName"),
                "open": c.get("openPrice"),
                "high": c.get("highPrice"),
                "low": c.get("lowPrice"),
                "close": c.get("closePrice"),
                "volume": c.get("totalTradedQuantity"),
                "turnover": c.get("totalTradedValue"),
                "prevClose": c.get("previousDayClosePrice"),
                "52wHigh": c.get("fiftyTwoWeekHigh"),
                "52wLow": c.get("fiftyTwoWeekLow"),
                "trades": c.get("totalTrades"),
                "avgPrice": c.get("averageTradedPrice"),
                "marketCap": c.get("marketCapitalization")
            })
        
        print(f"Collected data for {current}")
    
    except Exception as e:
        print(f"Skipped {current}: {e}")
    except KeyboardInterrupt:
        pass
    
    current += datetime.timedelta(days=1)
    time.sleep(0.1)  # be polite to NEPSE

  import pkg_resources


Collected data for 2024-08-24
Collected data for 2024-08-25
Collected data for 2024-08-26
Collected data for 2024-08-27
Collected data for 2024-08-28
Collected data for 2024-08-29
Collected data for 2024-08-30
Collected data for 2024-08-31
Collected data for 2024-09-01
Collected data for 2024-09-02
Collected data for 2024-09-03
Collected data for 2024-09-04
Collected data for 2024-09-05
Collected data for 2024-09-06
Collected data for 2024-09-07
Collected data for 2024-09-08
Collected data for 2024-09-09
Collected data for 2024-09-10
Collected data for 2024-09-12
Collected data for 2024-09-13
Collected data for 2024-09-14
Collected data for 2024-09-15
Collected data for 2024-09-16
Collected data for 2024-09-17
Collected data for 2024-09-18
Collected data for 2024-09-19
Collected data for 2024-09-20
Collected data for 2024-09-21
Collected data for 2024-09-22
Collected data for 2024-09-23
Collected data for 2024-09-24
Collected data for 2024-09-25
Collected data for 2024-09-26
Collected 

: 

: 

<font size = 5> Creating target values for classification task

In [1]:
import pandas as pd

# Load your dataset
df = pd.read_csv("nepse_full_1year.csv")

# Sort by symbol and date to keep order
df = df.sort_values(by=["symbol", "date"])

# Shift close price by -1 (next day’s close)
df["next_close"] = df.groupby("symbol")["close"].shift(-1)

# Create target column: 1 if next day close is higher, else 0
df["target"] = (df["next_close"] > df["close"]).astype(int)
df.to_csv("nepse_full_1year.csv", index=False)
print(df[["date", "symbol", "close", "next_close", "target"]].head(15))

          date  symbol   close  next_close  target
0   2024-08-25  ACLBSL  1170.1      1170.0       0
1   2024-08-27  ACLBSL  1170.0      1144.9       0
2   2024-08-28  ACLBSL  1144.9      1138.0       0
3   2024-08-29  ACLBSL  1138.0      1081.0       0
4   2024-09-01  ACLBSL  1081.0      1134.2       1
5   2024-09-02  ACLBSL  1134.2      1116.0       0
6   2024-09-03  ACLBSL  1116.0      1095.0       0
7   2024-09-04  ACLBSL  1095.0      1115.0       1
8   2024-09-05  ACLBSL  1115.0      1143.0       1
9   2024-09-08  ACLBSL  1143.0      1104.0       0
10  2024-09-09  ACLBSL  1104.0      1081.0       0
11  2024-09-10  ACLBSL  1081.0      1077.1       0
12  2024-09-11  ACLBSL  1077.1      1089.0       1
13  2024-09-12  ACLBSL  1089.0      1066.0       0
14  2024-09-15  ACLBSL  1066.0      1031.1       0


In [3]:

df["ma_5"] = df.groupby("symbol")["close"].transform(lambda x: x.rolling(5).mean())
df["volatility_10"] = df.groupby("symbol")["close"].transform(lambda x: x.pct_change().rolling(10).std())
df.to_csv("nepse_full_1year.csv", index=False)


In [21]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
scaler = MinMaxScaler()

feature_cols = ["open", "high", "low", "close", "volume", "ma_5", "volatility_10"]

for feature in feature_cols:
    df[feature] = df.groupby("symbol")[feature].transform(
        lambda x: scaler.fit_transform(x.values.reshape(-1,1)).flatten()
    )

df = df.fillna(method='ffill')
df = df.fillna(method='bfill')# 5. Build sequences (sliding windows per company)
def build_sequences(group, window_size=7):
    X, y = [], []
    data = group[feature_cols].values
    labels = group["target"].values
    for i in range(len(data) - window_size):
        X.append(data[i:i+window_size])
        y.append(labels[i+window_size])
    return np.array(X), np.array(y)


window_size = 7

valid_companies = []
for symbol, group in df.groupby("symbol"):
    if len(group) >= window_size + 1:  # needs at least window_size + 1 rows
        valid_companies.append(symbol)
    else:
        print(f" Skipping {symbol}: only {len(group)} rows")

df = df[df["symbol"].isin(valid_companies)]

X_list, y_list = [], []

for symbol, group in df.groupby("symbol"):
    X_sym, y_sym = build_sequences(group, window_size=7)
    X_list.append(X_sym)
    y_list.append(y_sym)


print(X_list[0].shape)  # Example shapes for one company
# Combine all companies into one dataset
X = np.concatenate(X_list, axis=0)
y = np.concatenate(y_list, axis=0)

print(" Final shapes:")
print("X:", X.shape)  # (samples, 30, num_features)
print("y:", y.shape)  # (samples,)

  df = df.fillna(method='ffill')
  df = df.fillna(method='bfill')# 5. Build sequences (sliding windows per company)


 Skipping ACLBSLP: only 2 rows
 Skipping ALBSLP: only 5 rows
 Skipping ALICLP: only 2 rows
 Skipping BHCL: only 2 rows
 Skipping CBBLPO: only 2 rows
 Skipping CYCLP: only 1 rows
 Skipping CZBILP: only 7 rows
 Skipping EDBLPO: only 1 rows
 Skipping FMDBLP: only 3 rows
 Skipping GILBPO: only 1 rows
 Skipping GMFILP: only 1 rows
 Skipping GUFLPO: only 4 rows
 Skipping HBLPO: only 6 rows
 Skipping ICFCPO: only 1 rows
 Skipping IGIPO: only 3 rows
 Skipping JFLPO: only 1 rows
 Skipping JSLBBP: only 1 rows
 Skipping KMCDBP: only 3 rows
 Skipping LBBLPO: only 1 rows
 Skipping MBLPO: only 5 rows
 Skipping MDBPO: only 2 rows
 Skipping MLBBLP: only 1 rows
 Skipping MLBLPO: only 2 rows
 Skipping MLBSLP: only 1 rows
 Skipping MPFLPO: only 1 rows
 Skipping NABBCP: only 5 rows
 Skipping NABILP: only 1 rows
 Skipping NFSPO: only 1 rows
 Skipping NICLPO: only 1 rows
 Skipping NIFRAP: only 2 rows
 Skipping NLICLP: only 3 rows
 Skipping NLICP: only 4 rows
 Skipping NMLBBLP: only 5 rows
 Skipping PFLPO: o