# NSE Stocks Prediction

## Installing dependencies

In [3]:
!pip install yfinance
!pip install scipy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Importing Libraries

In [4]:
# Importing PyTorch libraries
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# Importing Yahoo Finance libraries
import yfinance as yf

# Importing Python libraries
import pandas as pd
from pandas_datareader import data as pdr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import requests
import csv
import numpy as np
import os
import sys

# Importing SciPy libraries
from scipy.special import softmax
from scipy.signal import savgol_filter

## Data Preparation

### Importing Stock List

In [27]:
# Load the list of stocks to model from a CSV file hosted on GitHub into a DataFrame.
# Later cells read the ticker symbols from its "Symbol" column.

CSV_URL = "https://raw.githubusercontent.com/chaitanya-rane/NSE_stocks_prediction/main/stock_list_short.csv"
stock_list = pd.read_csv(CSV_URL)

In [6]:
# Let yfinance take over pandas_datareader's Yahoo download, which get_stock_data
# calls as pdr.get_data_yahoo(...).
# NOTE(review): pdr_override is a legacy yfinance helper — confirm it still exists
# in the installed yfinance version.
yf.pdr_override()

### Get stock data

In [7]:
# Get closing prices for every stock in the stock list (first `num_rows` rows of each download)
def get_stock_data(stock_list, time_stamp, time_delta, num_rows=40):
    """Download closing prices for every symbol in ``stock_list``.

    For each entry of ``stock_list["Symbol"]`` the suffix ".NS" is appended and
    prices are requested for the window that starts ``time_delta`` days before
    ``time_stamp`` and ends at ``time_stamp``. Anything printed to stdout while
    downloading is discarded.

    Args:
        stock_list: DataFrame with a "Symbol" column.
        time_stamp: datetime marking the end of the download window.
        time_delta: Length of the download window in days.
        num_rows: Number of leading rows of each download to keep. Defaults to
            40, the value that used to be hard-coded.

    Returns:
        DataFrame with one column per symbol (named without the ".NS" suffix)
        containing the "Close" values of the kept rows.

    NOTE(review): ``head(num_rows)`` keeps the *first* rows of the download. If
    the provider returns rows oldest-first, these are the oldest rows of the
    window rather than the most recent ones — confirm this is intended.
    """
    stock_data_df = pd.DataFrame()

    # The window start does not depend on the symbol, so compute it once.
    window_start = time_stamp - timedelta(days=time_delta)

    for stock in stock_list["Symbol"]:

        stock_name = stock + ".NS"

        # Silence the downloader's console output. The devnull handle is closed
        # by the `with` block and stdout is restored even if the download
        # raises, so a failure cannot leave the notebook without output.
        old_stdout = sys.stdout
        with open(os.devnull, "w") as devnull:
            sys.stdout = devnull
            try:
                stock_data = pdr.get_data_yahoo(stock_name, start=window_start, end=time_stamp)
            finally:
                sys.stdout = old_stdout

        stock_data = stock_data.head(num_rows).reset_index()
        stock_data_close = stock_data["Close"].squeeze()

        # Adding stock data to stock data dataframe
        stock_data_df[stock] = stock_data_close

    return stock_data_df

### Split data into training and prediction parts

In [8]:
# Splitting stock data into training data and prediction data
def split_data(stock_data_df, train_size=30, prediction_size=10):
    """Split a price frame into a training part and a prediction part.

    The rows are first reversed. The first ``train_size`` rows of the reversed
    frame become the training data and the following ``prediction_size`` rows
    become the prediction data. Both results get a fresh 0-based index and the
    input frame is left untouched.

    Args:
        stock_data_df: DataFrame with one column per stock.
        train_size: Number of rows in the training part (default 30).
        prediction_size: Number of rows in the prediction part (default 10).

    Returns:
        Tuple ``(train_data, prediction_data)`` of DataFrames with the same
        columns as ``stock_data_df``.

    Raises:
        ValueError: If the input has fewer than ``train_size + prediction_size``
            rows.

    NOTE(review): because of the reversal, the training rows are the *last*
    rows of the input and the prediction rows are the ones just before them.
    If the input is ordered oldest-first, the "prediction" window lies earlier
    in time than the training window — confirm this is intended.
    """
    required_rows = train_size + prediction_size
    if len(stock_data_df) < required_rows:
        raise ValueError(
            f"split_data needs at least {required_rows} rows, got {len(stock_data_df)}"
        )

    # Reversing the order of rows
    reversed_df = stock_data_df.iloc[::-1]

    # Positional slicing keeps the split independent of the index labels.
    train_data = reversed_df.iloc[:train_size].reset_index(drop=True)
    prediction_data = reversed_df.iloc[train_size:required_rows].reset_index(drop=True)

    return train_data, prediction_data

### Get derivatives for train data

In [16]:
def generate_derivates(train_data):
  """Return the smoothed first derivative of every normalised price column.

  Each column is divided by its own maximum and then passed through a
  Savitzky-Golay filter (window length 3, polynomial order 1) configured to
  output the first derivative.

  The input frame is not modified; a new frame is returned, so re-running the
  calling cell gives the same result.

  Args:
      train_data: DataFrame with one numeric column per stock.

  Returns:
      DataFrame with the same shape, index and columns as ``train_data``
      holding the filtered derivatives.
  """
  derivatives = train_data.copy()

  for column in derivatives.columns:

    # Normalising stock values (the maximum is computed once per column)
    normalised = derivatives[column] / derivatives[column].max()

    # Filtering and generating derivatives
    derivatives[column] = savgol_filter(normalised, window_length = 3, polyorder = 1, deriv = 1)

  return derivatives

### Generate prediction labels

In [20]:
def generate_prediction_labels(prediction_data):
  """Turn the prediction window of each stock into a softmax-weighted label.

  For every column the first value is normalised by the column maximum, giving
  ``p0``. The growth score is ``(1 - p0) / p0``, i.e. how far the column
  maximum lies above the first value, relative to the first value. The scores
  of all columns are passed through a softmax so the labels sum to 1.

  The input frame is not modified.

  Args:
      prediction_data: DataFrame with one numeric column per stock.

  Returns:
      Single-row DataFrame (index 0) with the same columns as
      ``prediction_data`` holding the softmax weights.
  """
  columns = prediction_data.columns
  growth_data = np.empty(len(columns))

  for i, column in enumerate(columns):

    prices = prediction_data[column]

    # Normalise the first value of the window by the column maximum
    first_normalised = prices.iloc[0] / prices.max()

    # Calculate growth
    growth_data[i] = (1 - first_normalised) / first_normalised

  growth_data = softmax(growth_data)

  # Build the one-row result in a single step rather than enlarging an empty frame.
  return pd.DataFrame(growth_data.reshape(1, -1), columns = columns)

### Convert data to tensors

In [23]:
def create_tensors(train_data, prediction_labels):
  """Convert one sample's training frame and label row into tensors.

  Args:
      train_data: DataFrame with one column per stock.
      prediction_labels: DataFrame whose row with index label 0 holds one
          label per stock.

  Returns:
      Tuple ``(train_tensor, label_tensor)``:
        * ``train_tensor`` holds ``train_data.values`` transposed, so its
          first dimension runs over the columns (stocks).
        * ``label_tensor`` is 1-D with one entry per column of
          ``prediction_labels``, in torch's default floating dtype.
  """
  # Convert train data to train tensor
  train_tensor = torch.tensor(train_data.values)
  train_tensor = torch.transpose(train_tensor, 0, 1)

  # Convert prediction labels to a labels tensor. Its length follows the number
  # of label columns, so no entry is left uninitialised and any number of
  # stocks is supported.
  label_values = [float(prediction_labels.at[0, col]) for col in prediction_labels.columns]
  label_tensor = torch.tensor(label_values, dtype = torch.get_default_dtype())

  return train_tensor, label_tensor

### Build a super function

In [25]:
def generate_data(stock_list, start_date, end_date):
  """Build train and test tensors with one sample per calendar day.

  Samples are generated for ``start_date``, the day before it, and so on,
  stopping before ``end_date`` is reached; ``start_date`` is therefore the
  *latest* date and ``end_date`` the earliest. For each date the stock data is
  downloaded, split, differentiated, labelled and converted to tensors.

  The first 80% of the samples (the dates closest to ``start_date``) are
  returned as training data and the remainder as test data.

  Args:
      stock_list: DataFrame with a "Symbol" column.
      start_date: Latest datetime to generate a sample for.
      end_date: Earliest datetime (exclusive).

  Returns:
      Tuple ``(train_data, train_labels, test_data, test_labels)``. The data
      tensors have shape ``(samples, stocks, 30)`` and the label tensors
      ``(samples, stocks)``.

  NOTE(review): every date triggers a fresh download for every symbol even
  though consecutive windows overlap almost completely; a single download that
  is sliced per date would be much cheaper. Consecutive samples also share most
  of their rows, so the train and test sets are not independent — confirm this
  is acceptable.
  """
  TIME_DELTA = 100      # days of history requested for every sample
  TRAIN_WINDOW = 30     # rows per stock in a training sample (the training slice made by split_data)
  TRAIN_FRACTION = 0.8  # share of samples that goes into the training set

  dates_list = [start_date - timedelta(days = x) for x in range((start_date - end_date).days)]
  tensor_size = len(dates_list)

  # Size the buffers from the stock list instead of assuming exactly 10 stocks.
  n_stocks = len(stock_list["Symbol"])

  train_tensor_super = torch.empty(tensor_size, n_stocks, TRAIN_WINDOW)
  label_tensor_super = torch.empty(tensor_size, n_stocks)

  data_split = int(TRAIN_FRACTION * tensor_size)

  for i, date in enumerate(dates_list):

    # Get stock data
    stock_data_df = get_stock_data(stock_list, date, TIME_DELTA)

    # Split stock data into training and prediction
    train_split, prediction_split = split_data(stock_data_df)

    # Get derivates for train data
    train_split_derivatives = generate_derivates(train_split)

    # Generate prediction labels
    prediction_split_labels = generate_prediction_labels(prediction_split)

    # Convert to Tensors
    train_tensor, label_tensor = create_tensors(train_split_derivatives, prediction_split_labels)

    # Append to main tensors
    train_tensor_super[i] = train_tensor
    label_tensor_super[i] = label_tensor

    if i%10 == 0:
      print(f"Data prepared for {i+1} days.")

  train_data = train_tensor_super[:data_split]
  test_data = train_tensor_super[data_split:]

  train_labels = label_tensor_super[:data_split]
  test_labels = label_tensor_super[data_split:]

  return train_data, train_labels, test_data, test_labels

In [26]:
# Date range for the data set. START_DATE is the later of the two dates and
# END_DATE the earlier one, matching how generate_data subtracts them.
DATE_FORMAT = "%Y/%m/%d"
START_DATE = datetime.strptime("2023/05/31", DATE_FORMAT)
END_DATE = datetime.strptime("2023/01/01", DATE_FORMAT)

# Build the train/test tensors for every day in the range.
train_data, train_labels, test_data, test_labels = generate_data(
    stock_list=stock_list,
    start_date=START_DATE,
    end_date=END_DATE,
)

Data prepared for 1 days.
Data prepared for 11 days.
Data prepared for 21 days.
Data prepared for 31 days.
Data prepared for 41 days.
Data prepared for 51 days.
Data prepared for 61 days.
Data prepared for 71 days.
Data prepared for 81 days.
Data prepared for 91 days.
Data prepared for 101 days.
Data prepared for 111 days.
Data prepared for 121 days.
Data prepared for 131 days.
Data prepared for 141 days.
