In [1]:
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

import sys
sys.path.append('..')
from Scripts.preprocess_data import load_parquet
from Scripts.analysis import groupByColumn

Before starting the analysis, inspect the loaded parquet data. These quick data-integrity checks give us confidence in the results that follow.

In [2]:
# Load the prepared stock dataset and preview its first five rows.
stock_data = load_parquet()
stock_data.head(5)

Unnamed: 0,date,close,high,low,open,volume,ticker
0,2018-01-01,562.450073,566.72687,557.47704,560.75923,6943234,AXISBANK.NS
1,2018-01-01,143.172302,145.299935,142.596072,142.729044,7408273,BANKBARODA.NS
2,2018-01-01,432.688995,438.813604,431.755722,436.935379,6580516,HDFCBANK.NS
3,2018-01-01,298.055054,301.609058,296.902425,301.609058,7739502,ICICIBANK.NS
4,2018-01-01,53.558346,54.095397,52.972477,53.118946,4483875,IDFCFIRSTB.NS


In [3]:
# Number of distinct tickers in the dataset (missing values are not counted).
stock_data["ticker"].nunique(dropna=True)

11

In [4]:
# Date span of the dataset: earliest and latest value in the `date` column.
date_column = stock_data["date"]
print(date_column.min(), date_column.max())


2018-01-01 00:00:00 2024-12-12 00:00:00


In [5]:
# Sanity check: other cells read the first/last row positionally (iloc[0] /
# iloc[-1]), so confirm the `date` column never decreases from row to row.
is_sorted = stock_data['date'].is_monotonic_increasing
# Derive the marker from the result so a failed check is not shown with a tick.
status_icon = "✅" if is_sorted else "❌"
print(f"{status_icon} Dates are sorted:", is_sorted)


✅ Dates are sorted: True


Stock overview

In [6]:
# Per-ticker summary of closing prices: range, central tendency, dispersion,
# distribution shape (scipy skew / kurtosis, NaNs omitted) and the percentage
# change from the first to the last close.
summary = (
    stock_data
    # Order rows chronologically (stable, so ties keep their current order) so
    # the iloc[0] / iloc[-1] used for total_return_pct are the earliest and
    # latest observations regardless of how the frame happens to be ordered.
    .sort_values('date', kind='stable')
    # `ticker` is categorical: observed=True limits the groups to tickers that
    # actually occur (the lambdas below would fail on an empty group) and
    # silences pandas' FutureWarning about the changing `observed` default.
    .groupby('ticker', observed=True)
    .agg(
        close_min=('close', 'min'),
        close_max=('close', 'max'),
        close_mean=('close', 'mean'),
        close_median=('close', 'median'),
        close_std=('close', 'std'),
        close_skew=('close', lambda x: skew(x, nan_policy='omit')),
        close_kurt=('close', lambda x: kurtosis(x, nan_policy='omit')),
        price_range=('close', lambda x: x.max() - x.min()),
        total_return_pct=('close', lambda x: (x.iloc[-1] / x.iloc[0] - 1) * 100)
    )
    .round(2)
    # Best-performing ticker first.
    .sort_values(by='total_return_pct', ascending=False)
)

# Leave the frame as the cell's last expression so Jupyter renders it as a
# table instead of column-wrapped plain text.
summary

               close_min  close_max  close_mean  close_median  close_std  \
ticker                                                                     
ICICIBANK.NS      251.52    1326.27      656.51        626.47     296.33   
SBIN.NS           152.66     887.82      421.85        398.87     189.66   
HDFCBANK.NS       385.43     922.53      650.36        678.68     129.31   
AXISBANK.NS       324.10    1316.18      771.35        738.75     215.54   
NSEBANK         17249.10   53603.55    35380.40      35085.04    9010.98   
KOTAKBANK.NS      994.75    2111.08     1622.86       1740.69     272.12   
BANKBARODA.NS      36.35     279.26      124.76        105.85      63.54   
IDFCFIRSTB.NS      19.59      98.88       52.86         48.78      17.54   
PNB.NS             25.47     163.72       62.78         55.18      31.92   
INDUSINDBK.NS     302.46    1943.40     1216.42       1232.41     354.75   
YESBANK.NS         10.90     377.67       75.57         21.22     106.69   

           

  .groupby('ticker')


In [7]:
# List the distinct ticker symbols present in the data.
stock_data["ticker"].unique()


['AXISBANK.NS', 'BANKBARODA.NS', 'HDFCBANK.NS', 'ICICIBANK.NS', 'IDFCFIRSTB.NS', ..., 'KOTAKBANK.NS', 'PNB.NS', 'SBIN.NS', 'YESBANK.NS', 'NSEBANK']
Length: 11
Categories (11, object): ['AXISBANK.NS', 'BANKBARODA.NS', 'HDFCBANK.NS', 'ICICIBANK.NS', ..., 'NSEBANK', 'PNB.NS', 'SBIN.NS', 'YESBANK.NS']

In [8]:
# Re-order the rows by their index labels, ascending.
stock_data = stock_data.sort_index(axis=0, ascending=True)

In [9]:
# Extract the PNB.NS rows and print the first and last closing price in row order.
pnb = stock_data.loc[stock_data['ticker'].eq('PNB.NS')]
pnb_close = pnb['close']
print(pnb_close.iloc[0], pnb_close.iloc[-1])


157.5030517578125 104.78369903564452


In [10]:
# Show the PNB subset as the cell's last expression so Jupyter renders it as a
# table; print() falls back to plain text that wraps the columns.
# NOTE(review): the rendered dates include 2018-01-06 and 2024-12-07/08, which
# fall on weekends — possibly day and month were swapped when the dates were
# parsed upstream; confirm in the data loader.
pnb

           date       close        high         low        open     volume  \
7    2018-01-01  157.503052  160.889710  156.714371  160.472179    7869149   
18   2018-01-02  155.461761  162.003127  154.023587  160.332989   15801115   
29   2018-01-03   93.759537   96.264737   91.671868   93.434783  122105493   
40   2018-01-06   76.965401   78.728323   76.130339   77.939649   20636225   
51   2018-01-08   81.094353   82.300557   78.403578   78.960288   42891508   
...         ...         ...         ...         ...         ...        ...   
7472 2024-12-07  114.424347  116.426341  114.191108  116.426341   26896471   
7483 2024-12-08  111.372772  112.626446  110.605018  111.712914   15808638   
7494 2024-12-09  105.658356  106.387235  103.986791  105.638915   20816903   
7505 2024-12-11  100.808884  103.345377  100.439580  102.529036   26013763   
7516 2024-12-12  104.783699  106.047093  104.210317  105.930470   18431604   

      ticker  
7     PNB.NS  
18    PNB.NS  
29    PNB.NS  
40 