### Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session. Note that this also
# hides deprecation notices raised by the libraries imported above.
warnings.simplefilter('ignore')

# Never truncate columns when a DataFrame is displayed.
pd.options.display.max_columns = None

### Load the data

In [4]:
# Path of the raw export, kept in one place so it is easy to change.
RAW_CSV_PATH = 'Nifty 50 Historical Data.csv'

# Read the raw file with pandas' default parsing and preview the first rows.
df = pd.read_csv(RAW_CSV_PATH)
df.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,23-01-2026,25048.65,25344.6,25347.95,25025.3,393.94M,-0.95%
1,22-01-2026,25289.9,25344.15,25435.75,25168.5,486.40M,0.53%
2,21-01-2026,25157.5,25141.0,25300.95,24919.8,395.62M,-0.30%
3,20-01-2026,25232.5,25580.3,25585.0,25171.35,409.77M,-1.38%
4,19-01-2026,25585.5,25653.1,25653.3,25494.35,443.09M,-0.42%


In [5]:
# Inspect column dtypes and non-null counts of the freshly loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2477 entries, 0 to 2476
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      2477 non-null   object
 1   Price     2477 non-null   object
 2   Open      2477 non-null   object
 3   High      2477 non-null   object
 4   Low       2477 non-null   object
 5   Vol.      2473 non-null   object
 6   Change %  2477 non-null   object
dtypes: object(7)
memory usage: 135.6+ KB


In [6]:
# The file stores dates as day-month-year text; parse them with an explicit
# format so day and month cannot be swapped.
trade_dates = pd.to_datetime(df['Date'], format='%d-%m-%Y')
df['Date'] = trade_dates

# Put the rows in chronological order (oldest first) and preview the result.
df = df.sort_values(by='Date', ascending=True)
df.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
2476,2016-01-27,7437.75,7469.6,7477.9,7419.7,193.39M,0.02%
2475,2016-01-28,7424.65,7426.5,7468.85,7409.6,283.04M,-0.18%
2474,2016-01-29,7563.55,7413.35,7575.65,7402.8,307.93M,1.87%
2473,2016-02-01,7555.95,7589.5,7600.45,7541.25,206.64M,-0.10%
2472,2016-02-02,7455.55,7566.65,7576.3,7428.05,237.34M,-1.33%


In [7]:
# Replace the shuffled row labels left by the sort with a fresh 0..n-1 index.
# Plain reassignment is used instead of inplace=True: the result is the same,
# but the step stays chainable and does not mutate the frame behind the
# reader's back.
df = df.reset_index(drop=True)

In [8]:
# Preview the first rows to confirm the order and the new index.
df.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2016-01-27,7437.75,7469.6,7477.9,7419.7,193.39M,0.02%
1,2016-01-28,7424.65,7426.5,7468.85,7409.6,283.04M,-0.18%
2,2016-01-29,7563.55,7413.35,7575.65,7402.8,307.93M,1.87%
3,2016-02-01,7555.95,7589.5,7600.45,7541.25,206.64M,-0.10%
4,2016-02-02,7455.55,7566.65,7576.3,7428.05,237.34M,-1.33%


In [9]:
# Price-like columns carry thousands separators (e.g. "25,048.65"), so they
# must be stripped before the values can be cast to float.
price_cols = ['Price', 'Open', 'High', 'Low']
for col in price_cols:
    # Cast to str first so the cell also works when a column is already
    # numeric (for example when the cell is re-run); the .str accessor would
    # otherwise raise AttributeError. regex=False treats the comma as a
    # literal on every pandas version.
    df[col] = (
        df[col]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype('float')
    )

In [10]:
# Re-check dtypes now that the price columns have been converted.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2477 entries, 0 to 2476
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      2477 non-null   datetime64[ns]
 1   Price     2477 non-null   float64       
 2   Open      2477 non-null   float64       
 3   High      2477 non-null   float64       
 4   Low       2477 non-null   float64       
 5   Vol.      2473 non-null   object        
 6   Change %  2477 non-null   object        
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 135.6+ KB


In [11]:
# Drop the trailing percent sign and store the daily change as a float
# (still expressed in percent, e.g. -0.95 for "-0.95%").
# The str cast keeps the cell re-runnable once the column is already numeric;
# regex=False treats '%' as a literal on every pandas version.
df['Change %'] = (
    df['Change %']
    .astype(str)
    .str.replace('%', '', regex=False)
    .astype('float')
)

In [12]:
def convert_vol(vol):
    """Convert a volume label such as '393.94M' into a plain float count.

    Parameters
    ----------
    vol : str, float or None
        Raw volume value. A trailing 'K', 'M' or 'B' (any case) scales the
        number by a thousand, a million or a billion respectively.
        Thousands separators (',') and surrounding whitespace are ignored.

    Returns
    -------
    float
        The volume as a number, or NaN when the value is missing or blank.

    Raises
    ------
    ValueError
        If the text left after removing the suffix is not a valid number.
    """
    if pd.isna(vol):
        return np.nan

    text = str(vol).strip().replace(',', '')
    if not text:
        # Whitespace-only cells are treated like missing values.
        return np.nan

    multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
    suffix = text[-1].upper()
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]

    # No recognised suffix: the text is already a plain number.
    return float(text)

# Run every raw volume label through convert_vol, one element at a time.
df['Vol.'] = df['Vol.'].map(convert_vol)

In [13]:
# Re-check dtypes after the 'Change %' and 'Vol.' conversions.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2477 entries, 0 to 2476
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      2477 non-null   datetime64[ns]
 1   Price     2477 non-null   float64       
 2   Open      2477 non-null   float64       
 3   High      2477 non-null   float64       
 4   Low       2477 non-null   float64       
 5   Vol.      2473 non-null   float64       
 6   Change %  2477 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 135.6 KB


In [14]:
# Preview the first rows of the converted frame.
df.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,2016-01-27,7437.75,7469.6,7477.9,7419.7,193390000.0,0.02
1,2016-01-28,7424.65,7426.5,7468.85,7409.6,283040000.0,-0.18
2,2016-01-29,7563.55,7413.35,7575.65,7402.8,307930000.0,1.87
3,2016-02-01,7555.95,7589.5,7600.45,7541.25,206640000.0,-0.1
4,2016-02-02,7455.55,7566.65,7576.3,7428.05,237340000.0,-1.33


In [31]:
# Destination of the cleaned data set.
CLEANED_CSV_PATH = 'Nifty 50 Historical Data - Cleaned.csv'

# Write the frame without the row index so the file holds only data columns.
df.to_csv(CLEANED_CSV_PATH, index=False)