# Live Stock Price Prediction

## Tasks:

1. Shortlist models
2. Check performance using mean_squared_error
3. Train model

## Import Libraries

In [101]:
import pandas as pd
import numpy as np
import joblib

## Load data

In [102]:
df = pd.read_csv("SWIGGY_minute.csv")
df

Unnamed: 0,date,open,high,low,close,volume
0,2024-11-13 09:15:00,364.0,364.0,364.0,364.0,0
1,2024-11-13 09:38:00,420.0,420.0,420.0,420.0,2037114
2,2024-11-13 09:39:00,420.0,420.0,420.0,420.0,0
3,2024-11-13 09:42:00,420.0,420.0,420.0,420.0,0
4,2024-11-13 09:45:00,420.0,420.0,420.0,420.0,0
...,...,...,...,...,...,...
68239,2025-08-06 16:44:00,392.7,392.7,392.7,392.7,0
68240,2025-08-06 17:54:00,392.7,392.7,392.7,392.7,0
68241,2025-08-06 17:57:00,392.7,392.7,392.7,392.7,0
68242,2025-08-06 18:13:00,392.7,392.7,392.7,392.7,0


## Clean Data

### Convert date column to index

In [103]:
df['date'] = pd.to_datetime(df['date'])
df = df.set_index('date')
df

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-11-13 09:15:00,364.0,364.0,364.0,364.0,0
2024-11-13 09:38:00,420.0,420.0,420.0,420.0,2037114
2024-11-13 09:39:00,420.0,420.0,420.0,420.0,0
2024-11-13 09:42:00,420.0,420.0,420.0,420.0,0
2024-11-13 09:45:00,420.0,420.0,420.0,420.0,0
...,...,...,...,...,...
2025-08-06 16:44:00,392.7,392.7,392.7,392.7,0
2025-08-06 17:54:00,392.7,392.7,392.7,392.7,0
2025-08-06 17:57:00,392.7,392.7,392.7,392.7,0
2025-08-06 18:13:00,392.7,392.7,392.7,392.7,0


## Clean missing values

We will ffill gaps only upto two rows. We will delete rest of the rows.

In [104]:
df = df.sort_index()
df = df.asfreq('1min')
df = df.ffill(limit = 2)
df.dropna(inplace=True)
df.to_csv('clean_data2.csv')

## Convert Data

Now, we will convert the "close" column into sliding window type data.\
Sliding window type data can be explained using an example:\
[1,2,3] -> 4\
[2,3,4] -> 5\
[3,4,5] -> 6\
...

Here, the arrays are features and numbers after '->' are labels.

Note: We will create a different window for each day.

In [105]:
grp_obj = df.groupby(df.index.date)
features = []
labels = []
for _,j in grp_obj:
    arr = j['close'].copy()
    a = 0
    while a+3<len(arr):
        b = a+1
        c = a+2
        d = a + 3
        features.append([arr[a],arr[b],arr[c]])
        labels.append(arr[d])
        a+=1
features = np.array(features)
labels = np.array(labels)

  features.append([arr[a],arr[b],arr[c]])
  labels.append(arr[d])


## Split Data

In [106]:
split_len = int(0.8 * len(features))
train_features = features[:split_len]
train_labels = labels[:split_len]
test_features = features[split_len:]
test_labels = labels[split_len:]
train = (train_features, train_labels)
test = (test_features, test_labels)
joblib.dump(train, 'train.pkl')
joblib.dump(test, 'test.pkl')

['test.pkl']