# Stock Price Prediction for New York Stock Exchange

## Preprocessing of the dataset

### Loading the Libraries and datasets

In [44]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

In [45]:
# Load the raw prices CSV into a DataFrame; no date parsing is requested here.
data = pd.read_csv(filepath_or_buffer='nyse/prices-split-adjusted.csv')


In [46]:
# Keep only the rows whose symbol is 'WLTW', restricted to the date and
# price/volume columns, then preview the first five rows.
wltw_rows = data['symbol'] == 'WLTW'
wanted_cols = ['date', 'open', 'close', 'low', 'high', 'volume']
data_1 = data.loc[wltw_rows, wanted_cols]
data_1.head()

Unnamed: 0,date,open,close,low,high,volume
0,2016-01-05,123.43,125.839996,122.309998,126.25,2163600.0
1,2016-01-06,125.239998,119.980003,119.940002,125.540001,2386400.0
2,2016-01-07,116.379997,114.949997,114.93,119.739998,2489500.0
3,2016-01-08,115.480003,116.620003,113.5,117.440002,2006300.0
4,2016-01-11,117.010002,114.970001,114.089996,117.330002,1408600.0


In [47]:
# Inspect the column dtypes; 'date' shows up as `object` (plain strings), not datetime.
data_1.dtypes

date       object
open      float64
close     float64
low       float64
high      float64
volume    float64
dtype: object

### The `date` column is not in datetime format, so let's convert it to datetime

In [48]:
def dateparse(dates):
    """Parse a 'YYYY-MM-DD' string into a `datetime`.

    No longer passed to `read_csv` (see below); retained so that any cell
    still calling `dateparse` keeps working. Uses the standard-library
    `datetime` because the `pd.datetime` alias was removed in pandas 2.0.
    """
    return datetime.strptime(dates, '%Y-%m-%d')


# Re-read the CSV with 'date' parsed to datetimes and used as the index.
# `date_parser` is deprecated since pandas 2.0; the dates are ISO formatted
# (YYYY-MM-DD), so `parse_dates` alone parses them without a custom parser.
data = pd.read_csv('nyse/prices-split-adjusted.csv', parse_dates=['date'], index_col='date')

# Keep only the rows whose symbol is 'WLTW'; 'date' is now the index, so it is
# not listed among the selected columns.
data_1 = data.loc[data['symbol'] == 'WLTW', ['open', 'close', 'low', 'high', 'volume']]
data_1.head()

Unnamed: 0_level_0,open,close,low,high,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-05,123.43,125.839996,122.309998,126.25,2163600.0
2016-01-06,125.239998,119.980003,119.940002,125.540001,2386400.0
2016-01-07,116.379997,114.949997,114.93,119.739998,2489500.0
2016-01-08,115.480003,116.620003,113.5,117.440002,2006300.0
2016-01-11,117.010002,114.970001,114.089996,117.330002,1408600.0


In [49]:
# Re-check the dtypes: 'date' is now the index, so it no longer appears among the columns.
# NOTE: `.dtypes` lists columns only; inspect `data_1.index.dtype` to verify the index is datetime.
data_1.dtypes

open      float64
close     float64
low       float64
high      float64
volume    float64
dtype: object

### Now that the dates are in datetime format, we can treat the dataset as a time series and explore it

In [50]:
# Select the 'close' column as a Series (indexed by date) and preview the first five values.
# NOTE(review): the variable is named `data_open` but holds closing prices; renaming it
# (e.g. to `data_close`) would require updating every later cell that uses it.
data_open = data_1['close']
data_open.head()

date
2016-01-05    125.839996
2016-01-06    119.980003
2016-01-07    114.949997
2016-01-08    116.620003
2016-01-11    114.970001
Name: close, dtype: float64

### Let's retrieve some closing prices by giving a date range as input

In [51]:
# Stock prices for the month of January 2016 (label-based slice; both end dates are inclusive).
data_open.loc['2016-01-01':'2016-01-31']

date
2016-01-05    125.839996
2016-01-06    119.980003
2016-01-07    114.949997
2016-01-08    116.620003
2016-01-11    114.970001
2016-01-12    115.550003
2016-01-13    112.849998
2016-01-14    114.379997
2016-01-15    112.529999
2016-01-19    110.379997
2016-01-20    109.300003
2016-01-21    110.000000
2016-01-22    111.949997
2016-01-25    110.120003
2016-01-26    111.000000
2016-01-27    110.709999
2016-01-28    112.580002
2016-01-29    114.470001
Name: close, dtype: float64