# Import Libraries

In [1]:
import pandas as pd

# Obtain Data

Read the S&P 500 price data CSV into a pandas DataFrame and preview the first rows.

In [2]:
# Load the raw price history from the CSV file and preview the first rows.
prices_csv = 'Data/Prices.csv'
df = pd.read_csv(prices_csv)
df.head()

Unnamed: 0,Date,Open,High,Low,Close
0,12/31/19,3215.18,3231.72,3212.03,3230.78
1,12/30/19,3240.09,3240.92,3216.57,3221.29
2,12/27/19,3247.23,3247.93,3234.37,3240.02
3,12/26/19,3227.2,3240.08,3227.2,3239.91
4,12/24/19,3225.45,3226.43,3220.51,3223.38


# Scrub Data

Inspect the DataFrame with `df.info()` to see what data scrubbing is needed.

In [3]:
# Summarize column names, dtypes, and non-null counts to decide what cleaning is required.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2724 entries, 0 to 2723
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2724 non-null   object 
 1    Open   2724 non-null   float64
 2    High   2724 non-null   float64
 3    Low    2724 non-null   float64
 4    Close  2724 non-null   float64
dtypes: float64(4), object(1)
memory usage: 106.5+ KB


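Drop the Open, High, and Low columns, which are not useful for modeling. Note that these column names carry a leading space in the source data.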

In [4]:
# Remove the price columns that are not used for modeling; only Date and Close remain.
# The labels keep the leading space present in the source headers.
unused_columns = [' Open', ' High', ' Low']

# Rebind instead of mutating in place, and tolerate already-missing labels, so the
# cell can be re-run without raising KeyError once the columns are gone.
df = df.drop(columns=unused_columns, errors='ignore')
df.head()

Unnamed: 0,Date,Close
0,12/31/19,3230.78
1,12/30/19,3221.29
2,12/27/19,3240.02
3,12/26/19,3239.91
4,12/24/19,3223.38


Rename the Close column to "S&P Closing Price".

In [5]:
# Give the closing-price column a descriptive label; the source label has a leading space.
new_labels = {' Close': 'S&P Closing Price'}
df.rename(columns=new_labels, inplace=True)
df.head()

Unnamed: 0,Date,S&P Closing Price
0,12/31/19,3230.78
1,12/30/19,3221.29
2,12/27/19,3240.02
3,12/26/19,3239.91
4,12/24/19,3223.38


Reformat Date column as datetime object

In [6]:
# Parse the month/day/two-digit-year strings (e.g. '12/31/19') with an explicit
# format instead of relying on inference, so a malformed or differently formatted
# row raises an error rather than being silently guessed.
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
df.head()

Unnamed: 0,Date,S&P Closing Price
0,2019-12-31,3230.78
1,2019-12-30,3221.29
2,2019-12-27,3240.02
3,2019-12-26,3239.91
4,2019-12-24,3223.38


Order datapoints in ascending order by date

In [7]:
# Put the rows in chronological order (oldest first), then rebuild a fresh
# 0..n-1 index so the index labels match the new row order.
date_column = 'Date'
df.sort_values(by=date_column, ascending=True, inplace=True)
df.reset_index(inplace=True, drop=True)
df.head()

Unnamed: 0,Date,S&P Closing Price
0,2009-03-09,676.53
1,2009-03-10,719.6
2,2009-03-11,721.36
3,2009-03-12,750.74
4,2009-03-13,756.55


Confirm the dataset is now scrubbed with `df.info()`.

In [8]:
# Re-inspect the frame after cleaning: expect a datetime64 Date column and a
# float64 closing-price column, both with no missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2724 entries, 0 to 2723
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   Date               2724 non-null   datetime64[ns]
 1   S&P Closing Price  2724 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 42.7 KB
