In [1]:
import pandas as pd

In [2]:
# Read the raw Apple price history from CSV into a DataFrame.
# The path is relative to the notebook's working directory.
df_apple_stock_data = pd.read_csv(
    filepath_or_buffer='apple-stock-data/apple_stock_data.csv',
)

In [3]:
# Get info about stock for EDA: column names, dtypes, non-null counts and memory usage

df_apple_stock_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3773 entries, 0 to 3772
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         3773 non-null   object 
 1   Close Price  3773 non-null   float64
 2   High Price   3773 non-null   float64
 3   Low Price    3773 non-null   float64
 4   Open Price   3773 non-null   float64
 5   Volume       3773 non-null   int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 177.0+ KB


### Observation

The `.info()` output shows that the dataset has 3773 entries across 6 columns and that no column contains null values.
The price columns are already numeric, but `Date` is stored as `object` (plain strings), so we convert it to a datetime type before doing any time-based analysis.

In [5]:
# Parse the Date column (day-month-year strings) into pandas datetime values.
# An explicit format avoids ambiguous day/month guessing.
df_apple_stock_data["Date"] = pd.to_datetime(
    arg=df_apple_stock_data["Date"],
    format="%d-%m-%Y",
)

In [6]:
# Index the frame by Date and order it chronologically.
#
# The guard keeps this cell safe to re-run: once "Date" has become the index
# it is no longer a column, so an unconditional set_index("Date") raises
# KeyError on a second execution.
if "Date" in df_apple_stock_data.columns:
    df_apple_stock_data = df_apple_stock_data.set_index("Date")

# Reassign rather than mutate with inplace=True; ascending date order is what
# the row-to-row calculations (pct_change, rolling) on this frame rely on.
df_apple_stock_data = df_apple_stock_data.sort_index()

# Preview the first ten rows to confirm the new index.
df_apple_stock_data.head(10)

Unnamed: 0_level_0,Close Price,High Price,Low Price,Open Price,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-04,6.431897,6.446623,6.382908,6.414465,493729600
2010-01-05,6.443017,6.479382,6.409055,6.449629,601904800
2010-01-06,6.340532,6.468563,6.33392,6.443017,552160000
2010-01-07,6.32881,6.371488,6.282827,6.363974,477131200
2010-01-08,6.370885,6.371487,6.283128,6.320394,447610800
2010-01-11,6.314685,6.401542,6.264795,6.395531,462229600
2010-01-12,6.242856,6.304467,6.203785,6.287035,594459600
2010-01-13,6.330913,6.339328,6.134059,6.247363,605892000
2010-01-14,6.294249,6.325205,6.281926,6.314686,432894000
2010-01-15,6.189058,6.359466,6.187255,6.339329,594067600


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
# The quartiles listed here are the same ones describe() reports by default.
df_apple_stock_data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Close Price,High Price,Low Price,Open Price,Volume
count,3773.0,3773.0,3773.0,3773.0,3773.0
mean,67.384922,68.038019,66.661499,67.328267,229970500.0
std,65.458097,66.066089,64.759818,65.377731,218079400.0
min,5.771908,5.89062,5.717809,5.781523,23234700.0
25%,17.438068,17.616169,17.29318,17.438647,84923800.0
50%,35.238148,35.654103,34.916149,35.290561,140560800.0
75%,126.721405,127.80763,124.474395,126.025083,308151200.0
max,258.396667,259.474086,257.010028,257.568678,1880998000.0


### Additional Columns

We are going to add derived columns (daily return and rolling volatility) that will support the analysis further down the data pipeline.

In [20]:
# Daily return: fractional change of the close versus the previous row's close.
# The first row has no predecessor, so its value is NaN.
df_apple_stock_data['daily_return'] = (
    df_apple_stock_data['Close Price'].pct_change(periods=1)
)

In [21]:
# Create column for volatility

# Size of the rolling window, in rows (one row per trading day in this dataset).
# Named so the look-back can be tuned in one place instead of a bare literal.
VOLATILITY_WINDOW = 20

# Rolling sample standard deviation of the daily returns. The value is NOT
# annualised. A window needs VOLATILITY_WINDOW non-null returns to produce a
# result, so the leading rows (including the NaN first return) stay NaN.
df_apple_stock_data['volatility'] = (
    df_apple_stock_data['daily_return']
    .rolling(window=VOLATILITY_WINDOW)
    .std()
)

In [22]:
# Persist the explored frame to a new CSV in the working directory.
# The index is written as the first column (index=True is the default).
df_apple_stock_data.to_csv(
    path_or_buf="apple_stock_data_explored.csv",
    index=True,
)