# Import dependencies

Let's import the necessary packages and modules.

In [63]:
import os
import sys
import yfinance as yf
import pandas as pd
from datetime import date, timedelta 
# Lift pandas' display limits so frames are rendered without truncation.
pd.options.display.max_columns = None  # show every column
pd.options.display.max_rows = None  # show every row

# Fetching data

Let's start by fetching stock price data using the `yfinance` library and saving it as `stock_data.parquet` in the `data` directory.


## Step 1: Create the directory structure if it doesn't exist

In [64]:
# Step 1: Resolve the project root (the parent of the current working
# directory) and make sure it contains a "data" folder.
root_dir = os.path.abspath("../")
data_dir = os.path.join(root_dir, "data")
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(name=data_dir, exist_ok=True)

## Step 2: Define the stock ticker and time period for data extraction

In [65]:
# Date window (ISO "YYYY-MM-DD" strings) and symbol used for the download.
start_date, end_date = "2015-01-01", "2023-01-01"
ticker = "AAPL"  # Apple Inc.

## Step 3: Fetch stock data using the yfinance library

In [66]:
# Download the price history for `ticker` between `start_date` and `end_date`.
stock_data = yf.download(ticker, start=start_date, end=end_date)

# yfinance reports most download failures by printing a message and returning
# an empty frame instead of raising, so stop here rather than letting the
# later cells write an empty file to disk.
if stock_data.empty:
    raise RuntimeError(
        f"No data returned for {ticker!r} between {start_date} and {end_date}"
    )

stock_data.head()

[*********************100%***********************]  1 of 1 completed


Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2015-01-02,24.347176,27.3325,27.860001,26.8375,27.8475,212818400
2015-01-05,23.661274,26.5625,27.1625,26.352501,27.0725,257142000
2015-01-06,23.663506,26.565001,26.8575,26.157499,26.635,263188400
2015-01-07,23.995317,26.9375,27.049999,26.674999,26.799999,160423600
2015-01-08,24.917267,27.9725,28.0375,27.174999,27.307501,237458000


## Step 4: Reset the index so that `Date` becomes a regular column

In [67]:
# Move the date index into a regular "Date" column.
# The guard keeps this cell idempotent: an unconditional reset would add a
# spurious "index" column every time the cell is re-run on the same kernel.
if "Date" not in stock_data.columns:
    stock_data = stock_data.reset_index()

## Step 5: Convert the "Date" column to datetime format if not already

In [68]:
# Coerce the "Date" column to pandas datetimes (a no-op if it already is one).
date_values = pd.to_datetime(stock_data["Date"])
stock_data["Date"] = date_values

## Step 6: Save the data as a Parquet file in the `data` directory

In [69]:
# Destination file inside the data directory.
parquet_file_path = os.path.join(data_dir, "stock_data.parquet")
# index=False: do not write the frame's index to the file.
stock_data.to_parquet(path=parquet_file_path, index=False)

## Step 7: Confirm the data has been saved correctly

In [70]:
# Read the file back so the check exercises what was actually written to disk,
# not just the in-memory frame.
saved_data = pd.read_parquet(parquet_file_path)
assert saved_data.shape == stock_data.shape, "saved file does not match the in-memory frame"
saved_data.head()

Price,Date,Adj Close,Close,High,Low,Open,Volume
Ticker,Unnamed: 1_level_1,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
0,2015-01-02,24.347176,27.3325,27.860001,26.8375,27.8475,212818400
1,2015-01-05,23.661274,26.5625,27.1625,26.352501,27.0725,257142000
2,2015-01-06,23.663506,26.565001,26.8575,26.157499,26.635,263188400
3,2015-01-07,23.995317,26.9375,27.049999,26.674999,26.799999,160423600
4,2015-01-08,24.917267,27.9725,28.0375,27.174999,27.307501,237458000
