# Extracting stock market data from Yahoo Finance and saving to CSV

## Installation of essential libraries
The following libraries are used in this project: pandas and yfinance.

In [37]:
!pip install yfinance pandas
import yfinance as yf
import pandas as pd



## 1. Choosing a stock and time period

In [38]:
# Ticker symbol and date range (YYYY-MM-DD strings) used by the download cell below.
TICKER = "AAPL"
START_DATE, END_DATE = "2015-01-01", "2025-01-01"

In [39]:
# Download the price history for TICKER over the configured date range.
# auto_adjust=False keeps the raw "Close" column next to a separate "Adj Close".
df = yf.download(TICKER, start=START_DATE, end=END_DATE, auto_adjust=False)

# Fail loudly if nothing came back, so the later cells do not silently clean
# and save an empty table.
if df is None or df.empty:
    raise ValueError(
        f"No data returned for {TICKER} between {START_DATE} and {END_DATE}"
    )

# Move the date index into a regular "Date" column (new frame, no in-place mutation).
df = df.reset_index()

# When the columns come back as a MultiIndex, keep only the first level
# (the field names such as "Close", "High", ...).
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

df.head()

[*********************100%***********************]  1 of 1 completed


Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2015-01-02,24.237547,27.3325,27.860001,26.8375,27.8475,212818400
1,2015-01-05,23.554745,26.5625,27.1625,26.352501,27.0725,257142000
2,2015-01-06,23.556957,26.565001,26.8575,26.157499,26.635,263188400
3,2015-01-07,23.887278,26.9375,27.049999,26.674999,26.799999,160423600
4,2015-01-08,24.805079,27.9725,28.0375,27.174999,27.307501,237458000


## 2. Data cleaning and preprocessing


In [40]:
# Count the missing values in every column.
df.isna().sum()

Price
Date         0
Adj Close    0
Close        0
High         0
Low          0
Open         0
Volume       0
dtype: int64

In [41]:
# Build the cleaned frame in one non-mutating chain. Filtering with a boolean
# mask and then calling sort_values(inplace=True) on the result can trigger
# pandas' SettingWithCopyWarning; chaining avoids in-place edits entirely.
df = (
    df.dropna()                                   # remove rows with missing data
    .loc[lambda frame: frame["Volume"] > 0]       # remove non-working days (zero volume)
    .sort_values("Date")                          # order rows by date
)

df.head()

Price,Date,Adj Close,Close,High,Low,Open,Volume
0,2015-01-02,24.237547,27.3325,27.860001,26.8375,27.8475,212818400
1,2015-01-05,23.554745,26.5625,27.1625,26.352501,27.0725,257142000
2,2015-01-06,23.556957,26.565001,26.8575,26.157499,26.635,263188400
3,2015-01-07,23.887278,26.9375,27.049999,26.674999,26.799999,160423600
4,2015-01-08,24.805079,27.9725,28.0375,27.174999,27.307501,237458000


## 3. Saving data as a CSV file

In [42]:
import os

# Create the output folder if it does not exist yet (no error when it does).
output_path = "../data"
os.makedirs(output_path, exist_ok=True)

# Write the cleaned table to "<TICKER>data.csv" without the row index.
csv_file = f"{output_path}/{TICKER}data.csv"
df.to_csv(csv_file, index=False)