# Creating the .csv

In [1]:
import yfinance as yf
import pandas as pd
import os.path
import time
import datetime
import pytz
from pandas import Timestamp

First, we create a .csv file with hourly AAPL prices from 2022-03-01 through 2023-03-31.

In [2]:
# Ticker to fetch and the date window for the historical download
symbol = 'AAPL'
start_date, end_date = '2022-03-01', '2023-04-01'

# Pull hourly bars for that window via yfinance
data = yf.download(
    symbol,
    start=start_date,
    end=end_date,
    interval='1h',
)

# Persist the bars to "<symbol>_prices.csv" in the working directory
data.to_csv(f'{symbol}_prices.csv')

# Show the last few rows as a quick sanity check
print(data.tail())

[*********************100%***********************]  1 of 1 completed
                                 Open        High         Low       Close  \
Datetime                                                                    
2023-03-31 11:30:00-04:00  163.097504  163.899994  162.919998  163.889999   
2023-03-31 12:30:00-04:00  163.884995  164.074997  163.570007  163.884995   
2023-03-31 13:30:00-04:00  163.880005  164.175003  163.649994  164.074997   
2023-03-31 14:30:00-04:00  164.074997  164.940002  164.000000  164.880005   
2023-03-31 15:30:00-04:00  164.880005  165.000000  164.125000  164.839996   

                            Adj Close    Volume  
Datetime                                         
2023-03-31 11:30:00-04:00  163.889999   4921522  
2023-03-31 12:30:00-04:00  163.884995   6106203  
2023-03-31 13:30:00-04:00  164.074997   4040886  
2023-03-31 14:30:00-04:00  164.880005   8018199  
2023-03-31 15:30:00-04:00  164.839996  12697522  


# Updating the .csv

Next, we check whether the .csv file exists and, if it does, load it into a DataFrame.

In [3]:
# Load the previously saved price history, if there is one.
filename = f'{symbol}_prices.csv'
if os.path.isfile(filename):
    # Read the index as plain text first. The saved timestamps carry UTC
    # offsets that change across daylight-saving transitions (-05:00 and
    # -04:00), and read_csv's parse_dates cannot turn mixed offsets into a
    # single tz-aware DatetimeIndex.
    data = pd.read_csv(filename, index_col='Datetime')
    # Normalize through UTC, then express the index in New York time so that
    # timestamp arithmetic on data.index works as expected.
    data.index = pd.to_datetime(data.index, utc=True).tz_convert('America/New_York')
else:
    # No file yet: start from an empty DataFrame
    data = pd.DataFrame()

Finally, we keep updating the .csv with the latest data, checking for new prices every 30 minutes.

In [4]:
# Seconds to sleep between update checks (1800 s = 30 minutes).
POLL_SECONDS = 1800

# The timezone object never changes, so build it once outside the loop.
tz = pytz.timezone('America/New_York')

# Runs until the kernel is interrupted: each pass compares the most recent
# hourly bar with the last stored row and, when they differ, refreshes the CSV.
while True:
    # current New York time is the upper bound for the refresh download
    now = datetime.datetime.now(tz)
    end_date = Timestamp(now)

    # retrieve today's hourly bars; the last one is the latest available
    latest_bars = yf.download(symbol, period='1d', interval='1h')

    # Skip this cycle if nothing came back: calling .iloc[-1] on an empty
    # frame would raise IndexError and end the loop.
    if not data.empty and not latest_bars.empty:
        if not latest_bars.iloc[-1].equals(data.iloc[-1]):
            # Re-download starting AT the last stored bar (not one hour after
            # it), so a bar that was still forming when it was saved gets
            # replaced by its updated values.
            last_ts = pd.Timestamp(data.index[-1])
            new_data = yf.download(symbol, start=last_ts, end=end_date, interval='1h')

            if not new_data.empty:
                # append, then keep only the newest copy of any repeated timestamp
                data = pd.concat([data, new_data])
                data = data[~data.index.duplicated(keep='last')]

                # save the updated data as a CSV file
                data.to_csv(filename)

    # wait 30 minutes before checking again
    time.sleep(POLL_SECONDS)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
