# Creating the .csv

In [71]:
import yfinance as yf
import pandas as pd
import os.path
import time
import datetime
import pytz
from pandas import Timestamp

First, we are going to create a .csv file for the first month of 2023.

In [72]:
# define the stock symbol and time range
symbol = 'AAPL'
start_date = '2023-01-01'
end_date = '2023-02-01'

# retrieve the stock price data
data = yf.download(symbol, start=start_date, end=end_date, interval='1h')

# save the data as a CSV file
data.to_csv(f'{symbol}_prices.csv')

print(data.tail())

[*********************100%***********************]  1 of 1 completed
                                 Open        High         Low       Close  \
Datetime                                                                    
2023-01-31 11:30:00-05:00  143.535004  143.639999  142.979996  143.535004   
2023-01-31 12:30:00-05:00  143.520004  143.660004  143.160004  143.414993   
2023-01-31 13:30:00-05:00  143.419998  143.910004  143.299896  143.824997   
2023-01-31 14:30:00-05:00  143.830002  144.039993  143.259995  143.304993   
2023-01-31 15:30:00-05:00  143.300003  144.339996  143.119995  144.289993   

                            Adj Close   Volume  
Datetime                                        
2023-01-31 11:30:00-05:00  143.535004  4771951  
2023-01-31 12:30:00-05:00  143.414993  3376057  
2023-01-31 13:30:00-05:00  143.824997  3694091  
2023-01-31 14:30:00-05:00  143.304993  5273359  
2023-01-31 15:30:00-05:00  144.289993  8519107  


# Updating the .csv

Now, we do check that the csv is created

In [73]:
# check if the CSV file exists
filename = f'{symbol}_prices.csv'
if os.path.isfile(filename):
    # if the file exists, read it in as a DataFrame
    data = pd.read_csv(filename, index_col='Datetime', parse_dates=True)
else:
    # if the file doesn't exist, create an empty DataFrame
    data = pd.DataFrame()

Finally, we keep updating the .csv with the latest data

In [74]:
while True:
    # retrieve the latest available stock price data
    tz = pytz.timezone('America/New_York')
    now = datetime.datetime.now(tz)
    end_date = Timestamp(now)
    latest_data = yf.download(symbol, period='1d', interval='1h').iloc[-1]

    # check if the latest data is different from the last row in the existing DataFrame
    if not data.empty and not latest_data.equals(data.tail(1).iloc[0]):
        # retrieve any new stock price data since the last update
        new_data = yf.download(symbol, start=data.index[-1] + pd.Timedelta(hours=1), end=end_date, interval='1h')

        # append the new data to the existing DataFrame
        data = pd.concat([data, new_data])

        # save the updated data as a CSV file
        data.to_csv(filename)

    # wait for 1 hour before checking again
    time.sleep(1800)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


KeyboardInterrupt: 

In [69]:
data

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2023-01-03 09:30:00-05:00,130.279999,130.899994,125.230003,125.459999,125.459999,27775505.0
2023-01-03 10:30:00-05:00,125.459999,125.870003,124.730003,125.345001,125.345001,18104999.0
2023-01-03 11:30:00-05:00,125.350197,125.370003,124.349098,124.660004,124.660004,11248777.0
2023-01-03 12:30:00-05:00,124.660004,124.940002,124.190002,124.649902,124.649902,8860080.0
2023-01-03 13:30:00-05:00,124.669899,125.000000,124.190002,124.570000,124.570000,8388062.0
...,...,...,...,...,...,...
2023-04-06 14:30:00-04:00,164.809998,164.940002,164.470001,164.514999,164.514999,3861450.0
2023-04-06 15:30:00-04:00,164.520004,164.759903,164.320007,164.649994,164.649994,4488051.0
2023-04-10 09:30:00-04:00,161.419998,162.000000,160.080002,160.154999,160.154999,15225797.0
2023-04-10 10:30:00-04:00,160.160004,161.399994,160.145004,161.134995,161.134995,6669628.0
