# Creating the .csv

In [2]:
import yfinance as yf
import pandas as pd
import os.path
import time
import datetime

First, we are going to create a .csv file for the first month of 2023.

In [3]:
# define the stock symbol and time range
symbol = 'AAPL'
start_date = '2023-01-01'
end_date = '2023-02-01'

# retrieve the stock price data
data = yf.download(symbol, start=start_date, end=end_date, interval='1h')

# save the data as a CSV file
data.to_csv(f'{symbol}_prices.csv')

print(data.tail())

[*********************100%***********************]  1 of 1 completed
                                 Open        High         Low       Close  \
Datetime                                                                    
2023-01-31 11:30:00-05:00  143.535004  143.639999  142.979996  143.535004   
2023-01-31 12:30:00-05:00  143.520004  143.660004  143.160004  143.414993   
2023-01-31 13:30:00-05:00  143.419998  143.910004  143.299896  143.824997   
2023-01-31 14:30:00-05:00  143.830002  144.039993  143.259995  143.304993   
2023-01-31 15:30:00-05:00  143.300003  144.339996  143.119995  144.289993   

                            Adj Close   Volume  
Datetime                                        
2023-01-31 11:30:00-05:00  143.535004  4771951  
2023-01-31 12:30:00-05:00  143.414993  3376057  
2023-01-31 13:30:00-05:00  143.824997  3694091  
2023-01-31 14:30:00-05:00  143.304993  5273359  
2023-01-31 15:30:00-05:00  144.289993  8519107  


# Updating the .csv

Now, we do check that the csv is created

In [4]:
# check if the CSV file exists
filename = f'{symbol}_prices.csv'
if os.path.isfile(filename):
    # if the file exists, read it in as a DataFrame
    data = pd.read_csv(filename, index_col='Datetime', parse_dates=True)
else:
    # if the file doesn't exist, create an empty DataFrame
    data = pd.DataFrame()

Finally, we keep updating the .csv with the latest data

In [5]:
symbol = 'AAPL'
end_date = datetime.date.today()

while True:
    # retrieve the latest available stock price data
    latest_data = yf.download(symbol, period='1d', interval='1h').iloc[-1]

    # check if the latest data is different from the last row in the existing DataFrame
    if not data.empty and not latest_data.equals(data.tail(1).iloc[0]):
        # retrieve any new stock price data since the last update
        new_data = yf.download(symbol, start=data.index[-1] + pd.Timedelta(hours=1), end=end_date, interval='1h')

        # append the new data to the existing DataFrame
        data = pd.concat([data, new_data])

        # save the updated data as a CSV file
        data.to_csv(filename)

    # wait for 30 seconds before checking again
    time.sleep(4)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


KeyboardInterrupt: 

In [6]:
print(data)

                                 Open        High         Low       Close  \
Datetime                                                                    
2023-01-03 09:30:00-05:00  130.279999  130.899994  125.230003  125.459999   
2023-01-03 10:30:00-05:00  125.459999  125.870003  124.730003  125.345001   
2023-01-03 11:30:00-05:00  125.350197  125.370003  124.349098  124.660004   
2023-01-03 12:30:00-05:00  124.660004  124.940002  124.190002  124.649902   
2023-01-03 13:30:00-05:00  124.669899  125.000000  124.190002  124.570000   
...                               ...         ...         ...         ...   
2023-04-05 11:30:00-04:00  163.279999  163.520004  162.110001  162.229996   
2023-04-05 12:30:00-04:00  162.199997  162.970001  161.800003  162.869995   
2023-04-05 13:30:00-04:00  162.869995  163.119202  162.649994  162.697601   
2023-04-05 14:30:00-04:00  162.690002  163.399994  162.669998  163.255005   
2023-04-05 15:30:00-04:00  163.259995  163.949997  163.210007  163.759995   