# yFinance
Links to references used for this exercise:
1. https://github.com/ranaroussi/yfinance
2. https://www.geeksforgeeks.org/python/multithreaded-download-of-yahoo-stock-history-with-python-yfinance/
3. For handling dates and time: 
    *  https://docs.python.org/3/library/datetime.html for calling date and time info
    *  https://stackoverflow.com/questions/32490629/getting-todays-date-in-yyyy-mm-dd-in-python for formatting the date info 
    *  https://www.geeksforgeeks.org/python/python-datetime-strptime-function/ for strptime, which parses a date/time string into a datetime object (used when building the filename)
4. Saving dataframe to csv files https://www.datacamp.com/tutorial/save-as-csv-pandas-dataframe
5. Info about saving files into different directories in codespaces https://docs.github.com/en/codespaces/about-codespaces/deep-dive
6. Translate function for stripping out multiple characters from string filename https://www.geeksforgeeks.org/python/python-replace-multiple-characters-at-once/

In [16]:
# Problem 1: Data from yfinance

# import required modules 
import yfinance as yf
import time
import datetime
import pandas as pd
from pathlib import Path
import os

# Record when the run began so the total duration can be reported at the end
start = time.time()

# Date window for the download: from five days back up to today
today = datetime.date.today()
fivedaysago = today - datetime.timedelta(days=5)

# The dates are handed to yf.download as "YYYY-MM-DD" strings
strtoday = today.isoformat()
strfivedaysago = fivedaysago.isoformat()

# Echo both dates so the window can be checked by eye
print("Today's date is ", today)
print("Five days ago was ", fivedaysago)

# Stocks to track
ticker_list = ['META', 'AAPL', 'AMZN', 'NFLX', 'GOOG']

# Directory the CSV files are written to and later read from
datadir = "./data/"

# Define get_data() function to execute this section of code on call
def get_data():
    """Download recent prices for the tracked tickers and save them as a CSV.

    Reads the module-level ``strfivedaysago`` and ``strtoday`` (download
    window), ``ticker_list`` (symbols to fetch) and ``datadir`` (output
    directory). The downloaded frame is transposed, the slice for each ticker
    is printed, and the transposed frame is written to
    ``<datadir>/<YYYYMMDD HHMMSS>.csv``. The output directory is created if
    it does not exist yet.

    Returns:
        None. Results are printed and written to disk. If the download
        produces an empty frame, a message is printed and no file is written.
    """
    # threads=True requests a multithreaded download; group_by='ticker' puts
    # the ticker symbol on the outer column level so that, after the
    # transpose below, each ticker can be selected with data.loc[ticker].
    data = yf.download(
        start=strfivedaysago,
        end=strtoday,
        tickers=ticker_list,
        threads=True,
        group_by='ticker',
        auto_adjust=True,
    )

    # An empty frame has no ticker rows to select, so the loop below would
    # fail with a KeyError; stop early instead of writing a useless file.
    if data.empty:
        print("No data was returned by the download; nothing to save.")
        return

    # Transpose so tickers/price fields become rows and dates become columns.
    data = data.T

    # Print the block of rows belonging to each ticker in turn.
    for t in ticker_list:
        print(t)
        print(data.loc[t])
        print("\n")

    # Build the timestamp for the file name directly in its final form.
    # Formatting with strftime avoids round-tripping through str()/strptime,
    # which breaks when the current time has zero microseconds (str() then
    # omits the fractional part the "%f" format expects).
    strfilename = datetime.datetime.now().strftime("%Y%m%d %H%M%S")
    print(strfilename)

    print("path is ", datadir)

    # Make sure the target directory exists before writing into it.
    outdir = Path(datadir)
    outdir.mkdir(parents=True, exist_ok=True)

    # Write data to csv file.
    data.to_csv(outdir / f"{strfilename}.csv", sep=',')

# Invoke the download-and-save routine
get_data()

# Report how long the run took, measured from the `start` timestamp
print('The program takes ', time.time() - start, 'seconds.')

[*********************100%***********************]  5 of 5 completed

Today's date is  2025-10-10
Five days ago was  2025-10-05
META
Date      2025-10-06    2025-10-07    2025-10-08    2025-10-09
Price                                                         
Open    7.051900e+02  7.177200e+02  7.134500e+02  7.182800e+02
High    7.168800e+02  7.185000e+02  7.196500e+02  7.335100e+02
Low     6.905100e+02  7.057500e+02  7.078100e+02  7.124400e+02
Close   7.156600e+02  7.130800e+02  7.178400e+02  7.335100e+02
Volume  2.165470e+07  1.206290e+07  1.079060e+07  1.271720e+07


AAPL
Date      2025-10-06    2025-10-07    2025-10-08    2025-10-09
Price                                                         
Open    2.579900e+02  2.568100e+02  2.565200e+02  2.578100e+02
High    2.590700e+02  2.574000e+02  2.585200e+02  2.580000e+02
Low     2.550500e+02  2.554300e+02  2.561100e+02  2.531400e+02
Close   2.566900e+02  2.564800e+02  2.580600e+02  2.540400e+02
Volume  4.466410e+07  3.195580e+07  3.649690e+07  3.832200e+07


AMZN
Date      2025-10-06    2025-10-07    202




In [24]:
# Problem 2: Plotting Data
# https://stackoverflow.com/questions/43074685/find-file-in-directory-with-the-highest-number-in-the-filename for filename with highest number, as this will be the latest created file

import re

# Gather the name of every entry currently in the data directory and show them
list_of_files = os.listdir(datadir)
print(list_of_files)

def extract_number(f):
    """Return a sort key that orders file names by the digits in their stem.

    The extension is dropped and every digit left in the name is joined into
    one integer, so ``"20251010 211542.csv"`` and ``"2025-10-10 211542.csv"``
    both map to ``20251010211542``. Used as ``key=`` for ``max``/``sorted``
    to find the most recently timestamped file.

    Args:
        f: File name (not a full path) as a string.

    Returns:
        A ``(number, f)`` tuple. ``number`` is ``-1`` when the stem contains
        no digits; the name itself is the tie-breaker.
    """
    # Strip the extension first: anchoring a digit match at the very end of
    # the name can never succeed for names that end in ".csv".
    stem = f.rsplit(".", 1)[0]
    # Raw string so "\D" is a regex class, not an invalid string escape.
    digits = re.sub(r"\D", "", stem)
    return (int(digits) if digits else -1, f)

# Show the entry that ranks highest under the extract_number key
print(max(list_of_files, key=extract_number))


#recentfile = max(sorted(list_of_files, key=os.path.getctime))
#print (recentfile)
#df = pd.read_csv(datadir+recentfile+".csv")


# pull the closing prices from the main dataframe and put them in their own dataframe
#ax1=data.plot(kind = 'line', x = 'Date', y = 'Close', c = 'red' , label = 'Close')
#pt.show()

# Create the plot_data() function
#def plot_data():
    #function goes here.
    #gibberish

#plot_data()


['20251010 210410.csv', '20251010 210356.csv', '20251010 210256.csv', '20251009 222639.csv', '20251010 145322.csv', '20251010 145916.csv', '20251010 150415.csv', '2025-10-09 221400.csv', '20251010 211004.csv', '20251010 144331.csv', '20251009 222548.csv', '20251010 144404.csv', '20251010 145034.csv', '20251010 150510.csv', '20251010 211542.csv', '20251010 143014.csv']
20251010 211542.csv


  s = re.findall("\d+$",f)


## End