# yFinance
Links for References used for this excercise: 
1. https://github.com/ranaroussi/yfinance
2. https://www.geeksforgeeks.org/python/multithreaded-download-of-yahoo-stock-history-with-python-yfinance/
3. For handling dates and time: 
    *  https://docs.python.org/3/library/datetime.html for calling date and time info
    *  https://stackoverflow.com/questions/32490629/getting-todays-date-in-yyyy-mm-dd-in-python for formatting the date info 
    *  https://www.geeksforgeeks.org/python/python-datetime-strptime-function/ for strptime changing Date and time into a string to be used in the filename
4. Saving dataframe to csv files https://www.datacamp.com/tutorial/save-as-csv-pandas-dataframe
5. Info about saving files into different directories in codespaces https://docs.github.com/en/codespaces/about-codespaces/deep-dive
6. Translate function for stripping out multiple characters from string filename https://www.geeksforgeeks.org/python/python-replace-multiple-characters-at-once/

In [None]:
# Define a list of stocks to track
ticker_list =['META','AAPL','AMZN','NFLX','GOOG']

# Define path to root file
datadir = "./data/"# Problem 1: Data from yfinance

# import required modules 
import yfinance as yf
import time
import datetime
import pandas as pd
from pathlib import Path
import os

In [None]:
# define program start time and dates for use in calculations 
start= time.time()
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
fivedaysago = today - datetime.timedelta(days=5)

# Convert dates to string type data as is required by the yf.download function
strtoday = str(today)
strfivedaysago =str(fivedaysago)

#Checking the formats of the dates are correct
    #print (type(strtoday))
    #print (type(strfivedaysago))
print (f"Today's date is ",today)
print (f"Five days ago was ",fivedaysago)

Today's date is  2025-12-07
Five days ago was  2025-12-02


In [None]:
# Define get_data() function to execute this section of code on call
def get_data():

    # Define the functon for collecting the data we want.
        # Start date will be 5 days ago as per instructions. 
        # End date will be the date the script is ran on. 
        # Tickers list is the list of stocks to watch. 
        # Threads allows for the multithread download of stock data - this reduces the time taken to run the script.  
        # group_by allows for the data to be organised by different columns. 

    # Execute the yf.download operation and populating the data into a dataframe.  
    data =yf.download( 
        start= strfivedaysago,
        end= strtoday,
        tickers = ticker_list,
        threads=True,
        group_by='ticker',
        auto_adjust = True,
    )

    # Transpose the data to allow data to be read from dataframe. 
    data = data.T 

    # Read out each line into the dataframe one line at a time
    for t in ticker_list: 
        print(t)   
        print(data.loc[t])
        print("\n")

    # Get date and time information for file name into the correct format
    time_data = str(datetime.datetime.now())
    format_data = "%Y-%m-%d %H:%M:%S.%f"
    filename = datetime.datetime.strptime(time_data, format_data)

    # Convert the filename into a string format
    strfilename = str(filename)

    # remove the : and - characters from the time segment
    # First we define out list of charachters to replace in a matched set called replacements. 
    replacements = str.maketrans({":": "", "-":""})

    # Then we execute the translate function
    strfilename = strfilename.translate(replacements)

    # Remove the microseconds from the end of the string
    strfilename = strfilename[:-7]
    print (strfilename)


    print (f"path is ", datadir)
    # comment

    # Write data to csv file. 
    data.to_csv(datadir+strfilename+".csv", sep=',')

In [None]:
# run the get_data function
get_data()

[*********************100%***********************]  5 of 5 completed


META
Date      2025-12-02    2025-12-03    2025-12-04    2025-12-05
Price                                                         
Open    6.423400e+02  6.444100e+02  6.760000e+02  6.640000e+02
High    6.478700e+02  6.488500e+02  6.761000e+02  6.746900e+02
Low     6.380700e+02  6.375500e+02  6.600500e+02  6.623900e+02
Close   6.471000e+02  6.396000e+02  6.615300e+02  6.734200e+02
Volume  1.164090e+07  1.113430e+07  2.987460e+07  2.116690e+07


AAPL
Date      2025-12-02    2025-12-03    2025-12-04    2025-12-05
Price                                                         
Open    2.830000e+02  2.862000e+02  2.841000e+02  2.805400e+02
High    2.874000e+02  2.886200e+02  2.847300e+02  2.811400e+02
Low     2.826300e+02  2.833000e+02  2.785900e+02  2.780500e+02
Close   2.861900e+02  2.841500e+02  2.807000e+02  2.787800e+02
Volume  5.366950e+07  4.353870e+07  4.398910e+07  4.724400e+07


AMZN
Date      2025-12-02    2025-12-03    2025-12-04    2025-12-05
Price                               

In [5]:
# This segment allows for monitoring the time taken to complete the execution of the program. 
print ('The program takes ', time.time()-start,'seconds.')

The program takes  0.46162986755371094 seconds.


## Problem 2: Plotting Data

In [11]:
# Problem 2: Plotting Data
# https://stackoverflow.com/questions/43074685/find-file-in-directory-with-the-highest-number-in-the-filename for filename with highest number, as this will be the latest created file

# Import the Rational Expressions module (re). 
import re
import matplotlib 

# List all files in the directory data 
list_of_files = os.listdir(datadir)
#print (list_of_files)

# list newest file amother way
#os.listdir('./data/')
list_of_files.sort(reverse = True)

#show this list
list_of_files

['20251207 222436.csv',
 '20251207 222244.csv',
 '20251207 222059.csv',
 '20251207 220622.csv',
 '20251207 220332.csv',
 '20251207 220307.csv',
 '20251207 220132.csv',
 '20251207 215510.csv',
 '20251207 214855.csv',
 '20251207 214845.csv',
 '20251207 213858.csv',
 '20251207 213657.csv',
 '20251207 212741.csv',
 '20251206 215647.csv',
 '20251126 211628.csv',
 '20251011 125240.csv',
 '20251010 221236.csv',
 '20251010 220006.csv',
 '20251010 215932.csv',
 '20251010 214240.csv',
 '20251010 213904.csv',
 '20251010 213843.csv',
 '20251010 211542.csv',
 '20251010 211004.csv',
 '20251010 210410.csv',
 '20251010 210356.csv',
 '20251010 210256.csv',
 '20251010 150510.csv',
 '20251010 150415.csv',
 '20251010 145916.csv',
 '20251010 145322.csv',
 '20251010 145034.csv',
 '20251010 144404.csv',
 '20251010 144331.csv',
 '20251010 143014.csv',
 '20251009 222639.csv',
 '20251009 222548.csv']

In [12]:
# show the latest file
list_of_files[0]

'20251207 222436.csv'

In [13]:
# Determine which filename has the highest number (N.B. this only works if there are no files with text names in the folder. - 
# See if i can implement a way to only select fro csv files)
def extract_number(f):
    s = re.findall("\d+",f)
    return (int(s[0]) if s else -1,f)

recentfile = (max(list_of_files,key=extract_number))
recentfile = str(recentfile)
print (recentfile)



20251207 222436.csv


  s = re.findall("\d+",f)


In [14]:
# create a dataframe from the file selected as most recent
df = pd.read_csv(datadir+recentfile)
print (df)

# Create a dataframe that only contains the closing prices for each stock for each day
closing_prices = (df['Price']== "Close")
close_df = df.loc[closing_prices]
print (close_df)




   Ticker   Price    2025-12-02    2025-12-03    2025-12-04    2025-12-05
0    GOOG    Open  3.169000e+02  3.160300e+02  3.230450e+02  3.200000e+02
1    GOOG    High  3.186000e+02  3.220400e+02  3.230990e+02  3.238300e+02
2    GOOG     Low  3.142200e+02  3.142800e+02  3.155900e+02  3.199800e+02
3    GOOG   Close  3.160200e+02  3.206200e+02  3.183900e+02  3.220900e+02
4    GOOG  Volume  2.466820e+07  3.228510e+07  2.083160e+07  1.569370e+07
5    META    Open  6.423400e+02  6.444100e+02  6.760000e+02  6.640000e+02
6    META    High  6.478700e+02  6.488500e+02  6.761000e+02  6.746900e+02
7    META     Low  6.380700e+02  6.375500e+02  6.600500e+02  6.623900e+02
8    META   Close  6.471000e+02  6.396000e+02  6.615300e+02  6.734200e+02
9    META  Volume  1.164090e+07  1.113430e+07  2.987460e+07  2.116690e+07
10   AMZN    Open  2.350100e+02  2.333500e+02  2.327700e+02  2.303200e+02
11   AMZN    High  2.389700e+02  2.333800e+02  2.335000e+02  2.312400e+02
12   AMZN     Low  2.335500e+02  2.306

In [15]:
#Read a unique label for each unique stock label from column labelled 'Ticker'
unique_stock = df['Ticker'].unique()
print(unique_stock)

df1 = df[df['Ticker'] == unique_stock[0]] #Netflix
df2 = df[df['Ticker'] == unique_stock[1]] #Apple
df3 = df[df['Ticker'] == unique_stock[2]] #Amazon
df4 = df[df['Ticker'] == unique_stock[3]] #Meta
df5 = df[df['Ticker'] == unique_stock[4]] #Google

# print each individual stock
print (df1)
print (df2)
print (df3)
print (df4)
print (df5)


#date = yesterday
#print (date)

#plot
ax = matplotlib.pyplot.gca()
close_df.plot( x = '2025-10-09', y=['NFLX', 'AAPL', 'AMZN', 'META', 'GOOG'] , ax = ax )


['GOOG' 'META' 'AMZN' 'NFLX' 'AAPL']
  Ticker   Price    2025-12-02    2025-12-03    2025-12-04    2025-12-05
0   GOOG    Open  3.169000e+02  3.160300e+02  3.230450e+02  3.200000e+02
1   GOOG    High  3.186000e+02  3.220400e+02  3.230990e+02  3.238300e+02
2   GOOG     Low  3.142200e+02  3.142800e+02  3.155900e+02  3.199800e+02
3   GOOG   Close  3.160200e+02  3.206200e+02  3.183900e+02  3.220900e+02
4   GOOG  Volume  2.466820e+07  3.228510e+07  2.083160e+07  1.569370e+07
  Ticker   Price    2025-12-02    2025-12-03    2025-12-04    2025-12-05
5   META    Open  6.423400e+02  6.444100e+02  6.760000e+02  6.640000e+02
6   META    High  6.478700e+02  6.488500e+02  6.761000e+02  6.746900e+02
7   META     Low  6.380700e+02  6.375500e+02  6.600500e+02  6.623900e+02
8   META   Close  6.471000e+02  6.396000e+02  6.615300e+02  6.734200e+02
9   META  Volume  1.164090e+07  1.113430e+07  2.987460e+07  2.116690e+07
   Ticker   Price    2025-12-02    2025-12-03    2025-12-04    2025-12-05
10   AMZN    

AttributeError: module 'matplotlib' has no attribute 'pyplot'

In [None]:
# define a plot for the closing price over the last 5 days
#ax=close_df.plot(kind = 'line', x = '2025-10-09', y = 'NFLX', marker = '.', c = 'red' , label = 'Netflix')
#pt.show()

# Create the plot_data() function
#def plot_data():
    #function goes here.
    #gibberish

#plot_data()

## End