# Downloading Data from Yahoo Finance

Before we can do anything interesting, we need to get data. Downloading it ourselves can be a pain, so let's automate it!

In [239]:
from pprint import pprint
import numpy as np
# Let's import the yahoo finance API
import yfinance as yf

def download_data(ticker, start_date, end_date):
    """
    Download historical price data from Yahoo Finance.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol (e.g. 'AAPL')
    start_date : str
        Start date in 'YYYY-MM-DD'
    end_date : str
        End date in 'YYYY-MM-DD'

    Returns
    -------
    pandas.DataFrame
        Historical price data, exactly as returned by ``yf.download``
        (prices are adjusted because ``auto_adjust=True`` is requested).
    """
    # Request price adjustment explicitly instead of relying on the library
    # default: this pins the result to adjusted prices regardless of the
    # installed yfinance version and avoids the warning yfinance prints when
    # the argument is left implicit.
    return yf.download(
        ticker,
        start=start_date,
        end=end_date,
        progress=False,
        auto_adjust=True,
    )

# Quick smoke test: a few days of Walmart prices
pprint(
    download_data(ticker='WMT', start_date='2016-02-10', end_date='2016-02-16')
)


Price           Close       High        Low       Open    Volume
Ticker            WMT        WMT        WMT        WMT       WMT
Date                                                            
2016-02-10  18.277872  18.500129  18.238977  18.389001  29127900
2016-02-11  18.147295  18.269537  17.997271  18.063948  33560100
2016-02-12  18.386217  18.405664  18.022271  18.202856  29086500


  df = yf.download(


# Dealing with Multiple Stocks

The previous code will only allow us to query on a single stock, so let's wrap it so we can iterate over multiple stocks.

In [240]:
def download_from_yf(tickers, start_date, end_date):
    """Fetch price history for several tickers.

    Calls ``download_data`` once per entry of ``tickers`` (in order) and
    returns a dict keyed by ticker symbol. If a symbol is repeated, the
    result of its last download is the one kept.
    """
    return {
        symbol: download_data(symbol, start_date, end_date)
        for symbol in tickers
    }

In [241]:
import datetime
from dateutil.relativedelta import relativedelta

# Symbols to download
tickers = ['WMT', 'XOM']

# Window: the calendar month that ends today
end_date = datetime.date.today()
start_date = end_date - relativedelta(months=1)

# ISO 8601 text (YYYY-MM-DD) for both ends of the window
s_date, e_date = start_date.isoformat(), end_date.isoformat()

print("Fetch stock data for %s from %s to %s" % (", ".join(tickers), s_date, e_date))
data = download_from_yf(tickers, s_date, e_date)


  df = yf.download(
  df = yf.download(


Fetch stock data for WMT, XOM from 2025-11-17 to 2025-12-17


But the Yahoo Finance API wants the dates as strings, so we need to convert from a datetime.date object to a string!
* The string is formatted as YYYY-MM-DD (which also happens to be the default format Python uses)

In [242]:
# strftime pattern for YYYY-MM-DD
date_format = '%Y-%m-%d'

# Render both ends of the window with the same pattern.
# str(start_date) would give the same text, because %Y-%m-%d is
# the default string form of a date.
s_date, e_date = (day.strftime(date_format) for day in (start_date, end_date))

In [243]:
from pprint import pprint

# Download using the strftime-formatted dates and show what came back
data = download_from_yf(tickers, s_date, e_date)
pprint(data)

  df = yf.download(
  df = yf.download(


{'WMT': Price            Close        High         Low        Open    Volume
Ticker             WMT         WMT         WMT         WMT       WMT
Date                                                                
2025-11-17  102.740562  103.109812  101.892293  102.371320  15289400
2025-11-18  101.183739  103.129769  101.173757  102.770508  17604500
2025-11-19  100.405327  101.503088   99.477218  101.373355  22157000
2025-11-20  106.892105  107.690481  102.471117  103.728556  50375900
2025-11-21  105.105743  107.929988  104.506966  107.760336  41420800
2025-11-24  103.848305  106.053808  103.548920  105.145663  42519200
2025-11-25  106.782326  107.201469  104.017964  104.397188  20212900
2025-11-26  108.878059  109.367060  106.951985  106.981923  17783300
2025-11-28  110.285194  110.474803  108.778264  109.077657   9846500
2025-12-01  111.303116  111.532651  110.135498  110.285194  17236800
2025-12-02  112.181328  112.420837  110.564621  111.243238  18711800
2025-12-03  114.177261  11

# What if we want weekly data?

Unfortunately, YF doesn't seem to provide weekly and monthly data through their API. 

We need to generate this data ourselves

* Open Price is the opening price we observe on the Monday
* Similarly, closing price is determined by the closing price on Friday
 * Actually this isn't completely true due to fixed intervals, holidays, etc... but we will leave those details for the implementation
* Adjusted Close works the same as close
* Low is the lowest 'low' in a week
* High is the highest 'high' in the week
* Volume is the average volume over the week
 * $Volume = \frac{1}{N} \sum_{i=1}^{N} Volume_i$
 * Yahoo Finance actually truncates the number since they track volume to the hundreds place

In [244]:
import datetime
import numpy as np
import pandas as pd
import yfinance as yf

# strftime/strptime pattern (YYYY-MM-DD) used to build and parse the "Date"
# strings carried in the row dicts below.
date_format = "%Y-%m-%d"

def download_data(ticker, start_date, end_date):
    """Download price history for one ticker as a list of row dicts.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol; also stored in every row under "Symbol".
    start_date, end_date : str
        Passed straight to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    list of dict
        One dict per downloaded row with the keys "Date" (formatted with
        ``date_format``), "Open", "High", "Low", "Close", "Adj_Close" (each
        the ``str()`` of the value), "Volume" (integer rendered as a string,
        "0" when the value is missing) and "Symbol". An empty list is
        returned when the download yields no rows.
    """
    # Important: auto_adjust=False keeps the separate "Adj Close" column
    # (it does not exist otherwise).
    df = yf.download(ticker, start=start_date, end=end_date, progress=False, auto_adjust=False)

    # Nothing came back (e.g. unknown symbol or empty date range). Without
    # this guard a frame that has no columns at all would raise KeyError at
    # the "Close" fallback further down.
    if df is None or df.empty:
        return []

    # yfinance can return MultiIndex columns; keep only this ticker's slice.
    if isinstance(df.columns, pd.MultiIndex):
        if ticker in df.columns.get_level_values(-1):
            df = df.xs(ticker, axis=1, level=-1)
        elif ticker in df.columns.get_level_values(0):
            df = df.xs(ticker, axis=1, level=0)

    # Turn the date index into a regular "Date" column.
    df = df.reset_index()

    # Normalize the adjusted-close column name; fall back to "Close" when
    # no adjusted column is present.
    if "Adj Close" in df.columns:
        df = df.rename(columns={"Adj Close": "Adj_Close"})
    if "Adj_Close" not in df.columns:
        df["Adj_Close"] = df["Close"]

    rows = []
    for _, r in df.iterrows():
        rows.append({
            "Date": r["Date"].strftime(date_format),
            "Open": str(r["Open"]),
            "High": str(r["High"]),
            "Low": str(r["Low"]),
            "Close": str(r["Close"]),
            "Adj_Close": str(r["Adj_Close"]),
            "Volume": str(int(r["Volume"])) if not pd.isna(r["Volume"]) else "0",
            "Symbol": ticker,
        })
    return rows

def download_from_yf(tickers, start_date, end_date):
    """Download rows for every ticker and return them keyed by ticker symbol.

    ``download_data`` is called once per entry of ``tickers``, in order; a
    repeated symbol keeps the result of its last call.
    """
    rows_by_ticker = {}
    for symbol in tickers:
        rows_by_ticker[symbol] = download_data(symbol, start_date, end_date)
    return rows_by_ticker

def convert_to_weekly(data):
    """Collapse per-ticker daily rows into one row per calendar week.

    ``data`` maps ticker -> list of row dicts whose "Date" is a string in
    ``date_format``. Rows are grouped by the Monday of their week. Each weekly
    row starts as a copy of the earliest row of that week (so it keeps that
    row's "Date", "Open" and any other keys), and is then updated from the
    later rows of the week: "Close" / "Adj_Close" come from the latest row,
    "High" is the largest high, "Low" the smallest low. "Volume" becomes the
    mean of the week's volumes as a float. The input rows are not modified.

    Returns a dict mapping ticker -> list of weekly row dicts, oldest first.
    """
    def _when(row):
        # Parse the row's date string into a datetime
        return datetime.datetime.strptime(row["Date"], date_format)

    def _weekly(rows):
        buckets = {}

        for day in sorted(rows, key=_when):
            stamp = _when(day)
            # Key each bucket by the Monday that opens the row's week
            monday = stamp - datetime.timedelta(days=stamp.weekday())
            key = monday.strftime(date_format)

            bar = buckets.get(key)
            if bar is None:
                # First (earliest) row of the week seeds the weekly bar
                bar = dict(day)
                bar["Volume"] = [day["Volume"]]
                buckets[key] = bar
                continue

            bar["Volume"].append(day["Volume"])
            bar["Close"] = day["Close"]
            bar["Adj_Close"] = day["Adj_Close"]

            if float(day["High"]) > float(bar["High"]):
                bar["High"] = day["High"]
            if float(day["Low"]) < float(bar["Low"]):
                bar["Low"] = day["Low"]

        weekly = []
        for bar in buckets.values():
            # Volumes may arrive as strings; go through float() before int()
            volumes = [int(float(raw)) for raw in bar["Volume"]]
            # Weekly volume is the average of the daily volumes
            bar["Volume"] = float(np.mean(volumes))
            weekly.append(bar)

        return weekly

    return {ticker: _weekly(rows) for ticker, rows in data.items()}


In [245]:
from pprint import pprint

# Fixed historical window for the weekly example
tickers = ["WMT", "XOM"]
s_date, e_date = "2016-02-23", "2016-03-23"

# Daily rows first, then collapse each ticker's rows into weekly ones
data = download_from_yf(tickers, s_date, e_date)
weekly_data = convert_to_weekly(data)
pprint(weekly_data)


{'WMT': [{'Adj_Close': '18.4779052734375',
          'Close': '22.170000076293945',
          'Date': '2016-02-23',
          'High': '22.733333587646484',
          'Low': '21.760000228881836',
          'Open': '21.946666717529297',
          'Symbol': 'WMT',
          'Volume': 31716600.0},
         {'Adj_Close': '18.55291175842285',
          'Close': '22.260000228881836',
          'Date': '2016-02-29',
          'High': '22.426666259765625',
          'Low': '21.626667022705078',
          'Open': '22.209999084472656',
          'Symbol': 'WMT',
          'Volume': 30478560.0},
         {'Adj_Close': '18.799415588378906',
          'Close': '22.389999389648438',
          'Date': '2016-03-07',
          'High': '22.82666778564453',
          'Low': '22.09000015258789',
          'Open': '22.206666946411133',
          'Symbol': 'WMT',
          'Volume': 29540880.0},
         {'Adj_Close': '18.737842559814453',
          'Close': '22.316667556762695',
          'Date': '2016-03-1

# Getting monthly data

It's easier than weekly since you don't have to deal with weekdays! I am not going to go into the details...

# Aside on Numerical Analysis & Processing

Python represents non-integer numbers as floating point numbers (floats) by default. This is fine for many applications, but finance is not one of them. Many finance companies (e.g. MasterCard) choose to work with integers instead and convert 123.45 into 12345.

Floats are great because they can store data in a massive range (for a 32-bit float, $1.175494 \times 10^{-38} \leq x \leq 3.402823 \times 10^{38}$), but they cannot represent every rational number or even every integer in that range. The easy way to think about it is that there are a finite number of representations, which is much smaller than the range. To represent a number, we want to minimize the error between the actual number and its representation.

If we start doing lots of math with floating point numbers, we will start increasing the amount of error!


Look at some of the decimals above. Unfortunately, the Yahoo Finance API uses floats.

In [246]:
import decimal

# A Decimal built straight from the float carries the float's binary error
dec = decimal.Decimal.from_float(73.910002)
print(dec)

# Round to two places, then go through str() so the Decimal is built from text
dec = decimal.Decimal(
    str(round(73.910002, 2))
)
print(dec)


73.9100020000000057507349993102252483367919921875
73.91


# Getting the data into Excel

We might want to leverage Excel in our analysis. Fortunately, Python has a module for working with Excel!

In [247]:
# Let's import it and get started
import xlsxwriter

In [248]:
def write_xlsx(data, output_file):
    """Write price rows to an Excel workbook, one worksheet per ticker.

    Parameters
    ----------
    data : dict
        Maps ticker symbol -> list of row dicts. Every row must provide the
        keys listed in ``header``; 'Date' is a string in ``date_format`` and
        the remaining fields are numbers or numeric strings.
    output_file : str
        Path of the .xlsx file to create.

    Worksheets are added in sorted ticker order and named after the ticker.
    Row 0 holds the header; data rows follow in ascending date order.
    'Volume' is written through ``int()`` (fractions are truncated) and the
    price fields are rounded to two decimals. Any error raised while parsing
    or writing a row propagates to the caller, after the workbook is closed.
    """
    header = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj_Close']

    def _row_date(row):
        # Parse the row's date string so rows sort chronologically
        return datetime.datetime.strptime(row['Date'], date_format)

    def _write_val(sheet, field, row, row_pos, col_pos, date_cell_format):
        # A little helper func for dealing with the different data types
        if field == 'Date':
            sheet.write_datetime(row_pos, col_pos, _row_date(row), date_cell_format)
        elif field == 'Volume':
            # Volume is written as an integer
            sheet.write_number(row_pos, col_pos, int(row[field]))
        else:
            # Round to two decimals and build the Decimal from text so the
            # float's binary representation error is not carried along
            sheet.write_number(
                row_pos,
                col_pos,
                decimal.Decimal(str(round(float(row[field]), 2)))
            )

    # The context manager closes the workbook on exit, including when a row
    # fails to parse or write, so the workbook is never left open.
    with xlsxwriter.Workbook(output_file) as workbook:
        # Number format Excel should use to display the date column
        excel_date_format = workbook.add_format({'num_format': 'mm/dd/yy'})

        # Add the worksheets in sorted order
        for ticker in sorted(data):
            # Just name the worksheet after the stock
            worksheet = workbook.add_worksheet(ticker)

            # Header goes in the first row
            for col_pos, field in enumerate(header):
                worksheet.write(0, col_pos, field)

            # Data rows, oldest date first, starting right below the header
            ordered_rows = sorted(data[ticker], key=_row_date)
            for row_pos, row in enumerate(ordered_rows, start=1):
                for col_pos, field in enumerate(header):
                    _write_val(worksheet, field, row, row_pos, col_pos, excel_date_format)
        
    

In [249]:
# And let's try it
import os

# Write the weekly rows out to a workbook
out_file = 'ticker_prices.xlsx'
write_xlsx(weekly_data, out_file)

# ticker_prices.xlsx should now be listed in the working directory
assert out_file in os.listdir('.')

# Conclusion

Open ticker_prices.xlsx in Excel and see the results. Feel free to tweak this notebook and experiment with it!

If we wanted to build a portfolio testing (backtesting) framework, we don't want to have to keep querying for data.
* Solution: Build a transparent wrapper around our queries to fetch them from a local cache if they exist, otherwise reach out to Yahoo Finance
 * A bit of overhead when populating the database, but much faster going forward