# 📊 Financial Data Scraper
This notebook downloads annual financial statements (income statement, balance sheet, and cash flow) from Yahoo Finance via the `yfinance` library for selected companies across three sectors:
- Apple (Tech)
- JPMorgan Chase (Finance)
- Johnson & Johnson (Healthcare)


In [1]:
# Notebook 1: Data Acquisition

import yfinance as yf
import pandas as pd
import os

# Companies to download, one per sector; "ticker" is the symbol handed to
# yfinance and "name" is the human-readable company name.
companies = [
    dict(ticker="AAPL", name="Apple"),              # Tech
    dict(ticker="JPM", name="JPMorgan Chase"),      # Finance
    dict(ticker="JNJ", name="Johnson & Johnson"),   # Healthcare
]

# Make sure the raw-data output folder is present (no error if it already exists)
os.makedirs("data/raw", exist_ok=True)

def save_df_to_csv(df: pd.DataFrame, company: str, statement: str,
                   output_dir: str = "data/raw") -> str:
    """Write one financial statement to ``<output_dir>/<company>_<statement>.csv``.

    Parameters
    ----------
    df : pd.DataFrame
        Statement to persist. Written with pandas' default ``to_csv`` settings,
        so the index is included as the first column.
    company : str
        Identifier used as the file-name prefix (callers in this notebook pass
        the ticker symbol).
    statement : str
        Statement kind used as the file-name suffix, e.g. ``"income_statement"``.
    output_dir : str, default "data/raw"
        Directory to write into; created (including parents) if it is missing.

    Returns
    -------
    str
        Path of the CSV file that was written.
    """
    # Create the target directory here so the function does not depend on a
    # separate setup statement having been run first.
    os.makedirs(output_dir, exist_ok=True)
    # A literal "/" keeps the default path and the printed message identical
    # to the previous hard-coded "data/raw/..." form on every platform.
    filename = f"{output_dir}/{company}_{statement}.csv"
    df.to_csv(filename)
    print(f"Saved {filename}")
    return filename

# One entry per statement to download:
# (attribute on the yfinance Ticker object, file-name suffix, label used in messages)
STATEMENTS = [
    ("financials", "income_statement", "income statement"),
    ("balance_sheet", "balance_sheet", "balance sheet"),
    ("cashflow", "cash_flow", "cash flow"),
]

for company in companies:
    symbol = company["ticker"]
    ticker = yf.Ticker(symbol)

    for attr, suffix, label in STATEMENTS:
        # Each statement is fetched independently so one failure does not
        # prevent the remaining statements (or companies) from being saved.
        try:
            # Transposed before saving — presumably so each reporting period
            # becomes a row; confirm against the yfinance frame layout.
            statement_df = getattr(ticker, attr).transpose()

            # NOTE(review): yfinance tends to log the problem and hand back an
            # empty frame rather than raise when data is unavailable. Skip it
            # instead of writing a header-only CSV and reporting it as saved.
            if statement_df.empty:
                print(f"No {label} data returned for {symbol}; nothing saved")
                continue

            save_df_to_csv(statement_df, symbol, suffix)
        except Exception as e:
            print(f"Failed to get {label} for {symbol}: {e}")



Saved data/raw/AAPL_income_statement.csv
Saved data/raw/AAPL_balance_sheet.csv
Saved data/raw/AAPL_cash_flow.csv
Saved data/raw/JPM_income_statement.csv
Saved data/raw/JPM_balance_sheet.csv
Saved data/raw/JPM_cash_flow.csv
Saved data/raw/JNJ_income_statement.csv
Saved data/raw/JNJ_balance_sheet.csv
Saved data/raw/JNJ_cash_flow.csv
