## 1. API Pull

In [2]:
import os
from datetime import datetime
from io import StringIO

import pandas as pd
import requests
from dotenv import load_dotenv

# Load the API key from the environment (or a local .env file) and fail early
# with a clear message when it is missing, instead of sending a keyless request.
load_dotenv()
API_KEY = os.getenv('ALPHA_VANTAGE_KEY')
if not API_KEY:
    raise RuntimeError("ALPHA_VANTAGE_KEY is not set - add it to the environment or a .env file")

params = {
    'function': 'TIME_SERIES_DAILY',
    'symbol': 'MSFT',
    'outputsize': 'compact',
    'apikey': API_KEY
}

# A timeout keeps the cell from hanging forever on a stalled connection;
# raise_for_status surfaces HTTP-level failures before JSON parsing.
response = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
response.raise_for_status()
data = response.json()

# The payload may carry only a message (e.g. rate limit or invalid request)
# instead of the series; report that message rather than a bare KeyError later.
if 'Time Series (Daily)' not in data:
    api_message = data.get('Error Message') or data.get('Note') or data.get('Information')
    raise RuntimeError(
        f"Alpha Vantage response has no 'Time Series (Daily)' entry: {api_message or list(data)}"
    )

# Turn the nested 'Time Series (Daily)' mapping into a frame and transpose it so
# the outer keys (presumably trading dates - confirm against the API docs) become rows.
daily_series = data['Time Series (Daily)']
df_api = pd.DataFrame(daily_series).transpose()

# Replace the numbered field labels with plain column names, then cast to float.
column_labels = {
    '1. open': 'open',
    '2. high': 'high',
    '3. low': 'low',
    '4. close': 'close',
    '5. volume': 'volume'
}
df_api = df_api.rename(columns=column_labels)
df_api = df_api.astype(float)

# Validate the result before writing anything to disk.
required_columns = {'open', 'high', 'low', 'close', 'volume'}
assert df_api.size > 0, "API returned empty dataset - check symbol or API connectivity"
assert set(df_api.columns) >= required_columns

# Save a raw snapshot whose file name carries a minute-resolution timestamp.
timestamp = datetime.now().strftime("%Y%m%d-%H%M")
df_api.to_csv(path_or_buf=f"../data/raw/api_alphavantage_MSFT_{timestamp}.csv")

## 2. Scrape a Small Table

In [12]:
from bs4 import BeautifulSoup

# Browser-like headers sent with the page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
    'Referer': 'https://www.google.com/'
}

url = "https://www.macrotrends.net/stocks/charts/MSFT/microsoft/revenue"

# Send request; the timeout prevents the cell from blocking indefinitely.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', {'class': 'historical_data_table'})
if table is None:
    # Without this guard str(None) would be handed to read_html and fail
    # with a misleading "No tables found" error.
    raise ValueError(f"No table with class 'historical_data_table' found at {url}")

# read_html expects a file-like object; passing literal HTML is deprecated.
df = pd.read_html(StringIO(str(table)))[0]
if df.shape[1] != 2:
    raise ValueError(f"Expected 2 columns (Year, Revenue), got {df.shape[1]}: {list(df.columns)}")
df.columns = ['Year', 'Revenue']

def clean_currency(x):
    """Convert a currency-formatted string to a float.

    Parameters
    ----------
    x : any
        Cell value. For strings, every '$', 'B' and ',' character is removed
        and surrounding whitespace is trimmed. The 'B' suffix is dropped
        without scaling the number. Non-string values are passed through.

    Returns
    -------
    float or None or the original value
        The parsed number for a non-blank string, ``None`` for an empty or
        whitespace-only string, and ``x`` unchanged when it is not a string.

    Raises
    ------
    ValueError
        If the cleaned string is not a valid float literal.
    """
    if not isinstance(x, str):
        return x
    cleaned = x.replace('$', '').replace('B', '').replace(',', '').strip()
    # Blank cells (including whitespace-only ones) become missing values
    # instead of crashing float().
    return float(cleaned) if cleaned else None

# Run every Revenue cell through clean_currency.
df['Revenue'] = df['Revenue'].apply(clean_currency)

# Guard rails: enough rows were scraped and the expected column is present.
assert df.shape[0] >= 5, f"Minimum 5 rows required, got {len(df)}"
assert {'Revenue'}.issubset(df.columns), "Missing Revenue column"

# Write the snapshot under a minute-resolution timestamped name and report the path.
timestamp = datetime.now().strftime("%Y%m%d-%H%M")
output_path = f"../data/raw/scrape_macrotrends_MSFT_revenue_{timestamp}.csv"
df.to_csv(path_or_buf=output_path, index=False)
print(f"Data successfully saved to {output_path}")

Data successfully saved to ../data/raw/scrape_macrotrends_MSFT_revenue_20250815-2355.csv
