## Web Scraping 
Extracting Data from weather.gov and using 

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
#extract weather data
page = requests.get("https://forecast.weather.gov/MapClick.php?lat=37.7772&lon=-122.4168#.WtBou9PwaCc")
soup = BeautifulSoup(page.content, 'html.parser')
soup

<!DOCTYPE html>
<html class="no-js">
<head>
<!-- Meta -->
<meta content="width=device-width" name="viewport"/>
<link href="http://purl.org/dc/elements/1.1/" rel="schema.DC"/>
<title>National Weather Service</title>
<meta content="National Weather Service" name="DC.title"/>
<meta content="NOAA National Weather Service" name="DC.description"/>
<meta content="US Department of Commerce, NOAA, National Weather Service" name="DC.creator"/>
<meta content="2025-03-27T06:40:21+00:00" name="DC.date.created" scheme="ISO8601"/>
<meta content="EN-US" name="DC.language" scheme="DCTERMS.RFC1766"/>
<meta content="weather" name="DC.keywords"/>
<meta content="NOAA's National Weather Service" name="DC.publisher"/>
<meta content="National Weather Service" name="DC.contributor"/>
<meta content="/disclaimer.php" name="DC.rights"/>
<meta content="General" name="rating"/>
<meta content="index,follow" name="robots"/>
<!-- Icons -->
<link href="/build/images/favicon.eab6deff.ico" rel="shortcut icon" type="image

In [3]:
#get the class that contain our data
seven_day = soup.find(id="seven-day-forecast")
forecast_items = seven_day.find_all(class_="tombstone-container")
tonight = forecast_items[0]
print(tonight.prettify())

<div class="tombstone-container">
 <p class="period-name">
  Tonight
 </p>
 <p>
  <img alt="Tonight: A 30 percent chance of rain.  Mostly cloudy, with a low around 54. South southwest wind 7 to 10 mph, with gusts as high as 18 mph.  New precipitation amounts of less than a tenth of an inch possible. " class="forecast-icon" src="newimages/medium/nra30.png" title="Tonight: A 30 percent chance of rain.  Mostly cloudy, with a low around 54. South southwest wind 7 to 10 mph, with gusts as high as 18 mph.  New precipitation amounts of less than a tenth of an inch possible. "/>
 </p>
 <p class="temp temp-low">
  Low: 54 °F
 </p>
 <p class="short-desc">
  Chance Rain
 </p>
</div>


In [4]:
#extract information from the page
period = tonight.find(class_="period-name").get_text()
short_desc = tonight.find(class_="short-desc").get_text()
temp = tonight.find(class_="temp").get_text()
print(period)
print(short_desc)
print(temp)

Tonight
Chance Rain
Low: 54 °F


In [5]:
#get title from the img tage
img = tonight.find("img")
desc = img['title']
print(desc)

Tonight: A 30 percent chance of rain.  Mostly cloudy, with a low around 54. South southwest wind 7 to 10 mph, with gusts as high as 18 mph.  New precipitation amounts of less than a tenth of an inch possible. 


In [6]:
#get all information from the webpage and do a loop
period_tags = seven_day.select(".tombstone-container .period-name")
periods = [pt.get_text() for pt in period_tags]
periods

['Tonight',
 'Thursday',
 'Thursday Night',
 'Friday',
 'Friday Night',
 'Saturday',
 'Saturday Night',
 'Sunday',
 'Sunday Night']

In [7]:
short_descs = [sd.get_text() for sd in seven_day.select(".tombstone-container .short-desc")]
temps = [t.get_text() for t in seven_day.select(".tombstone-container .temp")]
descs = [d["title"] for d in seven_day.select(".tombstone-container img")]

print(short_descs)
print(temps)
print(descs)

['Chance Rain', 'Chance Rain', 'Chance Rain', 'Chance Rainthen MostlySunny', 'Partly Cloudy', 'Mostly Sunny', 'Slight ChanceRain', 'Rain', 'Chance Rain']
['Low: 54 °F', 'High: 63 °F', 'Low: 52 °F', 'High: 61 °F', 'Low: 48 °F', 'High: 61 °F', 'Low: 48 °F', 'High: 61 °F', 'Low: 52 °F']
['Tonight: A 30 percent chance of rain.  Mostly cloudy, with a low around 54. South southwest wind 7 to 10 mph, with gusts as high as 18 mph.  New precipitation amounts of less than a tenth of an inch possible. ', 'Thursday: A 50 percent chance of rain.  Partly sunny, with a high near 63. South southwest wind 7 to 14 mph, with gusts as high as 22 mph.  New precipitation amounts of less than a tenth of an inch possible. ', 'Thursday Night: A 50 percent chance of rain.  Mostly cloudy, with a low around 52. South southwest wind 7 to 10 mph.  New precipitation amounts of less than a tenth of an inch possible. ', 'Friday: A 30 percent chance of rain before 11am.  Mostly sunny, with a high near 61. West wind 6 t

In [8]:
#combine tha data into a pandas dataframe
import pandas as pd
weather = pd.DataFrame({
    "period": periods,
    "short_desc": short_desc,
    "temp" : temps,
    "desc" : descs
})
weather

Unnamed: 0,period,short_desc,temp,desc
0,Tonight,Chance Rain,Low: 54 °F,Tonight: A 30 percent chance of rain. Mostly ...
1,Thursday,Chance Rain,High: 63 °F,Thursday: A 50 percent chance of rain. Partly...
2,Thursday Night,Chance Rain,Low: 52 °F,Thursday Night: A 50 percent chance of rain. ...
3,Friday,Chance Rain,High: 61 °F,Friday: A 30 percent chance of rain before 11a...
4,Friday Night,Chance Rain,Low: 48 °F,"Friday Night: Partly cloudy, with a low around..."
5,Saturday,Chance Rain,High: 61 °F,"Saturday: Mostly sunny, with a high near 61."
6,Saturday Night,Chance Rain,Low: 48 °F,Saturday Night: A 20 percent chance of rain af...
7,Sunday,Chance Rain,High: 61 °F,"Sunday: Rain. Mostly cloudy, with a high near..."
8,Sunday Night,Chance Rain,Low: 52 °F,Sunday Night: A chance of rain. Mostly cloudy...


In [9]:
#get the temp numbers
temp_nums = weather["temp"].str.extract("(?P<temp_num>\d+)", expand=False)
print(temp_nums)

0    54
1    63
2    52
3    61
4    48
5    61
6    48
7    61
8    52
Name: temp_num, dtype: object


In [10]:
#find the mean
weather["temp_num"] = temp_nums.astype('int')
weather["temp_num"].mean()

55.55555555555556

In [11]:
#select rows that happen at nite
is_night = weather["temp"].str.contains("Low")
weather["is_night"] = is_night
print(is_night)

0     True
1    False
2     True
3    False
4     True
5    False
6     True
7    False
8     True
Name: temp, dtype: bool


In [12]:
weather[is_night]

Unnamed: 0,period,short_desc,temp,desc,temp_num,is_night
0,Tonight,Chance Rain,Low: 54 °F,Tonight: A 30 percent chance of rain. Mostly ...,54,True
2,Thursday Night,Chance Rain,Low: 52 °F,Thursday Night: A 50 percent chance of rain. ...,52,True
4,Friday Night,Chance Rain,Low: 48 °F,"Friday Night: Partly cloudy, with a low around...",48,True
6,Saturday Night,Chance Rain,Low: 48 °F,Saturday Night: A 20 percent chance of rain af...,48,True
8,Sunday Night,Chance Rain,Low: 52 °F,Sunday Night: A chance of rain. Mostly cloudy...,52,True


## Using Beautiful Soup to Extract data from Yahoo finance

In [13]:
import requests 
from bs4 import BeautifulSoup

url = "https://finance.yahoo.com/quote/AAPL"

headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ApplWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
response = requests.get(url, headers = headers)

In [15]:
soup = BeautifulSoup(response.text, 'html.parser')

In [17]:
soup

<!DOCTYPE html>

<html class="desktop neo-green dock-upscale" data-color-scheme="auto" data-color-theme-enabled="true" lang="en-US" theme="auto">
<head>
<meta charset="utf-8"/>
<meta content="guce.yahoo.com" name="oath:guce:consent-host"/>
            function _nimbusSendEVLoadEvent() {
                if (_nimbusEvLoad._player){
                    window.finNeoEVReady = Date.now();
                    window.dispatchEvent(new CustomEvent('NIMBUS_EV_READY',{detail: {}}));
                }
            }
            function onNimbusEVPlayerReady(){_nimbusEvLoad._player = true;_nimbusSendEVLoadEvent();}</script><script type="module">if(!window.finWebCore){window.finWebCore=function r(e){const{isModern:t=!0,isDev:i=!1,lang:a=s,devAssets:o,prodAssets:r,crumb:n="",features:c=[],strings:d}=e;let f={};const m=a.substring(a.lastIndexOf("-")+1);return{crumb:n,lang:a,region:m,features:c,store:{},intl:m.toLowerCase(),strings:d,assets:i?o:r,addScriptTag(e,s,t){if(!e)return;const i=document.creat

In [20]:
#fetch page content
price_tag = soup.find("fin-streamer", {"data-field": "regularMarketPrice"})
print(price_tag)

<fin-streamer active="" class="last-price yf-pt5nkw" data-field="regularMarketPrice" data-pricehint="2" data-symbol="ES=F" data-trend="none" data-value="5764.75" key="price">5,764.75</fin-streamer>


In [21]:
#get the price
price = price_tag.text if price_tag else "N/A"
print(price)

5,764.75


In [25]:
#extract the company name
name_tag = soup.find("h1", class_="yf-xxbei9")
print(name_tag)

<h1 class="yf-xxbei9">Apple Inc. (AAPL)</h1>


In [26]:
company_name = name_tag.text if name_tag else "N/A"
print(company_name)

Apple Inc. (AAPL)


In [27]:
print(f"Company name: {company_name}"  )
print(f"Stock Price: {price}")

Company name: Apple Inc. (AAPL)
Stock Price: 5,764.75


## Fetch historic data for analysis with yfinance