### Webscrapping with Python
- Goal: Scraping data from a website and converting it into a Pandas DataFrame to train a Machine Learning Model


In [17]:
import os
from dotenv import load_dotenv
import requests
import bs4

In [18]:
load_dotenv()

True

### Scrapping from API

In [19]:
os.environ['OPENWEATHER_API'] = os.getenv('OPENWEATHER_API')

In [24]:
api_key = os.getenv('OPENWEATHER_API')
city = 'karachi'
url = f"https://api.openweathermap.org/data/2.5/weather?appid={api_key}&q={city}&units=metric"

response = requests.get(url=url)

In [25]:
response.status_code

200

In [29]:
json_response = response.json()

In [30]:
json_response

{'coord': {'lon': 67.0822, 'lat': 24.9056},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01d'}],
 'base': 'stations',
 'main': {'temp': 35.9,
  'feels_like': 33.31,
  'temp_min': 35.9,
  'temp_max': 35.9,
  'pressure': 1011,
  'humidity': 13,
  'sea_level': 1011,
  'grnd_level': 1008},
 'visibility': 6000,
 'wind': {'speed': 5.14, 'deg': 250},
 'clouds': {'all': 0},
 'dt': 1741611963,
 'sys': {'type': 1,
  'id': 7576,
  'country': 'PK',
  'sunrise': 1741571164,
  'sunset': 1741613887},
 'timezone': 18000,
 'id': 1174872,
 'name': 'Karachi',
 'cod': 200}

### Scrapping from Web

In [31]:
url = "http://books.toscrape.com/"

response = requests.get(url=url)

In [34]:
soup = bs4.BeautifulSoup(response.text, 'html.parser')

In [35]:
# Extract book titles and prices
books = soup.find_all('article', class_='product_pod')

In [38]:
book_data = []
for book in books:
    title = book.h3.a['title']
    price = book.find('p', class_='price_color').text.strip()
    book_data.append([title, price])

In [39]:
book_data

[['A Light in the Attic', 'Â£51.77'],
 ['Tipping the Velvet', 'Â£53.74'],
 ['Soumission', 'Â£50.10'],
 ['Sharp Objects', 'Â£47.82'],
 ['Sapiens: A Brief History of Humankind', 'Â£54.23'],
 ['The Requiem Red', 'Â£22.65'],
 ['The Dirty Little Secrets of Getting Your Dream Job', 'Â£33.34'],
 ['The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull',
  'Â£17.93'],
 ['The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics',
  'Â£22.60'],
 ['The Black Maria', 'Â£52.15'],
 ['Starving Hearts (Triangular Trade Trilogy, #1)', 'Â£13.99'],
 ["Shakespeare's Sonnets", 'Â£20.66'],
 ['Set Me Free', 'Â£17.46'],
 ["Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)", 'Â£52.29'],
 ['Rip it Up and Start Again', 'Â£35.02'],
 ['Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991',
  'Â£57.25'],
 ['Olio', 'Â£23.88'],
 ['Mesaerion: The Best Science Fiction Stories 1800-1849', 'Â£37.59'],
 ['Libertarian

In [52]:
import pandas as pd
import numpy as np

In [45]:
df = pd.DataFrame(book_data, columns=['A', 'B'])

In [46]:
df

Unnamed: 0,A,B
0,A Light in the Attic,Â£51.77
1,Tipping the Velvet,Â£53.74
2,Soumission,Â£50.10
3,Sharp Objects,Â£47.82
4,Sapiens: A Brief History of Humankind,Â£54.23
5,The Requiem Red,Â£22.65
6,The Dirty Little Secrets of Getting Your Dream...,Â£33.34
7,The Coming Woman: A Novel Based on the Life of...,Â£17.93
8,The Boys in the Boat: Nine Americans and Their...,Â£22.60
9,The Black Maria,Â£52.15


In [53]:
df['B'] = df.B.str.split('£').str[1].astype(np.float64)

In [54]:
df

Unnamed: 0,A,B
0,A Light in the Attic,51.77
1,Tipping the Velvet,53.74
2,Soumission,50.1
3,Sharp Objects,47.82
4,Sapiens: A Brief History of Humankind,54.23
5,The Requiem Red,22.65
6,The Dirty Little Secrets of Getting Your Dream...,33.34
7,The Coming Woman: A Novel Based on the Life of...,17.93
8,The Boys in the Boat: Nine Americans and Their...,22.6
9,The Black Maria,52.15


In [57]:
df['A'] = df.A.str.lower()

In [58]:
df

Unnamed: 0,A,B
0,a light in the attic,51.77
1,tipping the velvet,53.74
2,soumission,50.1
3,sharp objects,47.82
4,sapiens: a brief history of humankind,54.23
5,the requiem red,22.65
6,the dirty little secrets of getting your dream...,33.34
7,the coming woman: a novel based on the life of...,17.93
8,the boys in the boat: nine americans and their...,22.6
9,the black maria,52.15
