In [4]:
#Sarcina 1
import requests
from bs4 import BeautifulSoup

def wiki_title(url):
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")

    title = soup.find('title').text
    print(f"Title:\n{title}\n")

wiki_title("https://en.wikipedia.org/wiki/Linus_Torvalds")


Title:
Linus Torvalds - Wikipedia



In [5]:
#Sarcina 2
import requests
from bs4 import BeautifulSoup
def wiki_sections(url):
    res = requests.get(url)
    
    if res.status_code != 200:
        print(f"Error: Unable to retrieve content from {url}")
        return
    
    soup = BeautifulSoup(res.text, 'html.parser')
    
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    sections = soup.find_all(header_tags)
    
    print("Sections:")
    for section in sections:
        print(section.text.strip())

wiki_sections("https://en.wikipedia.org/wiki/Linus_Torvalds")

Sections:
Contents
Linus Torvalds
Life and career[edit]
Early years[edit]
Linux[edit]
The Linus/Linux connection[edit]
Authority and trademark[edit]
Other software[edit]
Personal life[edit]
Awards and achievements[edit]
Media recognition[edit]
Bibliography[edit]
See also[edit]
References[edit]
Further reading[edit]
External links[edit]


In [6]:
#Sarcina 3
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup

def wiki_photo(url):
    res = requests.get(url)
    
    if res.status_code != 200:
        print(f"Error: Unable to retrieve content from {url}")
        return

    soup = BeautifulSoup(res.text, 'html.parser')
    images = soup.find_all('img')
    save_dir = 'img'

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    counter = 0
    for image in images:
        name = f"image_{counter}:" 
        link = image['src']
        absolute_url = urljoin(url, link)
        print(f"{name} {absolute_url}")
        file_name = f'image_{counter}.jpg'
        file_path = os.path.join(save_dir, file_name)
        with open(file_path, 'wb') as f:
            im = requests.get(absolute_url)
            f.write(im.content)
        counter += 1

wiki_photo("https://en.wikipedia.org/wiki/Linus_Torvalds")


image_0: https://en.wikipedia.org/static/images/icons/wikipedia.png
image_1: https://en.wikipedia.org/static/images/mobile/copyright/wikipedia-wordmark-en.svg
image_2: https://en.wikipedia.org/static/images/mobile/copyright/wikipedia-tagline-en.svg
image_3: https://upload.wikimedia.org/wikipedia/commons/thumb/e/e8/Lc3_2018_%28263682303%29_%28cropped%29.jpeg/220px-Lc3_2018_%28263682303%29_%28cropped%29.jpeg
image_4: https://upload.wikimedia.org/wikipedia/commons/thumb/6/69/Linus_Torvalds.jpeg/175px-Linus_Torvalds.jpeg
image_5: https://upload.wikimedia.org/wikipedia/commons/thumb/e/e6/Linus-Torvalds_IEEE_Ibuka_Award_2018.jpg/220px-Linus-Torvalds_IEEE_Ibuka_Award_2018.jpg
image_6: https://upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/20px-Commons-logo.svg.png
image_7: https://upload.wikimedia.org/wikipedia/commons/thumb/f/fa/Wikiquote-logo.svg/23px-Wikiquote-logo.svg.png
image_8: https://upload.wikimedia.org/wikipedia/commons/thumb/f/ff/Wikidata-logo.svg/27px-Wikidata-logo

In [7]:
#Sarcina 4
import requests
from bs4 import BeautifulSoup

PRODUCT_POD_CLASS = 'product_pod'
STAR_RATING_CLASS = 'star-rating Two'

def check_rating(book_item):
    rating_element = book_item.find('p', class_=STAR_RATING_CLASS)
    return rating_element is not None

def find_rating(default_url):
    books = []
    page_nr = 1

    while True:
        url = f"{default_url}/catalogue/page-{page_nr}.html"
        res = requests.get(url)

        if res.status_code == 200:
            soup = BeautifulSoup(res.text, 'html.parser')

            book_items = soup.find_all('article', class_=PRODUCT_POD_CLASS)

            for book_item in book_items:
                title_element = book_item.find('h3')
                if title_element:
                    title = title_element.a['title']

                    if check_rating(book_item):
                        books.append(title)

            page_nr += 1
        else:
            print(f"Can't go to page {page_nr}\n")
            break

    return books

url = "http://books.toscrape.com"
books = find_rating(url)

print(f"{len(books)} books have been found with the specified rating:")
for book in books:
    print(book)


Can't go to page 51

196 books have been found with the specified rating:
Starving Hearts (Triangular Trade Trilogy, #1)
Libertarianism for Beginners
It's Only the Himalayas
How Music Works
Maude (1883-1993):She Grew Up with the country
You can't bury them all: Poems
Reasons to Stay Alive
Without Borders (Wanderlove #1)
Soul Reader
Security
Saga, Volume 5 (Saga (Collected Editions) #5)
Reskilling America: Learning to Labor in the Twenty-First Century
Political Suicide: Missteps, Peccadilloes, Bad Calls, Backroom Hijinx, Sordid Pasts, Rotten Breaks, and Just Plain Dumb Mistakes in the Annals of American Politics
Obsidian (Lux #1)
My Paris Kitchen: Recipes and Stories
Masks and Shadows
Lumberjanes, Vol. 2: Friendship to the Max (Lumberjanes #5-8)
Lumberjanes Vol. 3: A Terrible Plan (Lumberjanes #9-12)
Judo: Seven Steps to Black Belt (an Introductory Guide for Beginners)
I Hate Fairyland, Vol. 1: Madly Ever After (I Hate Fairyland (Compilations) #1-5)
Giant Days, Vol. 2 (Giant Days #5-8)


In [10]:
#Sarcina 5
import requests
from bs4 import BeautifulSoup
import csv


def fahrenheit_to_celsius(fahrenheit):
    if '--' in fahrenheit:
        return None 
    if '°' in fahrenheit:
        fahrenheit = fahrenheit.replace('°', '')
    return round((float(fahrenheit) - 32) * 5 / 9)


def convert_and_write_csv(csv_writer, date, high_temp_str, low_temp_str):
    high_temp_c = fahrenheit_to_celsius(high_temp_str)
    low_temp_c = fahrenheit_to_celsius(low_temp_str)

    csv_writer.writerow({'D': date, 'H Temp': high_temp_c, 'L Temp': low_temp_c})

    print(f"Date: {date}")
    print(f"High Temperature: {high_temp_c}°C")
    print(f"Low Temperature: {low_temp_c}°C")
    print("\n")


def get_weather(url, csv_name):
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        buttons = soup.find_all('button', class_='Button--default--2gfm1')

        with open(f'{csv_name}.csv', 'w', newline='') as csvfile:
            fieldnames = ['D', 'H Temp', 'L Temp']
            csv_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            csv_writer.writeheader()

            for button in buttons:
                date_element = button.find('span', class_='CalendarDateCell--date--JO3Db')
                if date_element is None:
                    continue

                date = date_element.text.strip()

                high_temp_element = button.find('div', class_='CalendarDateCell--tempHigh--3k9Yr')
                low_temp_element = button.find('div', class_='CalendarDateCell--tempLow--2WL7c')

                if high_temp_element is None or low_temp_element is None:
                    continue

                high_temp_str = high_temp_element.text.strip()
                low_temp_str = low_temp_element.text.strip()

                convert_and_write_csv(csv_writer, date, high_temp_str, low_temp_str)

        print(f"Weather data saved to {csv_name}.csv")

    else:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")


url_moscov = "https://weather.com/weather/monthly/l/aeea22202ca7af67cd001c86700f5486514081107a161ba8f78177867024ae43"
get_weather(url_moscov, 'moscov_weather')

url_paris = "https://weather.com/weather/monthly/l/d2a540efb4e9604b3c1d01b7851a1d9d2ab4c7b3ba428e5799936ac54404c035"
get_weather(url_paris, 'paris_weather')

url_amsterdam = "https://weather.com/weather/monthly/l/968d2f1a5509a2f71fca25929b7d83139ac5134f61611a9c6637c90354cd6da8"
get_weather(url_amsterdam, 'amsterdam_weather')


Date: 29
High Temperature: 1°C
Low Temperature: -1°C


Date: 30
High Temperature: 4°C
Low Temperature: 3°C


Date: 31
High Temperature: 10°C
Low Temperature: 7°C


Date: 1
High Temperature: 17°C
Low Temperature: 10°C


Date: 2
High Temperature: 11°C
Low Temperature: 2°C


Date: 3
High Temperature: 3°C
Low Temperature: 0°C


Date: 4
High Temperature: 5°C
Low Temperature: 3°C


Date: 5
High Temperature: 13°C
Low Temperature: 8°C


Date: 6
High Temperature: 11°C
Low Temperature: 8°C


Date: 7
High Temperature: 10°C
Low Temperature: 7°C


Date: 8
High Temperature: 9°C
Low Temperature: 4°C


Date: 9
High Temperature: 8°C
Low Temperature: 4°C


Date: 10
High Temperature: 7°C
Low Temperature: 3°C


Date: 11
High Temperature: 8°C
Low Temperature: 3°C


Date: 12
High Temperature: 6°C
Low Temperature: 3°C


Date: 13
High Temperature: 4°C
Low Temperature: 2°C


Date: 14
High Temperature: 4°C
Low Temperature: -1°C


Date: 15
High Temperature: 3°C
Low Temperature: 1°C


Date: 16
High Temperature: 2