# 1. APIs — Intranet task 0: starships that can carry a given number of passengers

In [62]:
#!/usr/bin/env python3
"""
This module provides a function to get a list of starships
that can hold a specified number of passengers.
"""

import requests


def availableShips(passengerCount):
    """
    Retrieve the names of all starships that can hold
    at least `passengerCount` passengers.

    Every page of the starships endpoint is fetched by following the
    "next" link of each response until it is empty.

    Args:
        passengerCount (int): The minimum number of passengers
        the starship should be able to hold.

    Returns:
        list: A list of starship names that can hold
        the specified number of passengers, in the order the API
        returned them.

    Raises:
        requests.RequestException: If a request fails, times out, or
        the API answers with an HTTP error status.
    """
    url = "https://swapi-api.alx-tools.com/api/starships/"
    # Placeholder capacities that never match. "0" is kept in the set so
    # zero-capacity ships stay excluded exactly as before.
    placeholders = {"n/a", "unknown", "0", "none"}
    ships = []

    while url:
        # A timeout keeps a stalled connection from hanging the cell forever.
        response = requests.get(url, timeout=10)
        # Surface HTTP errors here instead of as a KeyError on "results".
        response.raise_for_status()
        data = response.json()

        for ship in data["results"]:
            raw = ship.get("passengers")
            if not isinstance(raw, str) or raw in placeholders:
                continue
            try:
                # Capacities may contain thousands separators ("843,342").
                capacity = int(raw.replace(",", ""))
            except ValueError:
                # Any other non-numeric capacity: skip the ship rather
                # than abort the whole listing.
                continue
            if capacity >= passengerCount:
                ships.append(ship["name"])

        url = data.get("next")

    return ships


if __name__ == "__main__":
    # Demo run: list every starship with room for at least four
    # passengers, one name per line.
    ships = availableShips(4)
    if ships:
        print("\n".join(ships))

CR90 corvette
Sentinel-class landing craft
Death Star
Millennium Falcon
Executor
Rebel transport
Slave 1
Imperial shuttle
EF76 Nebulon-B escort frigate
Calamari Cruiser
Republic Cruiser
Droid control ship
Scimitar
J-type diplomatic barge
AA-9 Coruscant freighter
Republic Assault ship
Solar Sailer
Trade Federation cruiser
Theta-class T-2c shuttle
Republic attack cruiser


# 2. Scraping the tabular data

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv

# URL of the page to scrape
url = 'https://www.scrapethissite.com/pages/forms/'

# Fetch the page. The timeout stops a stalled connection from hanging the
# cell, and raise_for_status() stops us from parsing an HTTP error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Find the first <table> element; fail with a clear message if the page
# has none (soup.find returns None in that case).
table = soup.find('table')
if table is None:
    raise ValueError(f"No <table> element found at {url}")

# Extract table headers
headers = [header.text.strip() for header in table.find_all('th')]

# Extract table rows
rows = []
for row in table.find_all('tr')[1:]:  # Skip the header row
    cols = [col.text.strip() for col in row.find_all('td')]
    if cols:  # Ignore rows that contain no <td> cells
        rows.append(cols)

# Create a DataFrame from the extracted data
df = pd.DataFrame(rows, columns=headers)

# Display the table
print(df)


# Save the data to a CSV file. The explicit encoding keeps the output
# identical across platforms; newline='' lets the csv module control
# line endings itself.
csv_file = 'scraped_table_data.csv'
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(headers)
    writer.writerows(rows)

print(f"Data has been successfully written to {csv_file}")


                Team Name  Year Wins Losses OT Losses  Win % Goals For (GF)  \
0           Boston Bruins  1990   44     24             0.55            299   
1          Buffalo Sabres  1990   31     30            0.388            292   
2          Calgary Flames  1990   46     26            0.575            344   
3      Chicago Blackhawks  1990   49     23            0.613            284   
4       Detroit Red Wings  1990   34     38            0.425            273   
5         Edmonton Oilers  1990   37     37            0.463            272   
6        Hartford Whalers  1990   31     38            0.388            238   
7       Los Angeles Kings  1990   46     24            0.575            340   
8   Minnesota North Stars  1990   27     39            0.338            256   
9      Montreal Canadiens  1990   39     30            0.487            273   
10      New Jersey Devils  1990   32     33              0.4            272   
11     New York Islanders  1990   25     45         

# 3. Scraping the first 5 products from each of 5 Amazon.com categories

In [60]:
import requests
from bs4 import BeautifulSoup
import os
import random
import time

# Pool of browser User-Agent strings; scrape_amazon_products() picks one
# at random for every request it sends.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0.2 Safari/605.1.15',
    'Mozilla/5.0 (Linux; Android 10; Pixel 3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 11; SM-G950U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Mobile Safari/537.36',
]

# Maps a category name to the Amazon search-results URL scraped for it.
# The key is also used as the sub-folder name under amazon_images/ where
# that category's images are saved.
categories = {
    'laptops': 'https://www.amazon.com/s?k=laptop&crid=69J5LAQPRTNY&sprefix=laptop%2Caps%2C326&ref=nb_sb_ss_ts-doa-p_1_6',
    'books': 'https://www.amazon.com/s?k=books&i=stripbooks-intl-ship&crid=1SH8JGDPMX8JZ&sprefix=book%2Cstripbooks-intl-ship%2C318&ref=nb_sb_noss_1',
    'shoes': 'https://www.amazon.com/s?k=shoes&i=fashion-boys-intl-ship&crid=JR3YXOQQ070X&sprefix=shoe%2Cfashion-boys-intl-ship%2C303&ref=nb_sb_noss_2',
    'movies': 'https://www.amazon.com/s?k=movies&i=movies-tv-intl-ship&crid=2WL62AUHVUHN8&sprefix=mov%2Cmovies-tv-intl-ship%2C302&ref=nb_sb_noss_2',
    'cookers': 'https://www.amazon.com/s?k=cooker&ref=nb_sb_noss'
}

# Function to create a directory to save images
def create_dir(dir_name):
    """Create `dir_name`, including missing parent folders, if needed.

    Calling it on a directory that already exists is a no-op.

    Args:
        dir_name (str): Path of the directory to create.

    Raises:
        FileExistsError: If `dir_name` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which
    # left a window for the directory to appear between check and create.
    os.makedirs(dir_name, exist_ok=True)

# Function to scrape Amazon products
def scrape_amazon_products(url, max_products=5):
    """Fetch an Amazon search-results page and return its first products.

    Args:
        url (str): URL of the search-results page to fetch.
        max_products (int): Maximum number of products to return.
            Defaults to 5.

    Returns:
        list[tuple[str, str]]: Up to `max_products` ``(title, image_url)``
        pairs in page order. The list is empty when the request fails or
        no listing with both a title and an image is found.
    """
    # Select a random user agent
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept-Language': 'en-US,en;q=0.5',
        # 'Accept-Encoding' is deliberately not overridden: requests
        # advertises only the encodings it can decode. Advertising 'br'
        # by hand yields an undecodable body when no Brotli library is
        # installed.
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.amazon.com/'
    }

    # Sleep before making the request to mimic human behavior
    time.sleep(random.uniform(2, 5))

    try:
        # The timeout keeps a stalled connection from hanging the run;
        # a timeout is reported through the same except branch.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()  # Raise an error for bad responses
    except requests.RequestException as e:
        print(f"Failed to retrieve the webpage: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Each search hit is a <div data-component-type="s-search-result">
    listings = soup.find_all('div', {'data-component-type': 's-search-result'})

    # Collected (title, image URL) pairs
    product_data = []

    for listing in listings:
        if len(product_data) >= max_products:
            break

        title_tag = listing.h2
        image_tag = listing.find('img')
        # Skip listings that lack a title or an image source.
        if title_tag is None or image_tag is None or 'src' not in image_tag.attrs:
            continue

        product_data.append((title_tag.text.strip(), image_tag['src']))

    return product_data

# Function to save images in respective category folders
def save_images(category, product_data):
    """Download each product image into ``amazon_images/<category>/``.

    The file name is the product title with spaces and slashes replaced
    by underscores, plus a ``.jpg`` suffix. A failure on one image is
    reported and the remaining images are still processed.

    Args:
        category (str): Category name, used as the sub-folder name.
        product_data (iterable[tuple[str, str]]): ``(title, image_url)``
            pairs to download.
    """
    category_dir = f'amazon_images/{category}'
    create_dir(category_dir)  # Create category folder

    for title, image_url in product_data:
        image_name = title.replace(" ", "_").replace("/", "_") + '.jpg'
        try:
            # The timeout keeps one stalled download from blocking the rest.
            image_response = requests.get(image_url, timeout=15)
            image_response.raise_for_status()  # Raise an error for bad responses

            with open(os.path.join(category_dir, image_name), 'wb') as img_file:
                img_file.write(image_response.content)
        except (requests.RequestException, OSError) as e:
            # OSError covers file-system failures, e.g. a title that
            # produces a file name the file system rejects.
            print(f"Failed to save image for '{title}': {e}")
        else:
            print(f'Saved image: {image_name} in category: {category}')

# Entry point: scrape every configured category and store its images
if __name__ == "__main__":
    for category in categories:
        url = categories[category]
        print(f'Scraping category: {category}')
        products = scrape_amazon_products(url)
        if not products:
            # Nothing came back (request failed or no listings matched).
            print(f"No products found in category: {category}")
            continue
        save_images(category, products)


Scraping category: laptops
Saved image: Acer_Aspire_3_A315-24P-R7VH_Slim_Laptop_|_15.6"_Full_HD_IPS_Display_|_AMD_Ryzen_3_7320U_Quad-Core_Processor_|_AMD_Radeon_Graphics_|_8GB_LPDDR5_|_128GB_NVMe_SSD_|_Wi-Fi_6_|_Windows_11_Home_in_S_Mode.jpg in category: laptops
Saved image: HP_Newest_255_G10_Laptop_for_Home_or_Work,_16GB_RAM,_1TB_SSD,_15.6"_Full_HD,_Ryzen_3_7330U_(Beat_Intel_i5-1135G7),_Ethernet_Port,_HDMI,_USB-C,_Windows_11_Pro,_Business_and_Fun_Ready_(2024).jpg in category: laptops
Saved image: HP_Newest_14"_Ultral_Light_Laptop_for_Students_and_Business,_Intel_Quad-Core_N4120,_8GB_RAM,_192GB_Storage(64GB_eMMC+128GB_Micro_SD),_1_Year_Office_365,_Webcam,_HDMI,_WiFi,_USB-A&C,_Win_11_S.jpg in category: laptops
Saved image: Acer_Aspire_Go_15_Slim_Laptop_|_15.6"_Full_HD_IPS_1080P_Display_|_Intel_Core_i3-N305|_Intel_UHD_Graphics_|_8GB_LPDDR5_|_128GB_HD_|_Wi-Fi_6_|_AI_PC_|_Windows_11_Home_in_S_Mode_|_AG15-31P-3947.jpg in category: laptops
Saved image: HP_14_Laptop,_Intel_Celeron_N4020,_4_GB

In [61]:
# Bundle the amazon_images folder, including its category sub-folders,
# into amazon_images.zip using the shell `zip` tool (-r recurses).
!zip -r amazon_images.zip amazon_images
# google.colab is part of the Google Colab runtime, so this cell is
# Colab-specific. NOTE(review): confirm before running it elsewhere.
from google.colab import files
# Hand the archive to Colab's download helper.
files.download('amazon_images.zip')


  adding: amazon_images/ (stored 0%)
  adding: amazon_images/books/ (stored 0%)
  adding: amazon_images/books/From_Here_to_the_Great_Unknown:_A_Memoir.jpg (stored 0%)
  adding: amazon_images/books/Melania.jpg (deflated 15%)
  adding: amazon_images/books/Good_Energy:_The_Surprising_Connection_Between_Metabolism_and_Limitless_Health.jpg (stored 0%)
  adding: amazon_images/books/Onyx_Storm_(Deluxe_Limited_Edition)_(The_Empyrean,_3).jpg (deflated 0%)
  adding: amazon_images/books/War.jpg (deflated 1%)
  adding: amazon_images/cookers/ (stored 0%)
  adding: amazon_images/cookers/Hawkins_CB30_Hard_Anodised_Pressure_Cooker,_3-Liter,_Contura_Black.jpg (deflated 0%)
  adding: amazon_images/cookers/Instant_Pot_Duo_7-in-1_Mini_Electric_Pressure_Cooker,_Slow_Rice_Cooker,_Steamer,_Sauté,_Yogurt_Maker,_Warmer_&_Sterilizer,_Includes_Free_App_with_over_1900_Recipes,_Stainless_Steel,_3_Quart.jpg (stored 0%)
  adding: amazon_images/cookers/HAWKINS_Classic_CL50_5-Liter_New_Improved_Aluminum_Pressure_Cooke

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>