# **INTEGRATED LAB ASSIGNMENT**

***
## **BOOK SCRAPER**

This project will: <br>
>-Scrape data from a book website (OpenLibrary in this case) <br>
>-Then use the OpenLibrary API to generate more data about the scraped titles <br>
>-All the data collected will be encrypted and stored as a CSV file

In [10]:
#Libraries that will be used in this assignment 
from bs4 import BeautifulSoup
import requests
import requests
import csv
from urllib.parse import quote

In [11]:
# This cell downloads the page being scraped (an Open Library subject search
# in this example). `requests` fetches the HTML and BeautifulSoup parses it
# into a tree that the following cells can query.
url = 'https://openlibrary.org/search?subject=Fantasy'
# Without a timeout, requests waits indefinitely on an unresponsive server.
result = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page,
# which would just produce an empty list of titles later on.
result.raise_for_status()
doc = BeautifulSoup(result.text, "html.parser")

In [12]:
#This cell will select only the titles of the books present
def formatNicely(doc):
    """Return the cleaned text of every element in *doc* that has any.

    Args:
        doc: Iterable of objects exposing a ``.string`` attribute (for
            example the result of BeautifulSoup's ``find_all``).

    Returns:
        A list of the elements' ``.string`` values with surrounding
        whitespace removed, in input order. Elements whose ``.string`` is
        ``None``, empty, or whitespace-only are skipped so that no blank
        titles reach the caller.
    """
    titles = []
    # Iterate the elements directly: no index bookkeeping, and any iterable
    # (not just sequences supporting len()/indexing) is accepted.
    for element in doc:
        text = element.string
        if not text:  # None (no single text child) or empty string
            continue
        cleaned = text.strip()
        if cleaned:  # drop strings that were nothing but whitespace
            titles.append(cleaned)
    return titles

# Collect every <a> element carrying the CSS class "results" from the parsed page
books = doc.find_all('a', class_='results')
# Reduce those elements to a plain list of stripped title strings
book_titles = formatNicely(books)

# Show the scraped titles so the result can be checked by eye
print(book_titles)

['Phantastes: A Faerie Romance for Men and Women', 'The  Road to Oz', 'The Silmarillion', 'Lilith: A Romance', 'Ozma of Oz: a record of her adventures with Dorothy Gale of Kansas, the Yellow Hen, the Scarecrow, the Tin Woodman, Tiktok, the cowardly Lion and the Hungry Tiger, besides other good people too numerous to mention faithfully recorded herein', 'The Last Battle', 'The story of Doctor Dolittle: being the history of his peculiar life at home and astonishing adventuresin foreign parts, never before printed', "Grimm's fairy tales", 'The Silver Chair', 'The King in Yellow', 'The Hobbit', 'Momo', 'The Lion, the Witch and the Wardrobe', 'Prince Caspian', 'The Fellowship of Ring', 'The Land of Oz: A sequel to The Wizard of Oz', 'The Enchanted Castle', 'The two towers', 'The Horse and his Boy', "The Magician's Nephew"]


In [None]:
# Path of the CSV file that receives one row per looked-up book
csv_file_path = "books.csv"

# Open in 'w' mode so any previous contents are discarded. newline='' leaves
# line-ending handling to the csv module, and the text is written as UTF-8.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    # Header row; the data rows written later use this same column order
    csv_writer.writerow(["Title", "Author(s)", "First Published", "Average Rating", "Number of Pages"])

    def search_books_by_title(title, csv_writer):
        """Search Open Library for *title* and record the first match.

        The first search hit is printed and written to the CSV as one row:
        title, author(s), first publish year, average rating (rounded to one
        decimal) and median page count. Missing fields fall back to 'N/A'
        (or 'Unknown Author').

        Network failures, non-200 responses, unparsable bodies and empty
        result sets are reported with a printed message and produce no row,
        so one failed lookup does not abort the remaining titles.

        Args:
            title: Book title to search for; it is URL-encoded before use.
            csv_writer: ``csv.writer``-style object whose ``writerow`` method
                receives the result row.
        """
        # Encode the title so spaces and punctuation are safe inside the URL.
        url = f"https://openlibrary.org/search.json?q={quote(title)}"

        try:
            # A timeout keeps one unresponsive request from stalling the run.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            print(f"Error fetching data for '{title}': {exc}")
            return

        if response.status_code != 200:
            print(f"Error fetching data for '{title}': {response.status_code}")
            return

        try:
            data = response.json()
        except ValueError:
            # requests' JSON decode errors are ValueError subclasses.
            print(f"Error fetching data for '{title}': response was not valid JSON")
            return

        # Tolerate a missing or null 'docs' entry as well as an empty list.
        docs = data.get('docs') if isinstance(data, dict) else None
        if not docs:
            print(f"No results found for '{title}'")
            return

        # Only the top-ranked search hit is used.
        book = docs[0]
        book_title = book.get('title', 'N/A')
        # `or` also covers an author list that is present but empty.
        author_name = ', '.join(book.get('author_name') or ['Unknown Author'])
        first_publish_year = book.get('first_publish_year', 'N/A')
        ratings = book.get('ratings_average', 'N/A')
        if isinstance(ratings, (float, int)):
            ratings = round(ratings, 1)
        number_of_pages = book.get('number_of_pages_median', 'N/A')

        # Echo the result so progress is visible while the cell runs.
        print(f"Title: {book_title}")
        print(f"Author(s): {author_name}")
        print(f"First published: {first_publish_year}")
        print(f"Average Rating: {ratings}")
        print(f"Number of Pages: {number_of_pages}")
        print("-" * 40)

        # Persist the same values, in header order, to the CSV file.
        csv_writer.writerow([book_title, author_name, first_publish_year, ratings, number_of_pages])

    # Look up every scraped title in turn. The loop stays inside the `with`
    # block so the file behind csv_writer is still open while rows are written.
    for title in book_titles:
        search_books_by_title(title, csv_writer)
        print("=" * 60)  # Visual separator between consecutive book searches


Title: Phantastes
Author(s): George MacDonald
First published: 1850
Average Rating: 3.9
Number of Pages: 197
----------------------------------------
Title: The Road to Oz
Author(s): L. Frank Baum, Jenny S.anchez, John R. Neill
First published: 1909
Average Rating: 3.3
Number of Pages: 166
----------------------------------------
Title: The Silmarillion
Author(s): J.R.R. Tolkien
First published: 1977
Average Rating: 4.0
Number of Pages: 432
----------------------------------------
Title: Lilith
Author(s): George MacDonald
First published: 1895
Average Rating: 3.0
Number of Pages: 288
----------------------------------------
Title: Ozma of Oz
Author(s): L. Frank Baum, Erin Yuen, Taylor Anderson, John R. Neill, Alexa RACHAEL
First published: 1907
Average Rating: 4.0
Number of Pages: 163
----------------------------------------
Title: The Last Battle
Author(s): C.S. Lewis
First published: 1956
Average Rating: 3.9
Number of Pages: 192
----------------------------------------
Title: The Sto

In [5]:
pip install pycryptodome

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
# XOR "encryption" is used for this project.
def xor_encrypt_decrypt(file_path, key):
    """XOR the contents of *file_path* in place with a repeating *key*.

    XOR is its own inverse, so calling this a second time with the same key
    restores the original file; the same function therefore both encrypts
    and decrypts.

    NOTE: repeating-key XOR only obfuscates the data. It is not a secure
    cipher and should not be relied on to protect sensitive content.

    Args:
        file_path: Path of the file to transform; it is overwritten.
        key: Non-empty string whose encoded bytes are cycled over the file.

    Raises:
        ValueError: If *key* is empty. This is checked before the file is
            touched, instead of failing later with a ZeroDivisionError.
    """
    key_bytes = key.encode()  # Convert key to bytes
    if not key_bytes:
        raise ValueError("key must not be empty")
    key_len = len(key_bytes)

    # Read the whole file as raw bytes
    with open(file_path, "rb") as file:
        file_data = file.read()

    # XOR each byte with the key byte at the same position, wrapping the key
    processed = bytes(
        byte ^ key_bytes[i % key_len] for i, byte in enumerate(file_data)
    )

    # Write the encrypted (or decrypted) data back to the same file
    with open(file_path, "wb") as file:
        file.write(processed)
    print(f"File '{file_path}' has been processed (encrypted/decrypted).")

# Example usage: process the CSV file produced by the scraping cell
csv_file_path = "books.csv"
# NOTE(review): the key is hard-coded here, so anyone who can read this
# notebook can reverse the XOR; consider supplying it from outside the source.
encryption_key = "simplekey123"  # Use any string as the key (keep it safe!)

# Encrypt the file in place. XOR is symmetric, so running this cell a second
# time with the same key turns the file back into its original contents.
xor_encrypt_decrypt(csv_file_path, encryption_key)