In [47]:
import requests
from bs4 import BeautifulSoup
from fpdf import FPDF

class PDF(FPDF):
    """FPDF document that prints a fixed title on each page and one place per page.

    All text passed to ``chapter_title`` / ``chapter_body`` is reduced to
    Latin-1 before it is handed to FPDF (presumably because the built-in
    'Arial' font cannot render characters outside that range -- confirm
    against the installed fpdf version).
    """

    # Typographic characters that are common in scraped web text but have no
    # Latin-1 code point. They are mapped to plain ASCII first so that they do
    # not end up as '?' after the lossy encode below.
    _PUNCTUATION_MAP = str.maketrans({
        '\u2018': "'",    # left single quotation mark
        '\u2019': "'",    # right single quotation mark / apostrophe
        '\u201c': '"',    # left double quotation mark
        '\u201d': '"',    # right double quotation mark
        '\u2013': '-',    # en dash
        '\u2014': '-',    # em dash
        '\u2026': '...',  # horizontal ellipsis
    })

    @staticmethod
    def _latin1_safe(text):
        """Return ``text`` with every character representable in Latin-1.

        Known typographic punctuation is transliterated to ASCII; any other
        character outside Latin-1 is replaced by '?'.
        """
        plain = text.translate(PDF._PUNCTUATION_MAP)
        return plain.encode('latin-1', 'replace').decode('latin-1')

    def header(self):
        """Write the centred, bold document title.

        Overrides ``FPDF.header``; per the FPDF documentation this hook is
        called automatically whenever a page is added.
        """
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'Best Places to Visit in Egypt', 0, 1, 'C')

    def chapter_title(self, name):
        """Write ``name`` as a bold, left-aligned heading line."""
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, self._latin1_safe(name), 0, 1, 'L')

    def chapter_body(self, body):
        """Write ``body`` as wrapped regular text followed by a line break."""
        self.set_font('Arial', '', 12)
        self.multi_cell(0, 10, self._latin1_safe(body))
        self.ln()

    def add_place(self, name, body):
        """Start a new page and write one place: its heading, then its description."""
        self.add_page()
        self.chapter_title(name)
        self.chapter_body(body)


In [45]:
# URL of the website to scrape
url = 'https://localguidetoegypt.com/post/where-to-go-in-egypt-10-best-egyptian-cities-destinations/'

# Lists to store the data. They are created before the request so that later
# cells always find them, even when the download or the parsing fails
# (otherwise the next cell raises NameError or reuses stale kernel state).
names = []
contents = []

# Send a request to the website. The timeout (in seconds) makes a stalled
# connection raise an exception instead of blocking the cell indefinitely.
response = requests.get(url, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all the place names and their descriptions
    article = soup.find('article')  # Assuming all content is within an <article> tag
    if article:
        headings = article.find_all(['h2', 'h3'])
        for heading in headings:
            # Gather the text of the elements that follow this heading,
            # stopping as soon as the next heading is reached.
            parts = []
            for sibling in heading.find_next_siblings():
                if sibling.name in ['h2', 'h3']:
                    break
                if sibling.name in ['p', 'span', 'div']:
                    parts.append(sibling.get_text(strip=True))
            names.append(heading.get_text(strip=True))
            contents.append(' '.join(parts).strip())
    else:
        # Make the "nothing extracted" case visible instead of failing silently
        print("No <article> element found on the page; nothing was extracted.")
else:
    print(f"Failed to retrieve the website. Status code: {response.status_code}")


In [46]:
import os

# Create a PDF object
pdf = PDF()

# Add each place to the PDF (one page per place)
for name, content in zip(names, contents):
    pdf.add_place(name, content)

# Location of the generated file
output_directory = 'output'
output_path = os.path.join(output_directory, 'places_to_visit_in_egypt.pdf')

if names:
    # Ensure the directory exists; exist_ok avoids the check-then-create race
    # and makes the cell safe to re-run.
    os.makedirs(output_directory, exist_ok=True)

    # Save the PDF to a file in the specified directory
    pdf.output(output_path)

    print(f"Data has been saved to '{output_path}'")
else:
    # Nothing was scraped: do not write a PDF or claim that data was saved
    print("No places were scraped; the PDF was not written.")


Data has been saved to 'output\places_to_visit_in_egypt.pdf'
