In [3]:
import requests
from bs4 import BeautifulSoup


def post_process_text(text):
    """Re-join lines that look like the continuation of the previous line.

    The text is split on newlines. A line is glued onto the end of the
    previous output line (separated by a single space) when it is non-empty,
    its first character is a lowercase letter, and the previous output line
    does not end with a period. Every other line, including empty ones,
    starts a new output line. The first line is always kept unchanged.

    Args:
        text: The newline-separated text to clean up.

    Returns:
        The processed text, with output lines joined by newlines.
    """
    head, *tail = text.split('\n')
    merged = [head]

    for fragment in tail:
        # Slicing instead of indexing makes the empty-line case evaluate to
        # False without a separate emptiness check.
        continues_previous = (
            fragment[:1].islower() and not merged[-1].endswith('.')
        )
        if not continues_previous:
            merged.append(fragment)
            continue
        merged[-1] = merged[-1] + ' ' + fragment

    return '\n'.join(merged)


def crawl_text_and_save(url, output_file='crawled_text.txt', timeout=30):
    """Download a web page, extract its text, and save the cleaned text.

    The page is fetched with an HTTP GET, parsed with BeautifulSoup's
    'html.parser', reduced to its text content (one text node per line,
    whitespace-stripped), passed through post_process_text, and written to
    output_file as UTF-8.

    Args:
        url: Address of the page to download.
        output_file: Path of the file the processed text is written to.
            An existing file is overwritten.
        timeout: Seconds to wait for the server before giving up; passed
            straight to requests.get.

    Returns:
        The processed text on success, or None if the HTTP request failed
        (connection error, timeout, or an error status code).

    Note:
        Only request errors are handled here. Errors raised while writing
        the output file are not caught and propagate to the caller.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # server that accepts the connection but never answers.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract text content (modify this based on the structure of the website)
    text_content = soup.get_text(separator='\n', strip=True)

    # Post-process the text
    processed_text = post_process_text(text_content)

    # Save the crawled and post-processed text to a file
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(processed_text)

    print(f"Text has been successfully crawled and saved to {output_file}")
    return processed_text


# Example usage:
url_to_crawl = 'https://jalammar.github.io/illustrated-stable-diffusion/'
result = crawl_text_and_save(url_to_crawl)

# crawl_text_and_save signals failure with None. Compare against None
# explicitly so that a successful crawl of a page with no text (an empty
# string, which is falsy) is not misreported as a failure.
if result is not None:
    print('Finished.')
else:
    print("Failed to crawl text.")

Text has been successfully crawled and saved to crawled_text.txt
The Illustrated Stable Diffusion – Jay Alammar – Visualizing machine learning one concept at a time.
Jay Alammar
Visualizing machine learning one concept at a time.
@JayAlammar on Twitter.
YouTube Channel
Blog
About
The Illustrated Stable Diffusion
Translations:
Chinese
,
Vietnamese
.
(
V2 Nov 2022
: Updated images for more precise description of forward diffusion. A few more images in this version)
AI image generation is the most recent AI capability blowing people’s minds (mine included). The ability to create striking visuals from text descriptions has a magical quality to it and points clearly to a shift in how humans create art. The release of
Stable Diffusion is a clear milestone in this development because it made a high-performance model available to the masses (performance in terms of image quality, as well as speed and relatively low resource/memory requirements).
After experimenting with AI image generation, yo