# Scraping from Flash Fiction Online

In [9]:
import os
import requests
from bs4 import BeautifulSoup

# Category slugs to scrape; main() appends each one to base_url to build
# the listing URL and also uses it as the output sub-directory name.
category_list = [
    'classic-flash', 
    'fantasy', 
    'horror', 
    'humor', 
    'literary', 
    'mainstream', 
    'science-fiction'
]

# Base URL of the category listing pages; main() builds
# '<base_url><category>/page/<n>/' from it.
base_url = 'https://www.flashfictiononline.com/article-categories/'


def get_story_links(page_url, timeout=10):
    """
    Get the story links from a category listing page.

    Args:
        page_url: URL of the category page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``href`` of the first link found inside the
        ``<figure class="post-image">`` of each ``<article>`` on the page,
        in page order. An empty list is returned (and the error is printed)
        when the request fails or the server answers with an error status.
    """

    # Without a timeout, requests waits forever on a stalled connection.
    # A timeout raises a RequestException subclass, handled just below.
    try:
        response = requests.get(page_url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {page_url}: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Collect one link per article, skipping articles without an image figure
    story_links = []
    for article in soup.find_all('article'):
        figure = article.find('figure', class_='post-image')
        if not figure:
            continue
        a_tag = figure.find('a')
        if a_tag and 'href' in a_tag.attrs:
            story_links.append(a_tag['href'])

    return story_links


def get_html(url, timeout=10):
    """
    Download a page and return its HTML text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text when the server answers with status 200,
        otherwise an empty string. Connection problems and timeouts are
        printed and also reported as an empty string, so a single bad link
        does not abort the whole scrape.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {url}: {e}")
        return ''

    return response.text if response.status_code == 200 else ''


def get_story_details(html_content):
    """
    Extract the title and the story text from a story page.

    Args:
        html_content: HTML source of the story page.

    Returns:
        A ``(title, story_content)`` tuple. ``title`` is the text of the
        ``<span class="main-head">`` element, or ``'Title not found'``.
        ``story_content`` is the text of the ``<p>`` elements of the post
        content module joined by blank lines and cut at the first
        occurrence of ``"Share this"``, or ``'Story content not found'``
        when the module is missing.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # get_text(strip=True) strips every text node and glues them together
    # with no separator, fusing words around inline tags such as <em> or <a>.
    # Take the raw text and normalise the whitespace instead.
    title_tag = soup.find('span', class_='main-head')
    if title_tag:
        title = ' '.join(title_tag.get_text().split())
    else:
        title = 'Title not found'

    # NOTE(review): the class string is matched as a whole and 'tb_iy83113'
    # looks like a generated id - confirm it is the same on every story page.
    story_div = soup.find('div', class_='module module-post-content tb_iy83113')
    if not story_div:
        return title, 'Story content not found'

    paragraphs = (
        ' '.join(paragraph.get_text().split())
        for paragraph in story_div.find_all('p')
    )
    story_content = '\n\n'.join(paragraphs)

    # Everything from the first "Share this" onwards is dropped
    story_content = story_content.split("Share this")[0].strip()

    return title, story_content


def _safe_filename(title, max_bytes=200):
    """Turn a story title into a file-name stem that can safely be created."""
    name = title.replace(' ', '-').lower()

    # Path separators, characters rejected by common file systems and
    # control characters would make open() fail or escape the directory.
    unsafe = '/\\:*?"<>|'
    name = ''.join('-' if (ch in unsafe or ord(ch) < 32) else ch for ch in name)

    # Keep the encoded name below the usual 255-byte limit ('.txt' is added later)
    encoded = name.encode('utf-8')
    if len(encoded) > max_bytes:
        name = encoded[:max_bytes].decode('utf-8', errors='ignore')

    return name or 'untitled'


def main():
    """
    Download the stories of every category and save them as text files.

    For each entry of ``category_list`` the listing pages 1 to 9 are
    fetched, every linked story is downloaded and written to
    ``./flash-fiction-online/<category>/<title>.txt``. Stories that cannot
    be downloaded, or whose text cannot be located, are reported and
    skipped instead of being saved as placeholder files.
    """
    for category in category_list:

        # Create directory structure if it doesn't exist
        dir_path = f'./flash-fiction-online/{category}'
        os.makedirs(dir_path, exist_ok=True)

        # Iterate over pages
        for page in range(1, 10):
            page_url = f'{base_url}{category}/page/{page}/'
            story_links = get_story_links(page_url)

            for link in story_links:
                html_content = get_html(link)
                if not html_content:
                    print(f"Skipped (download failed): {link}")
                    continue

                title, story_content = get_story_details(html_content)
                if story_content == 'Story content not found':
                    print(f"Skipped (no story content): {link}")
                    continue

                file_path = os.path.join(dir_path, f'{_safe_filename(title)}.txt')

                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(f"Title: {title}\n\n{story_content}")

                print(f"Saved: {file_path}")


if __name__ == '__main__':
    main()

Saved: ./flash-fiction/fantasy/ursula-the-monster.txt
Saved: ./flash-fiction/fantasy/saint-woad-and-sister-welwitshcia.txt
Saved: ./flash-fiction/fantasy/the-constellations-of-daughter-death.txt
Saved: ./flash-fiction/fantasy/to-slay-a-goblin.txt
Saved: ./flash-fiction/fantasy/little-fish,-big-fish.txt
Saved: ./flash-fiction/fantasy/nancy-shreds-the-clouds.txt
Saved: ./flash-fiction/fantasy/when-the-forest-comes-to-you.txt
Saved: ./flash-fiction/fantasy/lapis-lazuli.txt
Saved: ./flash-fiction/fantasy/to-rise,-to-set.txt
Saved: ./flash-fiction/fantasy/a-tiger-in-eden.txt
Saved: ./flash-fiction/fantasy/the-fox-spirit’s-retelling.txt
Saved: ./flash-fiction/fantasy/we-are-not-phoenixes.txt
Saved: ./flash-fiction/fantasy/gently-creaking-boards.txt
Saved: ./flash-fiction/fantasy/fae-magic-on-a-friday-night.txt
Saved: ./flash-fiction/fantasy/upon-what-soil-they-fed.txt
Saved: ./flash-fiction/fantasy/wonderful-wounds-await-you.txt
Saved: ./flash-fiction/fantasy/power-is-love-in-the-devil’s-eye

Saved: ./flash-fiction/humor/the-black-clover-equation.txt
Saved: ./flash-fiction/humor/foreign-tongues.txt
Saved: ./flash-fiction/humor/space-travel-loses-its-allure-when-you’ve-lost-your-moon-cup.txt
Saved: ./flash-fiction/humor/a-note-to-parents-regarding-the-beginning-and-end-of-time-diorama-presentations-for-ms.-miller’s-third-grade-class.txt
Saved: ./flash-fiction/humor/i-am-graalnak-of-the-vroon-empire,-destroyer-of-galaxies,-supreme-overlord-of-the-planet-earth.-ask-me-anything..txt
Saved: ./flash-fiction/humor/my-superpower.txt
Saved: ./flash-fiction/humor/irma-splinkbottom’s-recipe-for-cold-fusion.txt
Saved: ./flash-fiction/humor/last-bites.txt
Saved: ./flash-fiction/humor/pêlos.txt
Saved: ./flash-fiction/humor/zigzag-strikes-again.txt
Saved: ./flash-fiction/humor/the-numbers-game.txt
Saved: ./flash-fiction/humor/the-dragonslayer.txt
Saved: ./flash-fiction/humor/caps-lock-and-the-ellipsis-of-doom.txt
Error accessing https://www.flashfictiononline.com/article-categories/humor/

Error accessing https://www.flashfictiononline.com/article-categories/mainstream/page/9/: 404 Client Error: Not Found for url: https://www.flashfictiononline.com/article-categories/mainstream/page/9/
Saved: ./flash-fiction/science-fiction/in-search-of-body.txt
Saved: ./flash-fiction/science-fiction/the-first-day-of-the-week.txt
Saved: ./flash-fiction/science-fiction/grandma’s-sex-robot.txt
Saved: ./flash-fiction/science-fiction/quantum-love.txt
Saved: ./flash-fiction/science-fiction/grin-minus-cat.txt
Saved: ./flash-fiction/science-fiction/cruise-control.txt
Saved: ./flash-fiction/science-fiction/it-begins-with-raven.txt
Saved: ./flash-fiction/science-fiction/lost-and-found-at-the-center-of-the-universe.txt
Saved: ./flash-fiction/science-fiction/how-they-name-the-ships.txt
Saved: ./flash-fiction/science-fiction/eight-reasons-you-are-alone.txt
Saved: ./flash-fiction/science-fiction/on-the-anniversary-of-your-passing.txt
Saved: ./flash-fiction/science-fiction/the-perfect-brick-of-feta.tx

# Scraping from Flash Fiction Library

In [12]:
import os
import requests
from bs4 import BeautifulSoup


# Category slugs to scrape; main() appends each one to base_url to build
# the listing URL and also uses it as the output sub-directory name.
# NOTE(review): both 'scifi' and 'science-fiction' are listed - presumably
# the site has both categories; verify this is intentional.
category_list = [
    'fantasy', 
    'uncategorized', 
    'horror', 
    'romance', 
    'scifi', 
    'science-fiction'
]


# Base URL of the category listing pages; main() builds
# '<base_url><category>/page/<n>/' from it.
base_url = 'https://flashfictionlibrary.com/category/'


def get_story_links(category_url, timeout=10):
    """
    Get the story links from a category listing page.

    Args:
        category_url: URL of the category page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with the ``href`` of the first link found inside the
        ``<header class="entry-header">`` of each ``<article>`` on the page,
        in page order. An empty list is returned (and the error is printed)
        when the request fails or the server answers with an error status.
    """

    # Without a timeout, requests waits forever on a stalled connection.
    # A timeout raises a RequestException subclass, handled just below.
    try:
        response = requests.get(category_url, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {category_url}: {e}")
        return []

    # Parse the html file
    soup = BeautifulSoup(response.content, 'html.parser')

    # Collect one link per article, skipping articles without an entry header
    story_links = []
    for article in soup.find_all('article'):
        header = article.find('header', class_='entry-header')
        if not header:
            continue
        a_tag = header.find('a')
        if a_tag and 'href' in a_tag.attrs:
            story_links.append(a_tag['href'])

    return story_links


def get_story_content(url, timeout=10):
    """
    Download a story page and extract its title and text.

    Args:
        url: Address of the story page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(title, story_content)`` tuple. ``title`` is the part of the
        page ``<title>`` before the first ``' – '`` (presumably the site-name
        suffix follows it), or ``'Title not found'``. ``story_content`` is
        the text of all ``<p>`` elements of the first ``<article>`` joined by
        blank lines, or ``'Story content not found'``. When the page cannot
        be downloaded both placeholders are returned, in that order.
    """
    not_found = ('Title not found', 'Story content not found')

    # Get the response; network errors and timeouts are reported, not raised
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error accessing {url}: {e}")
        return not_found

    # The original returned the content placeholder in the title slot here
    if response.status_code != 200:
        return not_found

    # Parse the html file
    soup = BeautifulSoup(response.content, 'html.parser')

    # Get the title
    title_tag = soup.find('title')
    if title_tag:
        title = title_tag.get_text(strip=True).split(' – ')[0]
    else:
        title = 'Title not found'

    # Find the story content
    story_div = soup.find('article')
    if not story_div:
        return title, 'Story content not found'

    # get_text(strip=True) would glue the words around inline tags together,
    # so take the raw text and normalise the whitespace instead.
    paragraphs = (
        ' '.join(paragraph.get_text().split())
        for paragraph in story_div.find_all('p')
    )
    story_content = '\n\n'.join(paragraphs)

    return title, story_content


def _safe_filename(title, max_bytes=200):
    """Turn a story title into a file-name stem that can safely be created."""
    name = title.replace(' ', '-').lower()

    # Path separators, characters rejected by common file systems and
    # control characters would make open() fail or escape the directory.
    unsafe = '/\\:*?"<>|'
    name = ''.join('-' if (ch in unsafe or ord(ch) < 32) else ch for ch in name)

    # Keep the encoded name below the usual 255-byte limit ('.txt' is added later)
    encoded = name.encode('utf-8')
    if len(encoded) > max_bytes:
        name = encoded[:max_bytes].decode('utf-8', errors='ignore')

    return name or 'untitled'


def main():
    """
    Download the stories of every category and save them as text files.

    For each entry of ``category_list`` the listing pages 1 to 10 are
    fetched, every linked story is downloaded and written to
    ``./flash-fiction-library/<category>/<title>.txt``. Stories whose page
    cannot be downloaded or that yield no text are reported and skipped
    instead of being saved as placeholder files.
    """
    for category in category_list:
        dir_path = f'./flash-fiction-library/{category}'
        os.makedirs(dir_path, exist_ok=True)

        for page in range(1, 11):  # Iterate over pages 1 to 10
            page_url = f'{base_url}{category}/page/{page}/'
            story_links = get_story_links(page_url)

            for link in story_links:
                title, story_content = get_story_content(link)

                # An empty text or the placeholder means nothing worth saving
                if not story_content or story_content == 'Story content not found':
                    print(f"Skipped (no story content): {link}")
                    continue

                file_path = os.path.join(dir_path, f'{_safe_filename(title)}.txt')

                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(f"Title: {title}\n\n{story_content}")

                print(f"Saved: {file_path}")


if __name__ == '__main__':
    main()


Saved: ./flash-fiction-library/uncategorized/the-last-corporate.txt
Saved: ./flash-fiction-library/uncategorized/the-many-faces-of-sophia-morrow.txt
Saved: ./flash-fiction-library/uncategorized/the-ethereal-form-of-fairies.txt
Saved: ./flash-fiction-library/uncategorized/the-aeonian-ball.txt
Saved: ./flash-fiction-library/uncategorized/autumn-leaves.txt
Saved: ./flash-fiction-library/uncategorized/the-big-black-bird.txt
Saved: ./flash-fiction-library/uncategorized/the-weaving-woman.txt
Saved: ./flash-fiction-library/uncategorized/the-necromancer.txt
Saved: ./flash-fiction-library/uncategorized/picture-in-the-locket.txt
Saved: ./flash-fiction-library/uncategorized/those-that-live-longest.txt
Saved: ./flash-fiction-library/uncategorized/the-cost-of-divinity.txt
Saved: ./flash-fiction-library/uncategorized/astronought.txt
Saved: ./flash-fiction-library/uncategorized/beginning-&-end.txt
Saved: ./flash-fiction-library/uncategorized/when-you-look-away.txt
Saved: ./flash-fiction-library/uncat

Saved: ./flash-fiction-library/horror/mixed-signals.txt
Saved: ./flash-fiction-library/horror/being-in-the-mist.txt
Saved: ./flash-fiction-library/horror/pillars-in-the-deep.txt
Saved: ./flash-fiction-library/horror/the-mouth-in-the-wall.txt
Saved: ./flash-fiction-library/horror/court-of-the-sunflower-king.txt
Saved: ./flash-fiction-library/horror/when-death-wore-lipstick.txt
Saved: ./flash-fiction-library/horror/elysium-field.txt
Saved: ./flash-fiction-library/horror/the-beast.txt
Saved: ./flash-fiction-library/horror/the-mad-moors-of-calum.txt
Saved: ./flash-fiction-library/horror/fragile-creatures.txt
Saved: ./flash-fiction-library/horror/buying-a-soul.txt
Saved: ./flash-fiction-library/horror/me,-myself-and-the-fae.txt
Saved: ./flash-fiction-library/horror/the-old-man-and-the-stars.txt
Saved: ./flash-fiction-library/horror/miggi-island.txt
Saved: ./flash-fiction-library/horror/warriors-of-yesteryear.txt
Saved: ./flash-fiction-library/horror/the-ladies-of-llewelyn-library.txt
Saved:

Saved: ./flash-fiction-library/scifi/beast-of-burden.txt
Saved: ./flash-fiction-library/scifi/the-museum-of-selfies.txt
Saved: ./flash-fiction-library/scifi/manufacturing-stars.txt
Saved: ./flash-fiction-library/scifi/unintended-consequences.txt
Saved: ./flash-fiction-library/scifi/cold-lights.txt
Saved: ./flash-fiction-library/scifi/the-space-between-ages.txt
Saved: ./flash-fiction-library/scifi/what-you-see.txt
Saved: ./flash-fiction-library/scifi/children-of-the-cosmos.txt
Saved: ./flash-fiction-library/scifi/watcher-in-the-wastes.txt
Saved: ./flash-fiction-library/scifi/suicide-note.txt
Saved: ./flash-fiction-library/scifi/jefferson.txt
Saved: ./flash-fiction-library/scifi/the-thing-that-matters.txt
Saved: ./flash-fiction-library/scifi/tim’s-demons-&-other-friends.txt
Saved: ./flash-fiction-library/scifi/the-biologist’s-daughter.txt
Saved: ./flash-fiction-library/scifi/pillars-in-the-deep.txt
Saved: ./flash-fiction-library/scifi/elizabeth’s-sentience.txt
Saved: ./flash-fiction-libr

In [16]:
def count_txt_files(directory):
    """
    Count the files whose name ends in '.txt' below a directory.

    The whole tree under ``directory`` is visited with ``os.walk``, so files
    in nested sub-directories are included in the total.
    """
    return sum(
        1
        for _root, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith('.txt')
    )


# NOTE(review): dir_1 and dir_2 are not defined in the visible cells; the
# recorded output suggests './flash-fiction-library' and
# './flash-fiction-online' - confirm they are set before this cell runs.
print(f"Total number of .txt files in '{dir_1}': {count_txt_files(dir_1)}")
print(f"Total number of .txt files in '{dir_2}': {count_txt_files(dir_2)}")

Total number of .txt files in './flash-fiction-library': 290
Total number of .txt files in './flash-fiction-online': 350
