## Step: 1: Import Libraries

In [17]:
# Import libraries

import pandas as pd
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service  # Correct import
from selenium.webdriver.chrome.options import Options
import matplotlib.pyplot as plt 
import os
import logging

## Step 2: Configuration

In [19]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger()

## Step 3: Scrape Wikipedia Content

In [21]:
def scrape_wikipedia_content():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    
    url = "https://en.wikipedia.org/wiki/Key_events_of_the_20th_century"
    response = requests.get(url, headers=headers, timeout=15)
    response.raise_for_status()
    
    soup = BeautifulSoup(response.content, 'html.parser')
    main_content = soup.find('div', {'class': 'mw-parser-output'})
    
    return main_content.get_text() if main_content else None

content = scrape_wikipedia_content()

## Step 4: Organize Content with Headings

In [23]:
def organize_content_with_headings(text_content):
    lines = text_content.split('\n')
    organized_lines = []
    
    organized_lines.append("KEY EVENTS OF THE 20TH CENTURY")
    organized_lines.append("=" * 50)
    organized_lines.append("Historical Overview and Major Developments")
    organized_lines.append("")
    
    current_section = "INTRODUCTION"
    
    for line in lines:
        line = line.strip()
        if not line:
            continue
            
        # Detect section headers (lines in uppercase or with specific patterns)
        if (line.isupper() and len(line) > 10 and len(line) < 100) or \
           ('20th century' in line.lower() and len(line) < 80):
            organized_lines.append("")
            organized_lines.append(line.upper())
            organized_lines.append("-" * len(line))
            current_section = line.upper()
        elif len(line) > 30:  # Substantial content
            organized_lines.append(line)
            organized_lines.append("")
    
    return '\n'.join(organized_lines)

if content:
    organized_content = organize_content_with_headings(content)
else:
    organized_content = "Content not available"

In [24]:
# Cell 4: Save Organized Content

In [25]:
output_filename = "organized_20th_century_events.txt"

with open(output_filename, 'w', encoding='utf-8') as f:
    f.write(organized_content)

print(f"Organized content saved to: {output_filename}")
print(f"Content size: {len(organized_content)} characters")

Organized content saved to: organized_20th_century_events.txt
Content size: 108471 characters


In [26]:
# Cell 5: Display Preview

In [27]:
print("CONTENT PREVIEW")
print("=" * 50)
lines = organized_content.split('\n')
for i, line in enumerate(lines[:30]):
    print(line)

CONTENT PREVIEW
KEY EVENTS OF THE 20TH CENTURY
Historical Overview and Major Developments

The 20th century changed the world in unprecedented ways. The World Wars sparked tension between countries and led to the creation of atomic bombs, the Cold War led to the Space Race and the creation of space-based rockets, and the World Wide Web was created. These advancements have played a significant role in citizens' lives and shaped the 21st century into what it is today.


HISTORIC EVENTS IN THE 20TH CENTURY[EDIT]
-----------------------------------------
World at the beginning of the century[edit]

The new beginning of the 20th century marked significant changes. The 1900s saw the decade herald a series of inventions, including the automobile, airplane and radio broadcasting. 1914 saw the completion of the Panama Canal.

The Scramble for Africa continued in the 1900s and resulted in wars and genocide across the continent. The atrocities in the Congo Free State shocked the civilized world.


In [None]:
# Cell 6: Create Summary File

In [28]:
summary_content = """
20TH CENTURY HISTORICAL ANALYSIS
================================

MAJOR THEMATIC CATEGORIES:

Global Conflicts and Wars
- World War I and II
- Cold War era
- Regional conflicts
- Decolonization movements

Technological Advancements  
- Aviation and space exploration
- Digital revolution
- Medical breakthroughs
- Communication technologies

Political Transformations
- Rise and fall of empires
- Democratic expansions
- International organizations
- Geopolitical shifts

Social and Cultural Changes
- Civil rights movements
- Women's suffrage
- Educational expansion
- Cultural globalization

DATA CHARACTERISTICS:
- Period Coverage: 1900-1999
- Content Type: Historical events and developments
- Source: Wikipedia curated content
- Format: Organized text with thematic headings

This dataset provides comprehensive coverage of major 20th century 
historical events suitable for research and analysis.
"""

with open("20th_century_analysis_summary.txt", "w", encoding="utf-8") as f:
    f.write(summary_content)

print("Summary file created: 20th_century_analysis_summary.txt")

Summary file created: 20th_century_analysis_summary.txt


In [29]:
# Cell 7: Final Verification

In [31]:
files_created = [
    "organized_20th_century_events.txt",
    "20th_century_analysis_summary.txt"
]

print("PROJECT COMPLETION VERIFICATION")
print("=" * 40)

for file in files_created:
    if os.path.exists(file):
        size = os.path.getsize(file)
        print(f"✓ {file} ({size} bytes)")
    else:
        print(f"✗ {file} - MISSING")

print(f"\nStatus: COMPLETED")

PROJECT COMPLETION VERIFICATION
✓ organized_20th_century_events.txt (108822 bytes)
✓ 20th_century_analysis_summary.txt (905 bytes)

Status: COMPLETED
