# Text Processing Script
**Prelim Task2**

This Python script reads a file and performs basic text analysis using regular expressions (regex).

### Main Functionalities:

1. **File Reading (`read_file`)**:
   - Reads content from a file.
   - Returns the content as a string.

2. **Regex Search (`regEX`)**:
   - Finds numbers, words, and email addresses in the text.
   - Returns matches for each pattern.

3. **Sentence Search (`find_sentences_starting_with`)**:
   - Finds sentences starting with a specific word (case-insensitive).

4. **Email Extraction (`find_all_emails`)**:
   - Extracts all email addresses from the text.

5. **Basic Stats (`process_text`)**:
   - Generates a report with the count, total, min, max, and average for numbers found.


In [29]:
import re

def read_file(file_path):
    try:
        with open(file_path, 'r') as file:
            content = file.read()
        return content
    except FileNotFoundError:
        print('File not found.')
        return ''


def regEX(content):
    # regEX Patterns
    patterns = {
        'Numbers': r'\b\d+\b',
        'Words': r'\b[A-Za-z]+\b',
        'Emails': r'[\w\.-]+@[\w\.-]+'  
    }

    results = {}
    for key, pattern in patterns.items():
        matches = re.findall(pattern, content)
        results[key] = matches

    return results


def find_sentences_starting_with(content, word):
    pattern = rf'(?i)\b{word}\b[^.?!]*[.?!]'
    sentences = re.findall(pattern, content)
    return sentences


def find_all_emails(content):
    pattern = r'[\w\.-]+@[\w\.-]+'
    emails = re.findall(pattern, content)
    return emails

def get_min(numbers):
    min_num = numbers[0]
    for num in numbers:
        if num < min_num:
            min_num = num
    return min_num

def get_max(numbers):
    max_num = numbers[0]
    for num in numbers:
        if num > max_num:
            max_num = num
    return max_num


def process_text(results):
    report = {}
    for key, matches in results.items():
        count = len(matches)

        # Collect numbers
        numbers = []
        if key == 'Numbers':
            for num in matches:
                if num.isdigit():
                    numbers.append(int(num))

        # Total Values
        if key == 'Numbers':
            if numbers:
                totals = sum(numbers)
                min_value = get_min(numbers)
                max_value = get_max(numbers)
                average = totals / count if count > 0 else 'None Found'
            else:
                totals = 'None Found'
                min_value = 'None Found'
                max_value = 'None Found'
                average = 'None Found'
        else:
            totals = 'N/A'
            min_value = 'None Found'
            max_value = 'None Found'
            average = 'None Found'


        report[key] = {
            'Count': count,
            'Total': totals,
            'Minimum': min_value,
            'Maximum': max_value,
            'Average': average
        }
    
    return report



file_path = 'virus.txt' # FILE
content = read_file(file_path)
if content:
    results = regEX(content)
    report = process_text(results)

    print('\nRegex Search Result:\n')S
    for key, data in report.items():
        print(f'{key}:')
        for k, v in data.items():
            print(f'  {k}: {v}')

    # Print sentences starting with a specific word
    word_to_search = 'virus'  # Can change to any word
    sentences = find_sentences_starting_with(content, word_to_search)
    print(f"\nSentences starting with '{word_to_search}':")
    for sentence in sentences:
        print(f'- {sentence.strip()}')

    # Print all emails found in the text
    emails = find_all_emails(content)
    print("--------------------------------------------")
    print("\nAll Emails Found:")
    for email in emails:
        print(f'- {email}')



Regex Search Result:

Numbers:
  Count: 404
  Total: 53091
  Minimum: 0
  Maximum: 9600
  Average: 131.41336633663366
Words:
  Count: 12387
  Total: N/A
  Minimum: None Found
  Maximum: None Found
  Average: None Found
Emails:
  Count: 2
  Total: N/A
  Minimum: None Found
  Maximum: None Found
  Average: None Found

Sentences starting with 'virus':
- Virus Tools and Techniques 

W.
- virus tools are now available to help manage this threat.
- virus tools.
- Virus Removal Tools 
    4.
- Virus Techniques 
  5.
- Virus Tools 
    5.
- virus tools are discussed, as well as suggestions of 
appropriate applications for these tools.
- virus tools, regardless of platform.
- virus tools have focused on personal computers since these systems are 
currently at the greatest risk of infection.
- virus tools for an organization.
- virus products and follows with selection criteria which must 
be considered to ensure practicality and convenience.
- virus tools (e.
- virus tools.
- virus problem is 