<a href="https://colab.research.google.com/github/ericyoc/gather_cyber_topics_poc/blob/main/gather_interesting_links.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
from google.colab import drive
import os
import re
from datetime import datetime

def mount_drive():
    """
    Mount Google Drive inside the Colab runtime.

    Calls ``drive.mount`` on ``/content/drive`` and returns the path of the
    ``Interesting_Links`` folder under ``My Drive`` as a string. The folder
    itself is not checked for existence here.
    """
    drive.mount('/content/drive')
    return '/content/drive/My Drive/Interesting_Links'

def get_shortcut_info(file_path):
    """
    Extract link information from an Internet shortcut (``.url``) file.

    Args:
        file_path: Path to the shortcut file; it is read as UTF-8 text.

    Returns:
        A tuple ``(name, url, link_type)`` where ``name`` is the file name
        without its extension and ``link_type`` is ``'YouTube Video'`` for
        youtube.com / youtu.be links and ``'URL'`` otherwise. Returns
        ``None`` when the file cannot be read or holds no usable ``URL=``
        entry; read errors are reported with ``print`` rather than raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Anchor the key to the start of a line: an unanchored 'URL=' also
        # matches inside keys such as 'BASEURL=' or 'IconURL=', which would
        # return the wrong address when they appear before the real entry.
        url_match = re.search(r'^[ \t]*URL=(.*)$', content, re.MULTILINE)
        if url_match is None:
            return None

        url = url_match.group(1).strip()
        if not url:
            # An empty 'URL=' line carries no link worth reporting.
            return None

        name = os.path.splitext(os.path.basename(file_path))[0]
        url_lower = url.lower()
        is_youtube = 'youtube.com' in url_lower or 'youtu.be' in url_lower
        link_type = 'YouTube Video' if is_youtube else 'URL'
        return (name, url, link_type)
    except Exception as e:
        # Best effort: one unreadable shortcut must not abort the whole scan.
        print(f"Error processing file {file_path}: {str(e)}")
        return None

def categorize_directory(dir_path):
    """
    Determine which section a directory belongs to based on its path.

    The path is lower-cased, ``_`` and ``-`` are treated as spaces, and each
    category's keywords are searched for as substrings. The category owning
    the longest matching keyword wins, so a specific phrase such as
    'social engineering' beats the generic 'engineering', and
    'authorization' beats 'auth'. Ties go to the category listed first.

    Args:
        dir_path: Directory path (typically relative to the scan root).

    Returns:
        The lower-case category name, or 'uncategorized' when no keyword
        matches.
    """
    normalized = dir_path.lower().replace('_', ' ').replace('-', ' ')
    categories = {
        'refresher': ['refresher', 'basics', 'fundamentals'],
        'security engineering': ['engineering', 'security engineering'],
        'cryptography': ['crypto', 'cryptography', 'encryption'],
        'authentication': ['authentication', 'auth', 'identity'],
        'user access': ['access', 'user access', 'authorization'],
        'malicious software': ['malware', 'malicious', 'virus', 'ransomware'],
        'network security': ['network', 'networking'],
        'web security': ['web', 'webapp', 'web security'],
        'social engineering': ['social', 'phishing', 'social engineering'],
        'security architecture': ['architecture', 'infrastructure', 'security architecture']
    }

    best_category = 'uncategorized'
    best_length = 0
    for category, keywords in categories.items():
        for keyword in keywords:
            # Strict '>' keeps the earlier category when lengths tie.
            if len(keyword) > best_length and keyword in normalized:
                best_category = category
                best_length = len(keyword)

    return best_category

def scan_directories(base_path):
    """
    Recursively scan all directories and collect shortcut information.

    Every file whose name ends in ``.url`` (case-insensitively) is parsed
    with ``get_shortcut_info``. Directories holding at least one parsable
    shortcut are grouped under the section chosen by
    ``categorize_directory``. Directories and files are visited in sorted
    order so repeated runs produce the same document layout.

    Args:
        base_path: Root directory to walk.

    Returns:
        Dictionary with section titles as keys and lists of
        ``(relative_dir_path, shortcuts)`` tuples as values, where
        ``shortcuts`` is a list of ``(name, url, link_type)`` tuples.
        Every predefined section is present, possibly with an empty list.
    """
    categorized_shortcuts = {
        'Refresher': [],
        'Security Engineering': [],
        'Cryptography': [],
        'Authentication': [],
        'User Access': [],
        'Malicious Software': [],
        'Network Security': [],
        'Web Security': [],
        'Social Engineering': [],
        'Security Architecture': [],
        'Uncategorized': []
    }

    for root, dirs, files in os.walk(base_path):
        # os.walk yields entries in arbitrary order; sorting dirs in place
        # (allowed in top-down mode) fixes the traversal order.
        dirs.sort()
        shortcuts = []

        for file in sorted(files):
            # Windows shortcuts may carry an upper-case '.URL' extension.
            if not file.lower().endswith('.url'):
                continue
            shortcut_info = get_shortcut_info(os.path.join(root, file))
            if shortcut_info:
                shortcuts.append(shortcut_info)

        if not shortcuts:
            continue

        rel_path = os.path.relpath(root, base_path)
        # title() capitalizes every word, turning the lower-case category
        # name into the matching section key.
        category = categorize_directory(rel_path).title()
        if category not in categorized_shortcuts:
            category = 'Uncategorized'
        categorized_shortcuts[category].append((rel_path, shortcuts))

    return categorized_shortcuts

def _rtf_escape(text):
    """Return *text* escaped for literal inclusion in an RTF document."""
    pieces = []
    for ch in str(text):
        code = ord(ch)
        if ch in '\\{}':
            # Backslash and braces are RTF syntax and must be escaped.
            pieces.append('\\' + ch)
        elif code < 128:
            pieces.append(ch)
        else:
            # Non-ASCII text is written as \uN? escapes, where N is a signed
            # 16-bit UTF-16 code unit and '?' is the fallback character.
            if code > 0xFFFF:
                code -= 0x10000
                units = (0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF))
            else:
                units = (code,)
            for unit in units:
                if unit > 32767:
                    unit -= 65536
                pieces.append(f"\\u{unit}?")
    return ''.join(pieces)


def generate_rtf_content(categorized_shortcuts):
    """
    Generate RTF formatted content with predefined sections.

    Args:
        categorized_shortcuts: Mapping of section title to a list of
            ``(dir_path, shortcuts)`` tuples, where ``shortcuts`` is a list
            of ``(name, url, link_type)`` tuples. Sections missing from the
            mapping are treated as empty; keys outside the predefined
            section list are ignored.

    Returns:
        The complete RTF document as a string. Directory paths, names, link
        types and URLs are RTF-escaped so that backslashes, braces and
        non-ASCII characters cannot corrupt the document.
    """
    rtf_header = """{\\rtf1\\ansi\\deff0
{\\colortbl;\\red0\\green0\\blue0;\\red0\\green0\\blue255;\\red128\\green0\\blue0;}
{\\fonttbl{\\f0\\fswiss\\fcharset0 Arial;}{\\f1\\froman\\fcharset0 Times New Roman;}}
"""

    # Collect fragments and join once instead of repeated string +=.
    parts = [rtf_header]

    # Add title and date
    current_date = datetime.now().strftime("%B %d, %Y")
    parts.append("\\f0\\fs32\\b Security Resources Collection\\line")
    parts.append(f"\\fs24\\i Generated on {current_date}\\i0\\line\\line")

    # Add each section in specified order
    section_order = [
        'Refresher',
        'Security Engineering',
        'Cryptography',
        'Authentication',
        'User Access',
        'Malicious Software',
        'Network Security',
        'Web Security',
        'Social Engineering',
        'Security Architecture',
        'Uncategorized'
    ]

    for section in section_order:
        dir_shortcuts = categorized_shortcuts.get(section)
        if not dir_shortcuts:  # Only add sections that have content
            continue

        parts.append(f"\\fs36\\b\\cf3 {section}\\line")
        parts.append("\\cf1\\fs24 " + "="*50 + "\\line\\line")

        for dir_path, shortcuts in dir_shortcuts:
            # Add directory subheader
            safe_dir = _rtf_escape(dir_path)
            parts.append(f"\\b\\fs28 {safe_dir}\\line\\b0\\fs24")

            # Add shortcuts
            for name, url, link_type in shortcuts:
                safe_name = _rtf_escape(name)
                safe_type = _rtf_escape(link_type)
                safe_url = _rtf_escape(url)
                parts.append(f"\\b Name:\\b0  {safe_name}\\line")
                parts.append(f"\\b Type:\\b0  {safe_type}\\line")
                parts.append(f"\\b URL:\\b0  \\cf2\\ul {safe_url}\\ulnone\\cf1\\line")
                parts.append("-"*30 + "\\line\\line")

        parts.append("\\line")  # Extra space between sections

    parts.append("}")
    return "".join(parts)

def save_rtf_document(content, base_path):
    """
    Save or update the RTF document inside ``base_path``.

    Writes ``content`` to ``Security_Resources_Collection.rtf`` (UTF-8),
    overwriting any existing file, and prints whether the document was
    created or updated. Failures are reported with ``print`` rather than
    raised.

    Args:
        content: Full RTF document text to write.
        base_path: Directory in which the document is stored.
    """
    output_path = os.path.join(base_path, 'Security_Resources_Collection.rtf')
    try:
        # Check before writing: once the file is written it always exists,
        # so a later check could never report 'created'.
        already_existed = os.path.exists(output_path)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(content)
        action = 'updated' if already_existed else 'created'
        print(f"Successfully {action} RTF document at: {output_path}")
    except Exception as e:
        print(f"Error with document: {str(e)}")

def main():
    """
    Run the whole pipeline: mount Drive, scan the link folders, render the
    RTF document and write it back into the scanned folder.
    """
    links_root = mount_drive()

    # Collect shortcuts per section, then render them in one pass.
    shortcuts_by_section = scan_directories(links_root)
    document = generate_rtf_content(shortcuts_by_section)

    # The document is stored alongside the links it describes.
    save_rtf_document(document, links_root)

if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Successfully updated RTF document at: /content/drive/My Drive/Interesting_Links/Security_Resources_Collection.rtf
