# Deterministic URL and Hashtag Segmentation

In [4]:
import re

def load_words(file_path):
    """Load the set of valid words from a word-list file.

    Each line of the file is treated as one word: surrounding whitespace is
    stripped and the word is lower-cased. Blank (or whitespace-only) lines
    are skipped so the empty string never ends up in the dictionary.

    Args:
        file_path: Path to a text file containing one word per line.

    Returns:
        A set of lower-cased, non-empty words.
    """
    with open(file_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        # Filter after stripping so whitespace-only lines are dropped too.
        return {word.lower() for word in stripped_lines if word}

def is_number(s):
    """Check if a string is a valid number (integer or decimal).

    A valid number is one or more digits, optionally followed by a dot and
    one or more digits (e.g. ``"42"`` or ``"3.14"``). Signs, exponents and
    leading/trailing dots are not accepted.

    Args:
        s: The string to test.

    Returns:
        True if the whole string is a number, False otherwise.
    """
    # fullmatch is used instead of match with a '$' anchor because '$' also
    # matches just before a trailing newline, which would accept "12\n".
    return re.fullmatch(r'\d+(\.\d+)?', s) is not None

def segment_string(s, words):
    """Split ``s`` into dictionary words and numbers.

    A table is filled left to right: entry ``end`` holds one token list that
    exactly covers ``s[:end]``, or None when that prefix cannot be segmented.
    For each ``end`` the start positions are scanned from the left and the
    first usable one is kept, so the last token of every prefix is the
    longest one that still leaves a segmentable remainder before it.

    Args:
        s: The string to segment.
        words: Container of valid words, queried with ``in``.

    Returns:
        A list of tokens covering all of ``s`` (an empty list for an empty
        string), or None if no segmentation exists.
    """
    length = len(s)
    # Index 0 is the empty prefix, which is trivially segmented.
    best = [[]] + [None] * length

    for end in range(1, length + 1):
        for start in range(end):
            head_tokens = best[start]
            if head_tokens is None:
                # s[:start] cannot be segmented, so this split is useless.
                continue
            piece = s[start:end]
            if piece in words or is_number(piece):
                best[end] = [*head_tokens, piece]
                break

    return best[length]

def clean_input(s):
    """Normalise a raw URL or hashtag before segmentation.

    The text is lower-cased, a leading ``www.`` and a trailing ``.com``,
    ``.org``, ``.edu`` or ``.in`` are dropped, and any leading ``#``
    characters are removed.

    Args:
        s: The raw input string.

    Returns:
        The cleaned, lower-cased string.
    """
    lowered = s.lower()
    # A single substitution handles both the "www." prefix and the extension.
    without_affixes = re.sub(r'^www\.|(\.com|\.org|\.edu|\.in)$', '', lowered)
    # Hashtags: discard the leading '#' marker(s).
    return without_affixes.lstrip('#')

def process_input(inputs, words):
    """Clean and segment every input string.

    Args:
        inputs: Iterable of raw strings (URLs or hashtags).
        words: Container of valid words passed through to segment_string.

    Returns:
        A list with one entry per input: the tokens joined by single spaces,
        or the cleaned string itself when no tokens were produced.
    """
    results = []
    for raw in inputs:
        text = clean_input(raw)
        tokens = segment_string(text, words)
        # Fall back to the cleaned text when segmentation yields nothing.
        results.append(' '.join(tokens) if tokens else text)
    return results

# Script entry point: read the test cases from stdin and print one result per line.
if __name__ == "__main__":
    # The dictionary file is expected in the current working directory.
    dictionary = load_words('words.txt')

    # First line of stdin is the number of cases; each following line is one case.
    case_count = int(input())
    queries = []
    for _ in range(case_count):
        queries.append(input().strip())

    # Segment everything first, then emit the results in input order.
    segmented_lines = process_input(queries, dictionary)
    for line in segmented_lines:
        print(line)


 3
 #letusgo  
 www.whatismyname.com  
 #isittime 


let us go
whatismyname
i sit time
