In [2]:
pip install dateparser

Collecting dateparser
  Downloading dateparser-1.2.1-py3-none-any.whl.metadata (29 kB)
Downloading dateparser-1.2.1-py3-none-any.whl (295 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.7/295.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dateparser
Successfully installed dateparser-1.2.1


In [3]:
import re
import dateparser
from datetime import datetime

# Disambiguate ambiguous financial terms using a simple dictionary.
# Keys must be lower-case: the matched word is lower-cased before the lookup.
ambiguous_terms = {
    "raised": "increased",   # "raised" is expressed as "increased"
    "fell": "decreased",
    "drop": "decline",
    "rally": "upturn",
    "rallied": "experienced an upturn"
}

def disambiguate_financial_terms(text):
    """
    Replace ambiguous financial terms with clearer alternatives.

    Every whole-word, case-insensitive occurrence of a key of
    ``ambiguous_terms`` is replaced by the mapped value. The capitalisation
    of the matched word is carried over to the replacement:

    * all-caps word (e.g. "DROP")        -> replacement upper-cased ("DECLINE")
    * capitalised word (e.g. "Fell")     -> first letter upper-cased ("Decreased")
    * anything else                      -> replacement used verbatim

    Only exact word forms listed in the mapping are touched; "dropped" is not
    rewritten by the "drop" entry because of the word-boundary anchors.

    Args:
        text: The input string.

    Returns:
        A new string with the listed terms substituted.
    """
    # With an empty mapping there is nothing to substitute.
    if not ambiguous_terms:
        return text

    def replacer(match):
        word = match.group(0)
        # Lower-casing normalises the lookup against the lower-case keys.
        replacement = ambiguous_terms.get(word.lower(), word)
        if len(word) > 1 and word.isupper():
            return replacement.upper()
        if word[:1].isupper():
            # Keep sentence-initial capitalisation intact.
            return replacement[:1].upper() + replacement[1:]
        return replacement

    # Longest keys first so a key that is a prefix of another can never win
    # the alternation ahead of the longer term.
    terms = sorted(ambiguous_terms, key=len, reverse=True)
    pattern = r'\b(?:' + '|'.join(re.escape(term) for term in terms) + r')\b'
    return re.sub(pattern, replacer, text, flags=re.IGNORECASE)




Disambiguated Text:
Two years ago, the company's revenue increased by 16% than yesterday. Also, the market experienced an upturn sharply.

Extracted Numerical Data:
relative_dates: [('yesterday', '2025-02-07')]
percentages: [16.0]
textual_numerical_expressions: ['2 years ago']


In [4]:
# Extract numerical data such as relative dates and percentages.

# Phrases that are resolved to calendar dates:
# "<digits> year(s)/month(s)/day(s) ago", "yesterday" and "today".
_RELATIVE_DATE_RE = re.compile(
    r'\b(?:\d+\s+(?:years?|months?|days?)\s+ago|(?:yesterday|today))\b',
    flags=re.IGNORECASE,
)

# A number followed by "%" or the word "percent".
# The number may carry a sign, thousands separators ("1,000") or a bare
# leading decimal point (".5"). The look-behinds stop the match from starting
# in the middle of a larger token: not after a word character, ".", or "%"
# (so "5%-10%" is a range, not a negative value), and not after "<digit>,"
# (so the tail of a comma-grouped number is never read on its own).
_PERCENT_RE = re.compile(
    r'(?<![\w.%])(?<!\d,)'
    r'([-+]?(?:\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?|\.\d+))'
    r'\s*(?:%|percent\b)',
    flags=re.IGNORECASE,
)

# Spelled-out numbers recognised in "<word> <unit> ago" expressions.
_NUMBER_WORDS = {
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
    "ten": 10
}

_TEXTUAL_AGO_RE = re.compile(
    r'\b(' + '|'.join(_NUMBER_WORDS) + r')\s+(years?|months?|days?)\s+ago\b',
    flags=re.IGNORECASE,
)


def extract_numerical_data(text, reference_date=None):
    """
    Pull relative dates, percentages and spelled-out "N units ago" phrases
    out of ``text``.

    Args:
        text: The string to scan.
        reference_date: ``datetime`` used as the base for resolving relative
            date phrases. Defaults to ``datetime.now()`` when ``None``, which
            makes the resolved dates depend on when the function is called.

    Returns:
        A dict with three keys:

        * ``"relative_dates"``: list of ``(phrase, "YYYY-MM-DD")`` tuples for
          digit-based "... ago" phrases, "yesterday" and "today". Phrases for
          which ``dateparser.parse`` returns a falsy result are skipped.
        * ``"percentages"``: list of floats, in order of appearance, for
          numbers followed by ``%`` or ``percent``. Signs are kept and
          thousands separators are removed before conversion.
        * ``"textual_numerical_expressions"``: list of strings such as
          ``"2 years ago"`` built from spelled-out numbers one..ten; the unit
          is copied from the text as written. These are normalised only, not
          resolved to dates.
    """
    # Only a missing reference date is handled here; its validity is not checked.
    if reference_date is None:
        reference_date = datetime.now()

    relative_dates = []
    for phrase in _RELATIVE_DATE_RE.findall(text):
        parsed = dateparser.parse(phrase, settings={'RELATIVE_BASE': reference_date})
        if parsed:
            relative_dates.append((phrase, parsed.strftime("%Y-%m-%d")))

    # Drop thousands separators so float() accepts the token.
    percentages = [
        float(number.replace(",", "")) for number in _PERCENT_RE.findall(text)
    ]

    textual_expressions = [
        f"{_NUMBER_WORDS[word.lower()]} {unit} ago"
        for word, unit in _TEXTUAL_AGO_RE.findall(text)
    ]

    return {
        "relative_dates": relative_dates,
        "percentages": percentages,
        "textual_numerical_expressions": textual_expressions,
    }




In [5]:
# Test case 1
sample_text = "Two years ago, the company's revenue raised by 16% than yesterday. Also, the market rallied sharply."

# Fixed base date for relative phrases such as "yesterday", so the printed
# dates do not depend on the day the cell is executed.
REFERENCE_DATE = datetime(2025, 2, 8)

# Disambiguate ambiguous financial terms.
disambiguated_text = disambiguate_financial_terms(sample_text)

print(disambiguated_text)


# Extract numerical data into a dict keyed by category.
extracted_data = extract_numerical_data(disambiguated_text, reference_date=REFERENCE_DATE)

# Inline sanity check: the only percentage in the sample is 16%.
assert extracted_data["percentages"] == [16.0]

for key, value in extracted_data.items():
    print(f"{key}: {value}")

Two years ago, the company's revenue increased by 16% than yesterday. Also, the market experienced an upturn sharply.
relative_dates: [('yesterday', '2025-02-07')]
percentages: [16.0]
textual_numerical_expressions: ['2 years ago']
