In [1]:
import re

# Sample event log data with timestamps
event_log_data = """
Event 1: 01/09/2013 08:30:45 Task A started
Event 2: 02/09/2013 10:15:20 Task B completed
Event 3: 12/11/2013 14:45:00 Task C started
"""

# Regular expression pattern to match timestamps in "DD/MM/YYYY HH:MM:SS" format
timestamp_pattern = r'\b\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}\b'

# Find all timestamps in the event log data using the pattern matcher
timestamps_found = re.findall(timestamp_pattern, event_log_data)

# Print the identified timestamps
for timestamp in timestamps_found:
    print(timestamp)


01/09/2013 08:30:45
02/09/2013 10:15:20
12/11/2013 14:45:00


Identify wrong format( if month is larger than 12)


In [1]:
import re

# Sample event log data with timestamps (including some incorrect formats)
event_log_data = """
Event 1: 01/09/2013 08:30:45 Task A started
Event 2: 13/09/2013 10:15:20 Task B completed (wrong format)
Event 3: 12/11/2013 14:45:00 Task C started
Event 4: 25/15/2013 18:00:00 Task D finished (wrong format)
"""

# Regular expression pattern to match timestamps in "DD/MM/YYYY HH:MM:SS" format
timestamp_pattern = r'\b\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}\b'

# Find all timestamps in the event log data using the pattern matcher
timestamps_found = re.findall(timestamp_pattern, event_log_data)

# Print the identified timestamps with format validation
for timestamp in timestamps_found:
    # Extract month and check if it's within valid range
    month = int(timestamp[3:5])
    if month > 12:
        print(f"{timestamp} (wrong format)")
    else:
        print(timestamp)


01/09/2013 08:30:45
13/09/2013 10:15:20
12/11/2013 14:45:00
25/15/2013 18:00:00 (wrong format)


Convert Between two formats

In [None]:
from datetime import datetime

def convert_timestamp(input_timestamp, input_format, output_format):
    try:
        # Parse the input timestamp using the input format
        parsed_timestamp = datetime.strptime(input_timestamp, input_format)
        # Convert the parsed timestamp to the output format
        output_timestamp = parsed_timestamp.strftime(output_format)
        return output_timestamp
    except ValueError:
        return "Invalid input format or timestamp"

# Sample event logs with timestamps in different formats
event_logs = [
    ("01/09/2013 08:30:45", "%d/%m/%Y %H:%M:%S"),  # DD/MM/YYYY HH:MM:SS format
    ("2023-05-15 12:45:30", "%Y-%m-%d %H:%M:%S"),  # YYYY-MM-DD HH:MM:SS format
    ("06/30/2022 09:00:00 AM", "%m/%d/%Y %I:%M:%S %p"),  # MM/DD/YYYY HH:MM:SS AM/PM format
]

# Desired output format for conversion
output_format = "%Y-%m-%d %H:%M:%S"  # YYYY-MM-DD HH:MM:SS format

# Convert and print the timestamps in the desired output format
for event_log in event_logs:
    input_timestamp, input_format = event_log
    converted_timestamp = convert_timestamp(input_timestamp, input_format, output_format)
    print(f"Input Timestamp: {input_timestamp}, Input Format: {input_format}")
    print("Converted Timestamp:", converted_timestamp)
    print()


Input Timestamp: 01/09/2013 08:30:45, Input Format: %d/%m/%Y %H:%M:%S
Converted Timestamp: 2013-09-01 08:30:45

Input Timestamp: 2023-05-15 12:45:30, Input Format: %Y-%m-%d %H:%M:%S
Converted Timestamp: 2023-05-15 12:45:30

Input Timestamp: 06/30/2022 09:00:00 AM, Input Format: %m/%d/%Y %I:%M:%S %p
Converted Timestamp: 2022-06-30 09:00:00



In [1]:
from datetime import datetime

# Sample timestamps with format variations
timestamps = [
    "05/12/2023 08:30:45",  # Month-Day-Year format (MM/DD/YYYY)
    "12/05/2023 08:32:30",  # Day-Month-Year format (DD/MM/YYYY)
    "2023-05-12 08:35:20",  # Month-Day-Year format (YYYY-MM-DD)
    "2023-12-05 08:40:00",  # Day-Month-Year format (YYYY-DD-MM)
]

# Function to check for format variations
def check_format_variations(timestamp):
    try:
        # Attempt to parse timestamp using MM/DD/YYYY format
        parsed_md = datetime.strptime(timestamp, '%m/%d/%Y %H:%M:%S')
        # Attempt to parse timestamp using DD/MM/YYYY format
        parsed_dm = datetime.strptime(timestamp, '%d/%m/%Y %H:%M:%S')
        # Check if parsed dates differ significantly (e.g., more than a month)
        if abs((parsed_md - parsed_dm).days) > 30:
            return True  # Format variation detected
        else:
            return False  # No significant format variation
    except ValueError:
        return False  # Unable to parse timestamp in either format

# Check for format variations in the timestamps
for ts in timestamps:
    if check_format_variations(ts):
        print(f"Format variation detected in timestamp: {ts}")
    else:
        print(f"No significant format variation in timestamp: {ts}")


Format variation detected in timestamp: 05/12/2023 08:30:45
Format variation detected in timestamp: 12/05/2023 08:32:30
No significant format variation in timestamp: 2023-05-12 08:35:20
No significant format variation in timestamp: 2023-12-05 08:40:00


In [3]:
from datetime import datetime

# Sample timestamps with format and timezone variations
timestamps = [
    "2023-05-15 08:30:45",   # Standard format with colon for time separation
    "2023-05-15 08.32.30",   # Alternative format with dot for time separation
    "2023-05-15T08:35:20Z",  # UTC timezone encoding
    "2023-05-15 08:40:00-05:00",  # Timezone offset encoding
]

# Function to check for format and timezone variations
def check_format_timezone_variations(timestamp):
    try:
        # Attempt to parse timestamp using standard format with colon
        parsed_standard = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S')
        # Attempt to parse timestamp using alternative format with dot
        parsed_alternative = datetime.strptime(timestamp, '%Y-%m-%d %H.%M.%S')
        # Check if parsed timestamps differ significantly
        if abs((parsed_standard - parsed_alternative).seconds) > 0:
            return True  # Format or timezone variation detected
        else:
            return False  # No significant format or timezone variation
    except ValueError:
        return False  # Unable to parse timestamp in either format

# Check for format and timezone variations in the timestamps
for ts in timestamps:
    if check_format_timezone_variations(ts):
        print(f"Format or timezone variation detected in timestamp: {ts}")
    else:
        print(f"No significant format or timezone variation in timestamp: {ts}")


No significant format or timezone variation in timestamp: 2023-05-15 08:30:45
No significant format or timezone variation in timestamp: 2023-05-15 08.32.30
No significant format or timezone variation in timestamp: 2023-05-15T08:35:20Z
No significant format or timezone variation in timestamp: 2023-05-15 08:40:00-05:00


In [4]:
# Sample timestamps with time separation symbol variations
timestamps = [
    "2023-05-15 08:30:45",   # Standard format with colon for time separation
    "2023-05-15 08.32.30",   # Alternative format with period for time separation
]

# Function to convert timestamps to the same format
def convert_to_same_format(timestamp):
    # Replace the alternative time separation symbol (period ".") with the standard symbol (colon ":")
    converted_timestamp = timestamp.replace(".", ":")
    return converted_timestamp

# Convert timestamps to the same format
converted_timestamps = [convert_to_same_format(ts) for ts in timestamps]

# Print the original and converted timestamps
for original, converted in zip(timestamps, converted_timestamps):
    print(f"Original Timestamp: {original} | Converted Timestamp: {converted}")


Original Timestamp: 2023-05-15 08:30:45 | Converted Timestamp: 2023-05-15 08:30:45
Original Timestamp: 2023-05-15 08.32.30 | Converted Timestamp: 2023-05-15 08:32:30


In [None]:
# Sample function to parse timestamps and detect anomalies
def detect_anomalies(timestamps):
    anomalies = []  # List to store flagged timestamps

    for timestamp in timestamps:
        parsed_timestamp = parse_timestamp(timestamp)  # Parse timestamp into date components

        # Ambiguous Date Format Detection
        if parsed_timestamp['month'] > 12 or parsed_timestamp['day'] > 31:
            anomalies.append((timestamp, "Ambiguous Date Format"))  # Flag ambiguous date format

        # Out-of-Range Value Detection
        if parsed_timestamp['year'] < 1900 or parsed_timestamp['year'] > 2100:
            anomalies.append((timestamp, "Out-of-Range Year"))  # Flag out-of-range year
        # Add similar checks for 'month', 'day', 'hour', 'minute', 'second'

    return anomalies  # Return list of flagged timestamps

# Sample function to parse timestamp into date components (for demonstration purposes)
def parse_timestamp(timestamp):
    date_str, time_str = timestamp.split()  # Split timestamp into date and time strings
    year, month, day = map(int, date_str.split('-'))  # Extract year, month, day
    hour, minute, second = map(int, time_str.split(':'))  # Extract hour, minute, second
    return {'year': year, 'month': month, 'day': day, 'hour': hour, 'minute': minute, 'second': second}

# Sample timestamps for testing
timestamps = ["2023-05-15 08:30:45", "2023-15-05 08:32:30", "2023-05-15 25:30:45"]

# Detect anomalies in timestamps
flagged_timestamps = detect_anomalies(timestamps)
print(flagged_timestamps)  # Output: [('2023-15-05 08:32:30', 'Ambiguous Date Format'), ('2023-05-15 25:30:45', 'Out-of-Range Year')]



[('2023-15-05 08:32:30', 'Ambiguous Date Format')]
