In [83]:
import pandas as pd
import re

In [84]:
def detect_data_patterns(df, value_to_match):
    """Classify DataFrame columns by value pattern and classify a probe value.

    Every column is scanned once: each of its unique values is converted with
    ``str()`` and tested against four pattern families. A column is listed
    under a family as soon as *any* of its values matches, so one column can
    appear under several families.

    Pattern families (all tested with ``re.match``, i.e. anchored at the start
    of the string):

    * ``'Date'``         - any of the date / date-time layouts in
      ``date_patterns``. These are not anchored at the end, so a value only
      has to *start* with a date.
    * ``'Integer'``      - digits only.
    * ``'Alphanumeric'`` - ASCII letters and digits only.
    * ``'String'``       - matches any value (catch-all).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.
    value_to_match : object or None
        Probe value; it is converted with ``str()`` before matching. Pass
        ``None`` to skip the probe step.

    Returns
    -------
    tuple[dict[str, list], list[str]]
        ``(pattern_results, matching_columns)`` where

        * ``pattern_results`` maps each family name to the list of column
          names (in ``df.columns`` order) that contain at least one matching
          value;
        * ``matching_columns`` lists, in the order Date, Integer,
          Alphanumeric, String, the *family names* (not DataFrame column
          names) that the probe value matches, restricted to families for
          which at least one column was detected. It is empty when
          ``value_to_match`` is ``None``.
    """
    date_patterns = [
        r'\d{4}-\d{2}-\d{2}',
        r'\d{2}/\d{2}/\d{4}',
        r'\d{2}-\d{2}-\d{4}',
        r'\d{4}/\d{2}/\d{2}',
        r'\d{2}/\d{2}/\d{2}',
        r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}',
        r'\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}',
        r'\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}',
        r'\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}',
    ]
    integer_pattern = r'^\d+$'
    string_pattern = r'.*'
    alphanumeric_pattern = r'^[a-zA-Z0-9]+$'

    # Compile once up front instead of on every loop iteration.
    date_regexes = [re.compile(pattern) for pattern in date_patterns]

    def _is_date(text):
        # A value counts as a date if it matches ANY supported layout.
        return any(regex.match(text) for regex in date_regexes)

    # Insertion order fixes the order of both returned structures.
    matchers = {
        'Date': _is_date,
        'Integer': re.compile(integer_pattern).match,
        'Alphanumeric': re.compile(alphanumeric_pattern).match,
        'String': re.compile(string_pattern).match,
    }

    pattern_results = {pattern_type: [] for pattern_type in matchers}

    for column_name in df.columns:
        # Stringify each unique value once; all four families reuse the result.
        texts = [str(value) for value in df[column_name].unique()]
        for pattern_type, is_match in matchers.items():
            if any(is_match(text) for text in texts):
                pattern_results[pattern_type].append(column_name)

    matching_columns = []
    if value_to_match is not None:
        probe = str(value_to_match)
        # The probe is checked with the same matchers used for the columns, so
        # every date layout is honoured here too (previously only the first
        # layout, YYYY-MM-DD, was tried). A family is reported only when at
        # least one column of the frame was detected for it.
        matching_columns = [
            pattern_type
            for pattern_type, detected_columns in pattern_results.items()
            if detected_columns and matchers[pattern_type](probe)
        ]

    return pattern_results, matching_columns


In [91]:
# Sample frame: two mixed-format date columns, one integer column, one plain
# text column and one alphanumeric-code column.
data = {
    'Date_Column_1': [
        '2024-01-01', '01/02/2024', '2024-03-31', '04-30-2024', '2024-05-20',
        '2024-06-01 12:30:45', '2024/07/15 14:20', '2024-08-31T10:00:00',
        '2024-09-10 08:45:30 AM', '2024-10-20T16:00:00+05:30',
    ],
    'Date_Column_2': [
        '2024/01/01', '01/02/2024', '2024/03/31', '2024-04-10', '2024/05/20',
        '2024-06-01 08:30:45', '2024/07/15 17:20', '2024-08-31T11:00:00',
        '2024-09-10 09:45:30 AM', '2024-10-20T17:00:00+05:30',
    ],
    'Integer_Column': [
        161718, 192021, 222324, 252627, 282930,
        313233, 343536, 373839, 404142, 434445,
    ],
    'String_Column': [
        'apple', 'banana', 'carrot', 'dog', 'elephant',
        'fox', 'goat', 'horse', 'iguana', 'jackal',
    ],
    'Alphanumeric_Column': [
        'E31F32G33', 'H34I35J36', 'K37L38M39', 'N40O41P42', 'Q43R44S45',
        'T46U47V48', 'W49X50Y51', 'Z52A53B54', 'C55D56E57', 'F58G59H60',
    ],
}

df = pd.DataFrame(data)

# Probe values to classify against the patterns detected in `df`.
value_to_match = [
    "2020-01-12",
    "2024-09-11 02:05:30",
    "hello mister",
    987123421343,
    "hello3434343",
    "abcdf1231lklksdfnl",
]

# Counts processed probe values; the per-column summary is printed only while
# it is still zero, i.e. once, before the first probe result.
main_df_pattern_print_stop_count = 0

for value in value_to_match:
    patterns, matching_columns = detect_data_patterns(df, value)

    if main_df_pattern_print_stop_count == 0:
        for pattern_type, columns in patterns.items():
            print(f"{pattern_type} pattern detected in dataframe columns: {', '.join(columns)}")
    main_df_pattern_print_stop_count += 1

    print(f"\nColumns where the value '{value}' matches: {', '.join(matching_columns)}")

# NOTE: The first entry in each printed match list carries the highest confidence.
# PENDING: Map that first entry (a pattern type) back to a column name of the dataframe.

Date pattern detected in dataframe columns: Date_Column_1, Date_Column_2
Integer pattern detected in dataframe columns: Integer_Column
Alphanumeric pattern detected in dataframe columns: Integer_Column, String_Column, Alphanumeric_Column
String pattern detected in dataframe columns: Date_Column_1, Date_Column_2, Integer_Column, String_Column, Alphanumeric_Column

Columns where the value '2020-01-12' matches: Date, String

Columns where the value '2024-09-11 02:05:30' matches: Date, String

Columns where the value 'hello mister' matches: String

Columns where the value '987123421343' matches: Integer, Alphanumeric, String

Columns where the value 'hello3434343' matches: Alphanumeric, String

Columns where the value 'abcdf1231lklksdfnl' matches: Alphanumeric, String
