## T-LCS method

In [None]:
from datetime import datetime
import numpy as np

def lcs(X, Y):
    """Return one longest common subsequence of X and Y as a list.

    Uses the classic dynamic-programming table followed by a backward walk
    through that table.

    Args:
        X: First sequence (anything supporting ``len`` and integer indexing).
        Y: Second sequence (same requirements as ``X``).

    Returns:
        A new list holding the elements of one longest common subsequence,
        in their original order. Empty when there is no common element.
    """
    m, n = len(X), len(Y)

    # table[i][j] is the LCS length of the prefixes X[:i] and Y[:j];
    # row 0 and column 0 stand for the empty prefix.
    table = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if X[i - 1] == Y[j - 1]:
                table[i][j] = table[i - 1][j - 1] + 1
            else:
                table[i][j] = max(table[i - 1][j], table[i][j - 1])

    # Walk back from the bottom-right corner. Matches are collected in
    # reverse order and flipped once at the end, which avoids the repeated
    # front insertions (each O(len)) of building the list left to right.
    result = []
    i, j = m, n
    while i > 0 and j > 0:
        if X[i - 1] == Y[j - 1]:
            result.append(X[i - 1])
            i -= 1
            j -= 1
        elif table[i - 1][j] > table[i][j - 1]:
            i -= 1
        else:
            # Ties between "up" and "left" step left (j decreases).
            j -= 1

    result.reverse()
    return result

def keep_lcs_replace_others_with_zeros(seq, lcs):
    """Return a copy of ``seq`` in which every non-LCS item is replaced by 0.

    The items of ``lcs`` are matched against ``seq`` from left to right:
    an item is kept when it equals the next still-unmatched LCS element,
    otherwise its slot is overwritten with 0. ``seq`` itself is not modified.

    Args:
        seq: Sequence providing a ``copy()`` method (e.g. a list).
        lcs: Subsequence whose items should be preserved.

    Returns:
        The masked copy, same length as ``seq``.
    """
    masked = seq.copy()
    matched = 0

    for pos, item in enumerate(masked):
        # Once the whole LCS is consumed, everything that follows is zeroed.
        if matched >= len(lcs) or item != lcs[matched]:
            masked[pos] = 0
            continue
        matched += 1

    return masked

def get_non_lcs_parts(seq, lcs):
    """Return the items of ``seq`` that are left after removing ``lcs``.

    The items of ``lcs`` are matched against ``seq`` from left to right; each
    item of ``seq`` that equals the next still-unmatched LCS element is
    dropped, and everything else is kept in its original order.

    Args:
        seq: Any iterable of items (list, tuple, string, ...). It is only
            read, never modified.
        lcs: Subsequence to remove (must support ``len`` and indexing).

    Returns:
        A new list with the unmatched items of ``seq``.
    """
    leftovers = []
    matched = 0
    lcs_length = len(lcs)

    # No defensive copy is needed: the input is only iterated.
    for item in seq:
        if matched < lcs_length and item == lcs[matched]:
            matched += 1
        else:
            leftovers.append(item)

    return leftovers

def calculate_time_difference(time1, time2):
    """Return the absolute gap between two "HH:MM" times in 30-minute slots.

    Both strings are parsed with the ``%H:%M`` format, so the comparison is
    made within a single day: there is no wrap-around at midnight
    (``"23:30"`` vs ``"00:30"`` yields 46.0, not 2.0).

    Args:
        time1: First time of day as an "HH:MM" string.
        time2: Second time of day as an "HH:MM" string.

    Returns:
        The non-negative difference as a float, where 1.0 means 30 minutes.

    Raises:
        ValueError: If either string does not match the "HH:MM" format.
    """
    time_format = "%H:%M"
    seconds_per_slot = 1800  # one slot = 30 minutes

    start = datetime.strptime(time1, time_format)
    end = datetime.strptime(time2, time_format)
    return abs((end - start).total_seconds()) / seconds_per_slot

def extract_lcs_with_timestamps(S1, S2, T1, T2):
    """Compute the LCS of S1 and S2 and collect the timestamps of its items.

    Each LCS item is located in S1 and in S2 by scanning forward from just
    after the previous match; the timestamp stored at the same index in
    T1 / T2 is recorded for it.

    Args:
        S1: First sequence of items.
        S2: Second sequence of items.
        T1: Timestamps indexed in parallel with S1.
        T2: Timestamps indexed in parallel with S2.

    Returns:
        A tuple ``(lcs_items, times_from_T1, times_from_T2)`` of three lists
        of equal length.
    """

    def next_index(sequence, start, target):
        # First index at or after `start` holding `target`
        # (len(sequence) if there is none).
        idx = start
        while idx < len(sequence) and sequence[idx] != target:
            idx += 1
        return idx

    common = lcs(S1, S2)
    times_in_s1, times_in_s2 = [], []

    pos1 = pos2 = 0
    for symbol in common:
        pos1 = next_index(S1, pos1, symbol)
        pos2 = next_index(S2, pos2, symbol)
        times_in_s1.append(T1[pos1])
        times_in_s2.append(T2[pos2])
        # Continue the search after the positions just consumed.
        pos1, pos2 = pos1 + 1, pos2 + 1

    return common, times_in_s1, times_in_s2

def calculate_time_moves(lcs_time_S1, lcs_time_S2):
    """Return the normalised total time shift between matched LCS items.

    The pairwise differences reported by ``calculate_time_difference`` are
    summed and divided by ``L * (48 - L)``, where ``L`` is the number of
    timestamps in ``lcs_time_S1``.

    Args:
        lcs_time_S1: Timestamps of the LCS items in the first sequence.
        lcs_time_S2: Timestamps of the LCS items in the second sequence.

    Returns:
        The normalised time shift, or 0 when ``L`` is 0 or 48 (the two values
        for which the denominator would be zero).
    """
    matched = len(lcs_time_S1)

    # Accumulate the differences pair by pair, in order.
    accumulated = 0
    for stamp_a, stamp_b in zip(lcs_time_S1, lcs_time_S2):
        accumulated += calculate_time_difference(stamp_a, stamp_b)

    # Guard against a zero denominator.
    if matched in (0, 48):
        return 0

    return accumulated / (matched * (48 - matched))


def _drop_lcs_items(seq, times, lcs_seq):
    """Remove the leftmost occurrence of lcs_seq from seq, together with the paired timestamps."""
    rest_items, rest_times = [], []
    matched = 0
    for item, stamp in zip(seq, times):
        if matched < len(lcs_seq) and item == lcs_seq[matched]:
            matched += 1
        else:
            rest_items.append(item)
            rest_times.append(stamp)
    return rest_items, rest_times


def distance_with_time(S1, S2, T1, T2, ratio, D_rate, w, max_iterations=100, verbose=True):
    """Compute the time-aware iterative LCS similarity score of two sequences.

    In every iteration the LCS of the remaining items is extracted, its
    contribution

        ratio**(iteration - 1) * (2 * len(LCS) / (a + b)) * exp(-D_rate * time_moves)

    is added to the running score, and the matched items are removed from
    both sequences together with their timestamps. ``a`` and ``b`` are the
    original lengths of S1 and S2. The loop stops when either sequence is
    exhausted, no common item is left, or ``max_iterations`` is reached. The
    accumulated score is finally multiplied by
    ``(1 - |a - b| / max(a, b)) ** w``.

    Args:
        S1: First sequence of items.
        S2: Second sequence of items.
        T1: "HH:MM" timestamps, one per item of S1.
        T2: "HH:MM" timestamps, one per item of S2.
        ratio: Base of the per-iteration weight ``ratio**(iteration - 1)``.
        D_rate: Decay rate applied to the time moves inside ``exp``.
        w: Exponent of the length-difference penalty.
        max_iterations: Upper bound on the number of LCS extraction rounds.
        verbose: When true (the default), print the per-iteration trace.

    Returns:
        The final similarity score. 0.0 when both sequences are empty.

    Raises:
        ValueError: If a sequence and its timestamp list differ in length.
    """
    if len(S1) != len(T1) or len(S2) != len(T2):
        raise ValueError("each sequence must have exactly one timestamp per item")

    a = len(S1)
    b = len(S2)
    if max(a, b) == 0:
        # Nothing to compare; also avoids dividing by max(a, b) == 0 below.
        return 0.0

    iterations = 0
    SScore = 0

    while len(S1) > 0 and len(S2) > 0 and iterations < max_iterations:
        iterations += 1
        lcs_sequence, lcs_time_S1, lcs_time_S2 = extract_lcs_with_timestamps(S1, S2, T1, T2)

        if len(lcs_sequence) == 0:
            break

        if verbose:
            print(f"Iteration {iterations}")
            print("S1:", S1)
            print("S2:", S2)
            print("LCS:", lcs_sequence)
            print("Timestamps in S1:", lcs_time_S1)
            print("Timestamps in S2:", lcs_time_S2)

        Length_LCS = len(lcs_sequence)

        # Calculate time differences (moves) between matched items
        time_moves = calculate_time_moves(lcs_time_S1, lcs_time_S2)

        # Updated score with time-based moves
        SScore += ratio**(iterations - 1) * (2 * Length_LCS / (a + b)) * np.exp(-D_rate * time_moves)
        if verbose:
            print(f"SScore after iteration {iterations}: {SScore}")

        # Keep only the unmatched items for the next iteration. Items and
        # timestamps are filtered together so that T1/T2 stay index-aligned
        # with S1/S2; filtering the timestamp lists against the LCS labels
        # would never remove anything.
        S1, T1 = _drop_lcs_items(S1, T1, lcs_sequence)
        S2, T2 = _drop_lcs_items(S2, T2, lcs_sequence)

    # Final score with w exponent on the length difference penalty
    Final_Score = SScore * (1 - (abs(a - b) / max(a, b)))**w
    return Final_Score

In [None]:
# Benchmark data: six activity sequences (S1..S6), each paired with a list of
# "HH:MM" timestamps (T1..T6) of the same length.

# S1: the reference sequence (a typical timestamped semantic sequence).
S1 = ["R", "T", "W", "D", "W", "S", "T", "R"]
T1 = ["07:00", "08:00", "09:30", "12:30", "14:30", "17:30", "18:00", "20:30"]

# S2: same labels as S1, different timestamps.
S2 = list(S1)
T2 = ["07:30", "08:30", "10:00", "12:00", "15:30", "18:00", "18:30", "22:30"]

# S3: S1's labels with "S" moved to the third position; timestamps equal T1.
S3 = ["R", "T", "S", "W", "D", "W", "T", "R"]
T3 = list(T1)

# S4: same as S3 but with "I" in the third position; timestamps equal T1.
S4 = ["R", "T", "I", "W", "D", "W", "T", "R"]
T4 = list(T1)

# S5: S1 followed by one extra "I" at 21:00.
S5 = S1 + ["I"]
T5 = T1 + ["21:00"]

# S6: S1 followed by two extra "I" items at 21:00 and 21:30.
S6 = S1 + ["I", "I"]
T6 = T1 + ["21:00", "21:30"]

# Parameters
ratio = 0.8   # base of the per-iteration weight
D_rate = 0.8  # decay rate applied to the time moves
w = 0.5       # exponent of the length-difference penalty


In [None]:
# Score the similarity of S1 and S2 (with their timestamps) and report it.
final_score = distance_with_time(
    S1, S2, T1, T2,
    ratio=ratio,
    D_rate=D_rate,
    w=w,
)
print("Final Score:", final_score)