## Test the model with delta_t and DEAP algorithm

In [32]:
import numpy as np
import pandas as pd
import itertools
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from scipy.linalg import block_diag
from deap import base, creator, tools, algorithms
import random

In [33]:
# Location of the synchronised recordings and the session dates that exist.
file_path = './Synched_Data_GR0_22_DEN_MAXZ1_25/NEWDATA/'
file_date = [
    '101922', '102122', '111422', '111622', '120522', '120722',
    '013023', '020123', '031323', '031523', '041723', '041923', '061523',
]
date = file_date[0]  # session analysed below

file_name = f'DAYUBIGR_{date}_GR0_22_DEN_032825_V2392628911.CSV'
full_path = f'{file_path}{file_name}'

# The CSV is read without a header row; column names are supplied here.
raw_data = pd.read_csv(full_path, header=None, names=['SUBJECTID', 'TIME', 'X', 'Y', 'Z'])
clear_data = raw_data.reset_index(drop=True)

# Keep only samples with 0 <= X <= 15 and 0 <= Y <= 9 (bounds inclusive).
inside_x = clear_data["X"].between(0, 15)
inside_y = clear_data["Y"].between(0, 9)
clear_data = clear_data[inside_x & inside_y].copy()

# Select every tag whose id starts with the target subject prefix.
target_subject_base = "DS_STARFISH_2223_27"
is_target = clear_data['SUBJECTID'].str.startswith(target_subject_base)
subject_data = clear_data[is_target].copy()

# Express time as seconds elapsed since the subject's first sample.
subject_data['TIME'] = pd.to_datetime(subject_data['TIME'])
t0 = subject_data['TIME'].min()
subject_data['timestamp'] = (subject_data['TIME'] - t0).dt.total_seconds()

# The trailing "<digits>L" / "<digits>R" of the id tells which side the tag is on.
side_letter = subject_data['SUBJECTID'].str.extract(r'(\d+[LR])$')[0].str[-1]
subject_data['side'] = side_letter.map({'L': 'left', 'R': 'right'})
subject_data['timestamp_rounded'] = subject_data['timestamp'].round(3)

# Collapse the samples into one record per rounded timestamp.
grouped = subject_data.groupby('timestamp_rounded')
real_data = []

for ts, group in grouped:
    entry = {'timestamp': ts}

    # Keep the first (X, Y) sample reported by each side, when there is one.
    for side_name in ('left', 'right'):
        side_rows = group[group['side'] == side_name]
        if not side_rows.empty:
            entry[side_name] = side_rows[['X', 'Y']].iloc[0].to_numpy()

    has_left = 'left' in entry
    has_right = 'right' in entry

    if has_left and has_right:
        # Measurement vector is stacked as [left_x, left_y, right_x, right_y].
        entry['observed'] = 'both'
        entry['obs'] = np.concatenate([entry['left'], entry['right']])
    elif has_left or has_right:
        only_side = 'left' if has_left else 'right'
        entry['observed'] = only_side
        entry['obs'] = entry[only_side]
    else:
        # Neither side could be identified for this timestamp.
        entry['observed'] = 'none'
        entry['obs'] = np.array([])

    real_data.append(entry)

In [34]:
# Tuning constants shared by the filter and the optimiser.
DT_VIRT = 0.5     # spacing between virtual (prediction-only) time steps
SIGMA_MIN = 1e-6  # lower bound used for the process-noise sigmas
SIGMA_MAX = 10    # upper bound used for every sigma

# Earlier grid-search configuration, kept for reference only:
# sigma_v_values = [0.01, 0.05, 0.1, 0.5]
# sigma_omega_values = [0.001, 0.01, 0.05, 0.1]
# sigma_obs_values = [0.5, 1]
# param_combinations = list(itertools.product(sigma_v_values, sigma_omega_values, sigma_obs_values))
# results = []

In [35]:
# Same values as the previous cell (presumably repeated so this cell can be
# run on its own).
DT_VIRT = 0.5     # spacing between virtual (prediction-only) time steps
SIGMA_MIN = 1e-6  # lower bound used for the process-noise sigmas
SIGMA_MAX = 10    # upper bound used for every sigma

def state_transition(s_t, delta_t):
    """Propagate the state over ``delta_t`` with constant velocities.

    Args:
        s_t: Six-element state ``[x, y, theta, vx, vy, omega]``.
        delta_t: Length of the time step.

    Returns:
        New state array: ``x``, ``y`` and ``theta`` are advanced by their
        rates times ``delta_t``; ``vx``, ``vy`` and ``omega`` are unchanged.
    """
    pos_x, pos_y, angle, rate_x, rate_y, rate_angle = s_t
    advanced = [
        pos_x + rate_x * delta_t,
        pos_y + rate_y * delta_t,
        angle + rate_angle * delta_t,
    ]
    return np.array(advanced + [rate_x, rate_y, rate_angle])

def jacobian_F(delta_t):
    """Return the 6x6 Jacobian of ``state_transition`` for a step ``delta_t``.

    The matrix is the identity plus ``delta_t`` in the three entries that
    couple each of ``x``, ``y``, ``theta`` to its rate (``vx``, ``vy``,
    ``omega``).
    """
    size = 6
    rows = [[1 if r == c else 0 for c in range(size)] for r in range(size)]
    for i in range(3):
        # Row i (x, y or theta) depends on column i + 3 (its rate).
        rows[i][i + 3] = delta_t
    return np.array(rows)

def h(s_t, observed_sensors, d):
    """Predict the sensor measurement(s) for state ``s_t``.

    Args:
        s_t: State whose first three entries are ``x``, ``y``, ``theta``.
        observed_sensors: ``'both'``, ``'left'``, ``'right'``; any other value
            yields an empty measurement.
        d: Offset of each sensor from ``(x, y)``.

    Returns:
        ``[left_x, left_y, right_x, right_y]`` for ``'both'``, the matching
        two-element array for a single sensor, otherwise an empty array.
    """
    x, y, theta = s_t[0], s_t[1], s_t[2]
    off_x = d * np.sin(theta)
    off_y = d * np.cos(theta)
    left_xy = [x - off_x, y + off_y]
    right_xy = [x + off_x, y - off_y]

    if observed_sensors == 'both':
        return np.array(left_xy + right_xy)
    if observed_sensors == 'left':
        return np.array(left_xy)
    if observed_sensors == 'right':
        return np.array(right_xy)
    return np.array([])

def jacobian_h(s_t, observed_sensors, d):
    """Return the Jacobian of ``h`` with respect to the six-element state.

    Args:
        s_t: State; only ``theta`` (index 2) is read.
        observed_sensors: ``'both'``, ``'left'``, ``'right'``; any other value
            yields a ``(0, 6)`` matrix.
        d: Offset of each sensor from ``(x, y)``.

    Returns:
        A ``(4, 6)`` array for ``'both'`` (left rows first), ``(2, 6)`` for a
        single sensor, otherwise ``(0, 6)``.
    """
    theta = s_t[2]
    d_cos = d * np.cos(theta)
    d_sin = d * np.sin(theta)

    # Each sensor contributes one x-row and one y-row; only the theta column
    # differs (opposite sign) between the two sensors.
    left_rows = [
        [1, 0, -d_cos, 0, 0, 0],
        [0, 1, -d_sin, 0, 0, 0],
    ]
    right_rows = [
        [1, 0, d_cos, 0, 0, 0],
        [0, 1, d_sin, 0, 0, 0],
    ]

    if observed_sensors == 'both':
        return np.array(left_rows + right_rows)
    if observed_sensors == 'left':
        return np.array(left_rows)
    if observed_sensors == 'right':
        return np.array(right_rows)
    return np.zeros((0, 6))

In [36]:
def ekf_forward(data, timestamps, virtual_timestamps, params):
    """Run the extended Kalman filter forward over real and virtual time steps.

    Args:
        data: Sequence of observation records. Each record provides
            ``'observed'`` (``'both'``, ``'left'``, ``'right'`` or ``'none'``),
            ``'obs'`` (the stacked measurement vector) and, when available,
            the ``'left'`` / ``'right'`` sensor positions.
        timestamps: List holding the timestamp of each record
            (``timestamps[i]`` belongs to ``data[i]``).
        virtual_timestamps: List of additional timestamps at which only the
            prediction step is executed.
        params: ``(sigma_vx, sigma_vy, sigma_omega, sigma_obs, d)``.

    Returns:
        The tuple ``(s_filt, P_filt, s_hat, P, neg_log_likelihood)`` where the
        first four items are lists indexed like the sorted union of
        ``timestamps`` and ``virtual_timestamps`` (filtered state/covariance
        and predicted state/covariance).

        The same 5-tuple shape is returned when the filter breaks down (the
        innovation covariance has no positive determinant, or a linear-algebra
        routine fails): ``neg_log_likelihood`` is then ``np.inf`` and the
        lists are only filled up to the failing step. Callers unpack five
        values, so a bare scalar would make them raise ``TypeError``.
    """
    sigma_vx, sigma_vy, sigma_omega, sigma_obs, d = params
    master_timestamps = sorted(set(timestamps + virtual_timestamps))
    T = len(master_timestamps)

    # One independent array per slot; ``[arr] * T`` would alias a single array.
    s_hat = [np.zeros(6) for _ in range(T)]
    P = [np.zeros((6, 6)) for _ in range(T)]
    s_filt = [np.zeros(6) for _ in range(T)]
    P_filt = [np.zeros((6, 6)) for _ in range(T)]
    neg_log_likelihood = 0.0

    # Timestamp -> index of the first record carrying it. Built once so the
    # main loop does a constant-time lookup instead of scanning the list.
    first_index = {}
    for i, t in enumerate(timestamps):
        first_index.setdefault(t, i)

    # Initialise the position from the first usable record among the first
    # ten; theta and all rates start at zero.
    for entry in data[:10]:
        kind = entry['observed']
        if kind == 'both':
            s_hat[0][:2] = (entry['left'] + entry['right']) / 2
        elif kind in ('left', 'right'):
            s_hat[0][:2] = entry[kind]
        else:
            continue
        break

    P[0] = np.diag([10.0, 10.0, 10.0, 5.0, 5.0, 2.0])
    s_filt[0] = s_hat[0].copy()
    P_filt[0] = P[0].copy()

    identity = np.eye(6)
    log_two_pi = np.log(2 * np.pi)

    for k in range(T - 1):
        t_k1 = master_timestamps[k + 1]
        delta_t = t_k1 - master_timestamps[k]

        # Prediction step
        s_hat[k + 1] = state_transition(s_filt[k], delta_t)
        F_k = jacobian_F(delta_t)
        Q_k = block_diag(0, 0, 0, sigma_vx**2 * delta_t**2, sigma_vy**2 * delta_t**2, sigma_omega**2 * delta_t**2)
        P[k + 1] = F_k @ P_filt[k] @ F_k.T + Q_k

        # Without a measurement the filtered estimate is the prediction.
        s_filt[k + 1] = s_hat[k + 1]
        P_filt[k + 1] = P[k + 1]

        idx = first_index.get(t_k1)
        if idx is None:
            continue  # virtual time step
        observed_sensors = data[idx]['observed']
        if observed_sensors == 'none':
            continue

        # Update step
        H_k1 = jacobian_h(s_hat[k + 1], observed_sensors, d)
        z_pred = h(s_hat[k + 1], observed_sensors, d)
        z_k1 = data[idx]['obs']
        m_t = len(z_k1)
        R = sigma_obs**2 * np.eye(m_t)
        S_k1 = H_k1 @ P[k + 1] @ H_k1.T + R
        S_k1 = (S_k1 + S_k1.T) / 2  # enforce symmetry against round-off
        innovation = z_k1 - z_pred

        try:
            sign, logdet = np.linalg.slogdet(S_k1)
            # ``not sign > 0`` also rejects a NaN sign.
            if not sign > 0:
                return s_filt, P_filt, s_hat, P, np.inf
            S_inv = np.linalg.inv(S_k1)  # computed once, reused below
        except np.linalg.LinAlgError:
            return s_filt, P_filt, s_hat, P, np.inf

        neg_log_likelihood += 0.5 * (m_t * log_two_pi + logdet +
                                     innovation @ S_inv @ innovation)

        K_k1 = P[k + 1] @ H_k1.T @ S_inv
        s_filt[k + 1] = s_hat[k + 1] + K_k1 @ innovation
        P_filt[k + 1] = (identity - K_k1 @ H_k1) @ P[k + 1]

    return s_filt, P_filt, s_hat, P, neg_log_likelihood

def smoother(s_filt, P_filt, s_hat, P, timestamps, virtual_timestamps):
    """Backward Rauch-Tung-Striebel pass over the output of ``ekf_forward``.

    Args:
        s_filt, P_filt: Filtered states and covariances per time step.
        s_hat, P: Predicted states and covariances per time step.
        timestamps, virtual_timestamps: The same lists given to the forward
            pass; their sorted union defines the time grid.

    Returns:
        ``(s_smooth, P_smooth)``, lists aligned with the time grid. Where the
        predicted covariance cannot be inverted the filtered estimate is kept.
    """
    grid = sorted(set(timestamps + virtual_timestamps))
    n_steps = len(grid)

    s_smooth = [None] * n_steps
    P_smooth = [None] * n_steps
    # The last smoothed estimate equals the last filtered one.
    s_smooth[-1] = s_filt[-1]
    P_smooth[-1] = P_filt[-1]

    for k in reversed(range(n_steps - 1)):
        nxt = k + 1
        F_k = jacobian_F(grid[nxt] - grid[k])

        try:
            gain = P_filt[k] @ F_k.T @ np.linalg.inv(P[nxt])
            state_k = s_filt[k] + gain @ (s_smooth[nxt] - s_hat[nxt])
            cov_k = P_filt[k] + gain @ (P_smooth[nxt] - P[nxt]) @ gain.T
        except np.linalg.LinAlgError:
            state_k = s_filt[k]
            cov_k = P_filt[k]

        s_smooth[k] = state_k
        P_smooth[k] = cov_k

    return s_smooth, P_smooth

In [None]:
class HybridOptimizer:
    """Two-stage parameter search: DEAP genetic algorithm, then SciPy refinement.

    The optimised vector is ``[sigma_vx, sigma_vy, sigma_omega, sigma_obs, d]``
    and the objective is the negative log-likelihood reported by
    ``ekf_forward`` for ``data`` / ``timestamps`` / ``virtual_timestamps``.
    """

    # Finite value used in place of an out-of-bounds or non-finite objective.
    PENALTY = 1e10

    def __init__(self, data, timestamps, virtual_timestamps):
        """Store the data set, define the parameter bounds and build the toolbox."""
        self.data = data
        self.timestamps = timestamps
        self.virtual_timestamps = virtual_timestamps

        # Parameter bounds: [sigma_vx, sigma_vy, sigma_omega, sigma_obs, d]
        self.bounds = [
            (SIGMA_MIN, SIGMA_MAX),
            (SIGMA_MIN, SIGMA_MAX),
            (SIGMA_MIN, SIGMA_MAX),
            (0.01, SIGMA_MAX),
            (0.01, 1.0)
        ]

        self.setup_deap()

    def setup_deap(self):
        """Create the DEAP fitness/individual types and register the operators."""
        # ``creator.create`` adds classes to the global ``deap.creator`` module;
        # creating them only once avoids re-defining them for every optimizer.
        if not hasattr(creator, "FitnessMin"):
            # A single weight of -1.0 makes DEAP minimise the objective.
            creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
        if not hasattr(creator, "Individual"):
            creator.create("Individual", list, fitness=creator.FitnessMin)

        self.toolbox = base.Toolbox()

        # One uniform sampler per parameter, in the order of ``self.bounds``.
        gene_names = ("sigma_vx", "sigma_vy", "sigma_omega", "sigma_obs", "d")
        for gene, (low, high) in zip(gene_names, self.bounds):
            self.toolbox.register(gene, random.uniform, low, high)
        samplers = tuple(getattr(self.toolbox, gene) for gene in gene_names)

        self.toolbox.register("individual", tools.initCycle, creator.Individual,
                              samplers, n=1)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)

        self.toolbox.register("evaluate", self.evaluate_individual)
        self.toolbox.register("mate", tools.cxBlend, alpha=0.3)
        self.toolbox.register("mutate", self.mutate_individual, mu=0, sigma=0.1, indpb=0.2)
        self.toolbox.register("select", tools.selTournament, tournsize=3)

    def _negative_log_likelihood(self, params):
        """Return the EKF objective for ``params``, or ``PENALTY`` if not finite."""
        outcome = ekf_forward(self.data, self.timestamps, self.virtual_timestamps, params)
        # ekf_forward may signal failure with a bare scalar instead of its
        # usual 5-tuple; accept both shapes so a failed evaluation is
        # penalised instead of crashing on tuple unpacking.
        nll = outcome[-1] if isinstance(outcome, tuple) else outcome
        if not np.isfinite(nll):
            return self.PENALTY
        return float(nll)

    def evaluate_individual(self, individual):
        """DEAP fitness function: one-element tuple with the objective value.

        Individuals outside ``self.bounds`` (crossover can produce them)
        receive ``PENALTY`` without running the filter.
        """
        params = np.array(individual)
        for value, (low, high) in zip(params, self.bounds):
            if value < low or value > high:
                return (self.PENALTY,)

        return (self._negative_log_likelihood(params),)

    def mutate_individual(self, individual, mu, sigma, indpb):
        """Mutate genes in place with noise proportional to their value.

        Each gene is changed with probability ``indpb`` by adding a Gaussian
        draw with mean ``mu`` and standard deviation ``sigma * gene``; the
        result is clipped to the gene's bounds. Returns a one-element tuple,
        as DEAP mutation operators must.
        """
        for i, value in enumerate(individual):
            if random.random() < indpb:
                low, high = self.bounds[i]
                mutated = value + random.gauss(mu, sigma * value)
                # Store a plain float rather than a NumPy scalar.
                individual[i] = float(np.clip(mutated, low, high))
        return individual,

    def run_ga(self, pop_size=50, generations=30, verbose=True, hof_size=5):
        """Run ``algorithms.eaSimple`` and return ``(hall_of_fame, logbook)``.

        Args:
            pop_size: Number of individuals in the population.
            generations: Number of generations to evolve.
            verbose: Forwarded to ``eaSimple`` (prints the statistics table).
            hof_size: Number of best individuals kept in the hall of fame.
        """
        pop = self.toolbox.population(n=pop_size)

        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", np.mean)
        stats.register("min", np.min)
        stats.register("max", np.max)

        hof = tools.HallOfFame(hof_size)

        pop, logbook = algorithms.eaSimple(
            pop, self.toolbox, cxpb=0.7, mutpb=0.3, ngen=generations,
            stats=stats, halloffame=hof, verbose=verbose
        )

        return hof, logbook

    def fine_tune(self, initial_params_list, method='L-BFGS-B'):
        """Refine each starting point with ``scipy.optimize.minimize``.

        Returns one dictionary per starting point with the keys
        ``initial_params``, ``optimized_params``, ``final_likelihood`` (the
        negative log-likelihood), ``success`` and ``scipy_result``; a failed
        refinement additionally carries ``error`` and keeps the start values.
        """
        def objective(params):
            # Numerical breakdowns are penalised like in the GA so the local
            # optimiser always receives a finite value.
            try:
                return self._negative_log_likelihood(params)
            except (ArithmeticError, ValueError, np.linalg.LinAlgError):
                return self.PENALTY

        best_results = []

        for i, initial_params in enumerate(initial_params_list):
            print(f"Refining solution {i+1}/{len(initial_params_list)}")
            print(f"Initial params: {initial_params}")

            try:
                result = minimize(
                    objective,
                    initial_params,
                    method=method,
                    bounds=self.bounds,
                    options={'disp': False, 'maxiter': 100}
                )
            except Exception as e:
                # Best effort: record the failure and continue with the next start.
                print(f"Error in scipy refinement: {e}\n")
                best_results.append({
                    'initial_params': initial_params,
                    'optimized_params': initial_params,
                    'final_likelihood': np.inf,
                    'success': False,
                    'scipy_result': None,
                    'error': str(e)
                })
                continue

            best_results.append({
                'initial_params': initial_params,
                'optimized_params': result.x,
                'final_likelihood': result.fun,
                'success': result.success,
                'scipy_result': result
            })

            print(f"Optimized params: {result.x}")
            print(f"Final likelihood: {result.fun}")
            print(f"Success: {result.success}\n")

        return best_results

    def optimize(self, pop_size=50, generations=30, n_refine=3):
        """Run the GA, refine its best individuals and pick the overall winner.

        Args:
            pop_size: GA population size.
            generations: Number of GA generations.
            n_refine: How many of the best GA individuals to refine (>= 1).

        Returns:
            ``(best_result, refined_results, hof, logbook)``.

        Raises:
            ValueError: If ``n_refine`` is smaller than 1.
        """
        if n_refine < 1:
            # Fail before the expensive GA run instead of at ``min([])`` after it.
            raise ValueError("n_refine must be at least 1")

        # Keep enough individuals so that ``n_refine`` of them can be refined.
        hof, logbook = self.run_ga(pop_size, generations, hof_size=max(5, n_refine))

        print("\n=== GA Results ===")
        print("Best individuals from genetic algorithm:")
        for i, ind in enumerate(hof):
            print(f"  {i+1}: {np.array(ind)}, fitness: {ind.fitness.values[0]}")

        # Take top n_refine solutions for scipy refinement
        top_solutions = [list(ind) for ind in hof[:n_refine]]

        print(f"\n=== Refining top {len(top_solutions)} solutions with scipy ===")
        refined_results = self.fine_tune(top_solutions)

        # Find best overall result
        best_result = min(refined_results, key=lambda x: x['final_likelihood'])

        print("\n=== Best Final Result ===")
        print(f"Parameters: {best_result['optimized_params']}")
        print(f"Negative log-likelihood: {best_result['final_likelihood']}")
        print(f"Success: {best_result['success']}")

        return best_result, refined_results, hof, logbook

In [39]:
def visualize_optimization_results(optimizer, best_result, title_suffix=""):
    """Re-run filter and smoother with the best parameters and plot the result.

    Args:
        optimizer: Object exposing ``data``, ``timestamps`` and
            ``virtual_timestamps``.
        best_result: Mapping whose ``'optimized_params'`` entry holds
            ``[sigma_vx, sigma_vy, sigma_omega, sigma_obs, d]``.
        title_suffix: Text appended to the first title line.

    Returns:
        ``(s_smooth, P_smooth)`` as produced by ``smoother``.
    """
    params = best_result['optimized_params']
    records = optimizer.data
    real_ts = optimizer.timestamps
    virtual_ts = optimizer.virtual_timestamps

    # Forward filter, then backward smoothing, with the chosen parameters.
    s_filt, P_filt, s_hat, P, nll = ekf_forward(records, real_ts, virtual_ts, params)
    s_smooth, P_smooth = smoother(s_filt, P_filt, s_hat, P, real_ts, virtual_ts)

    # Raw sensor positions, one array of (x, y) rows per side.
    left = np.array([rec['left'] for rec in records if 'left' in rec])
    right = np.array([rec['right'] for rec in records if 'right' in rec])

    plt.figure(figsize=(14, 10))
    heading = (
        f'Hybrid Optimization {title_suffix}\n'
        f'sigma_vx={params[0]:.4f}, sigma_vy={params[1]:.4f}, sigma_ω={params[2]:.4f}, '
        f'sigma_obs={params[3]:.4f}, d={params[4]:.4f}\nNLL: {nll:.2f}'
    )
    plt.title(heading)

    for points, colour, label in ((left, 'blue', 'Left Sensor'), (right, 'red', 'Right Sensor')):
        if points.size:
            plt.scatter(points[:, 0], points[:, 1], c=colour, alpha=0.3, s=10, label=label)

    path = np.array(s_smooth)
    plt.plot(path[:, 0], path[:, 1], 'g-', lw=2, alpha=0.8, label='Smoothed Trajectory')

    plt.xlabel("X Position")
    plt.ylabel("Y Position")
    plt.legend()
    plt.grid(alpha=0.3)
    plt.show()

    return s_smooth, P_smooth


In [None]:
def _summarize_results(hof, refined_results):
    """Flatten GA and refined solutions into a list of row dictionaries."""
    param_names = ('sigma_vx', 'sigma_vy', 'sigma_omega', 'sigma_obs', 'd')
    rows = []

    # GA results
    for rank, ind in enumerate(hof, start=1):
        row = {'method': 'genetic_algorithm', 'rank': rank}
        row.update(zip(param_names, ind))
        row['negative_log_likelihood'] = ind.fitness.values[0]
        row['stage'] = 'GA_only'
        rows.append(row)

    # GA + SciPy refined results
    for rank, result in enumerate(refined_results, start=1):
        row = {'method': 'hybrid_ga_scipy', 'rank': rank}
        row.update(zip(param_names, result['optimized_params']))
        row['negative_log_likelihood'] = result['final_likelihood']
        row['stage'] = 'GA_then_scipy'
        row['scipy_success'] = result['success']
        rows.append(row)

    return rows


def run_hybrid_optimization_with_your_data(max_data_points=3000, pop_size=40,
                                           generations=25, n_refine=3):
    """Optimise the filter parameters on the loaded ``real_data`` and plot the fit.

    Args:
        max_data_points: Number of leading ``real_data`` records to use.
        pop_size: GA population size.
        generations: Number of GA generations.
        n_refine: Number of best GA solutions refined with SciPy.

    Returns:
        ``(best_result, refined_results, hof, logbook, optimizer)``. The
        per-solution summary rows are stored on ``optimizer.results_table``
        (pass them to ``pd.DataFrame`` to get a table).

    Raises:
        ValueError: If no data records are available.
    """
    data_subset = real_data[:max_data_points]
    if not data_subset:
        raise ValueError("no data records available for optimization")

    timestamps = [entry['timestamp'] for entry in data_subset]
    # Prediction-only time steps every DT_VIRT across the observed time span.
    virtual_timestamps = np.arange(min(timestamps), max(timestamps), DT_VIRT).tolist()

    optimizer = HybridOptimizer(data_subset, timestamps, virtual_timestamps)

    best_result, refined_results, hof, logbook = optimizer.optimize(
        pop_size=pop_size,
        generations=generations,
        n_refine=n_refine,
    )

    visualize_optimization_results(optimizer, best_result, title_suffix=f"(Date: {date})")

    # Keep the summary reachable without changing the return value.
    optimizer.results_table = _summarize_results(hof, refined_results)

    return best_result, refined_results, hof, logbook, optimizer

if __name__ == "__main__":
    # Runs the GA + SciPy optimisation; this call already plots the best
    # result once, titled with the session date.
    best_result, refined_results, hof, logbook, optimizer = run_hybrid_optimization_with_your_data()
    
    # Plots the same best result a second time, now titled "(Best Result)".
    visualize_optimization_results(optimizer, best_result, title_suffix="(Best Result)")
    

gen	nevals	avg         	min    	max         
0  	40    	1.73541e+180	13078.9	6.94164e+181
1  	36    	1.00002e+09 	11712.9	1e+10       
2  	29    	7.50014e+08 	11712.9	1e+10       
3  	32    	1.00001e+09 	10407.6	1e+10       
4  	30    	12443.9     	10407.6	14909.4     
5  	35    	11573.3     	10052.1	12927.6     
