In [5]:
import pandas as pd
import numpy as np
from pykalman import KalmanFilter

In [11]:
# Read sheet "Sheet5" of the workbook into a DataFrame
df = pd.read_excel("Extracted_Colors.xlsx", sheet_name="Sheet5")

# Report how many rows were loaded
num_entries = df.shape[0]
print(f"Total number of entries: {num_entries}")

# Columns that get filtered, and the column treated as the output
input_columns = ["a", "V", "b", "S"]
output_column = "pH"  # Assumed to be the output column

# (min, max) pair per input column, expressed in the column's original units;
# used to scale each column before filtering
feature_ranges = dict(
    a=(-55, 60),
    V=(40, 80),
    b=(-100, 80),
    S=(15, 130),
)

# Rows are processed in consecutive groups of 10; only complete groups are
# counted, so any leftover rows at the end do not form a group
num_groups = divmod(num_entries, 10)[0]
print(f"Processing {num_groups} groups of 10 rows each.")

Total number of entries: 410
Processing 41 groups of 10 rows each.


In [12]:
# Scaling functions
def min_max_scale(value, original_range):
    """Linearly map ``value`` from ``original_range`` onto the interval (-1, 1).

    Parameters
    ----------
    value : number or array-like supporting arithmetic
        Value(s) expressed in the original units.
    original_range : tuple
        ``(min, max)`` pair; ``min`` maps to -1 and ``max`` maps to 1.

    Returns
    -------
    The scaled value. No clipping is applied, so inputs outside
    ``original_range`` map outside (-1, 1).
    """
    lo, hi = original_range
    span = hi - lo
    offset = value - lo
    return 2 * offset / span - 1

def inverse_min_max_scale(value, original_range):
    """Map ``value`` from the (-1, 1) scale back to ``original_range``.

    Parameters
    ----------
    value : number or array-like supporting arithmetic
        Value(s) on the scaled axis, where -1 corresponds to the range
        minimum and 1 to the range maximum.
    original_range : tuple
        ``(min, max)`` pair describing the original units.

    Returns
    -------
    The value expressed in the original units.
    """
    lo, hi = original_range
    span = hi - lo
    shifted = value + 1  # move (-1, 1) onto (0, 2)
    return shifted * span / 2 + lo

# Function to apply Kalman filtering on each group of 10 rows
def apply_kalman_filter(df_group, feature_ranges, columns=None,
                        transition_covariance=0.001,
                        observation_covariance=0.01):
    """Run a one-dimensional Kalman filter over selected columns of a group.

    Each selected column is scaled to (-1, 1) with ``min_max_scale``, filtered
    with a constant-state model (transition and observation matrices both 1),
    and converted back with ``inverse_min_max_scale``. The first scaled value
    of the column is used as the initial state mean. All other columns are
    returned unchanged.

    Parameters
    ----------
    df_group : pandas.DataFrame
        Rows to filter together as one sequence, in row order.
    feature_ranges : dict
        Maps each filtered column name to its ``(min, max)`` range.
    columns : iterable of str, optional
        Columns to filter. Defaults to the notebook-level ``input_columns``.
    transition_covariance : float, optional
        Process noise passed to ``KalmanFilter`` (default 0.001).
    observation_covariance : float, optional
        Observation noise passed to ``KalmanFilter`` (default 0.01).

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_group`` with the selected columns replaced by their
        filtered values. ``df_group`` itself is not modified.

    Raises
    ------
    KeyError
        If a selected column is missing from ``df_group`` or ``feature_ranges``.
    """
    df_filtered = df_group.copy()

    # An empty group has no first observation to seed the filter with;
    # return the (empty) copy instead of failing on element 0 below.
    if len(df_group) == 0:
        return df_filtered

    if columns is None:
        columns = input_columns

    for col in columns:
        col_range = feature_ranges[col]
        values = df_group[col].values.astype(float)

        # Scale to (-1,1) for stable Kalman filtering; the scaling helper is
        # plain arithmetic, so it is applied to the whole array at once.
        scaled_values = min_max_scale(values, col_range)

        # Define Kalman filter, seeded with the first observation
        kf = KalmanFilter(
            initial_state_mean=scaled_values[0],
            transition_matrices=[1],
            observation_matrices=[1],
            transition_covariance=transition_covariance,  # Process noise
            observation_covariance=observation_covariance  # Observation noise
        )

        # Apply filter; the second return value (state covariances) is unused
        filtered_state_means, _ = kf.filter(scaled_values)

        # Flatten the state means to 1-D and convert back to original scale
        df_filtered[col] = inverse_min_max_scale(
            filtered_state_means.flatten(), col_range
        )

    return df_filtered

In [13]:
# Create a new DataFrame to store results
smoothed_df = pd.DataFrame()

# Apply Kalman filtering in fixed-size groups of 10 rows
for i in range(num_groups):
    start_idx = i * 10
    end_idx = start_idx + 10
    smoothed_df = pd.concat([smoothed_df, apply_kalman_filter(df.iloc[start_idx:end_idx], feature_ranges)])

# Keep the original pH column unchanged
smoothed_df["pH"] = df["pH"]

# Check the last few rows for correctness
print(smoothed_df.tail())

             a          V          b          S    pH
405  11.299567  63.546965 -45.805016  88.929270  10.0
406  11.936652  63.911979 -46.210074  88.069309  10.0
407  11.168304  62.636678 -45.286134  89.451345  10.0
408  15.534921  63.797076 -48.954465  91.199451  10.0
409  11.538953  64.035471 -45.386316  87.119607  10.0


In [14]:
# Write the filtered data to a new Excel workbook, leaving out the row index
output_path = "filtered_data_kalman_scaled.xlsx"
smoothed_df.to_excel(output_path, index=False)
print(f"Filtered dataset saved successfully as '{output_path}'!")

Filtered dataset saved successfully as 'filtered_data_kalman_scaled.xlsx'!
