In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are read from the local "./Temp-Dataset" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list every directory under that dataset folder

import os
# Walk the dataset tree and show each directory once (individual file
# paths are intentionally not printed).
for dirname, _subdirs, _files in os.walk('./Temp-Dataset'):
    print(dirname)

# You can write up to 20GB to the current directory (./) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
import pywt
import os
import pandas as pd
from skimage.io import imread

# Define folder path
folder_path = "./Temp-Dataset/TRAIN/"

# Initialize lists for storing feature data (one dict per image)
fourier_features = []
wavelet_features_list = []
counter = 0

# Keep only sub-directories (one per person) so the progress total matches the
# number of folders that are actually processed. Sort the names because
# os.listdir returns entries in arbitrary order and the saved row order should
# be reproducible between runs.
lst = sorted(
    entry for entry in os.listdir(folder_path)
    if os.path.isdir(os.path.join(folder_path, entry))
)

# Iterate through all directories (persons) in the folder
for counter, person in enumerate(lst, start=1):
    person_path = os.path.join(folder_path, person)

    for filename in sorted(os.listdir(person_path)):
        # Process only JPEG files; compare the extension case-insensitively so
        # names such as "IMG.JPG" or "photo.jpeg" are not silently skipped.
        if not filename.lower().endswith((".jpg", ".jpeg")):
            continue

        img_path = os.path.join(person_path, filename)
        img = imread(img_path)

        if img.ndim == 2:
            # Already single-channel: rgb2gray expects a trailing channel axis
            # of length 3 and would reject this image. Rescale integer pixels
            # to [0, 1] so the values are on the same scale as rgb2gray output.
            if np.issubdtype(img.dtype, np.integer):
                img = img / np.iinfo(img.dtype).max
        else:
            img = rgb2gray(img)
        img = img.astype(np.float32)

        # -------------------------------------------------------------
        # Compute the 2D Fourier Transform (FFT)
        # -------------------------------------------------------------
        f_transform = np.fft.fft2(img)
        f_shift = np.fft.fftshift(f_transform)  # Shift zero frequency to center
        magnitude_spectrum = np.log(np.abs(f_shift) + 1)  # Log-scaled magnitude, always >= 0

        # Summary statistics of the log-magnitude spectrum
        fourier_features.append({
            'Person': person,
            "Image": filename,
            "FFT Mean": np.mean(magnitude_spectrum),
            "FFT Variance": np.var(magnitude_spectrum),
            # The spectrum is non-negative, so squaring needs no abs()
            "FFT Energy": np.sum(magnitude_spectrum ** 2)
        })

        # -------------------------------------------------------------
        # Compute a 2D Wavelet Transform (Haar wavelet)
        # -------------------------------------------------------------
        cA, (cH, cV, cD) = pywt.dwt2(img, 'haar')

        # Mean / variance / energy for each of the four coefficient arrays
        wavelet_features_list.append({
            'Person': person,
            "Image": filename,
            "cA Mean": np.mean(cA), "cA Variance": np.var(cA), "cA Energy": np.sum(cA ** 2),
            "cH Mean": np.mean(cH), "cH Variance": np.var(cH), "cH Energy": np.sum(cH ** 2),
            "cV Mean": np.mean(cV), "cV Variance": np.var(cV), "cV Energy": np.sum(cV ** 2),
            "cD Mean": np.mean(cD), "cD Variance": np.var(cD), "cD Energy": np.sum(cD ** 2)
        })

    print(f'{person} has been extracted')
    print(f'{counter}/{len(lst)} completed.')

# Convert lists to DataFrames
fourier_df = pd.DataFrame(fourier_features)
wavelet_df = pd.DataFrame(wavelet_features_list)

# Save to Excel
with pd.ExcelWriter("features_fourier_train.xlsx") as writer:
    fourier_df.to_excel(writer, sheet_name="Fourier Features train", index=False)
with pd.ExcelWriter("features_wavelet_train.xlsx") as writer:
    wavelet_df.to_excel(writer, sheet_name="Wavelet Features train", index=False)
print("Feature extraction completed. Data saved to features_fourier_train.xlsx and features_wavelet_train.xlsx")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
import pywt
import os
import pandas as pd
from skimage.io import imread

# Define folder path
folder_path = "./Temp-Dataset/TEST/"

# Initialize lists for storing feature data (one dict per image)
fourier_features = []
wavelet_features_list = []
counter = 0

# Keep only sub-directories (one per person) so the progress total matches the
# number of folders that are actually processed. Sort the names because
# os.listdir returns entries in arbitrary order and the saved row order should
# be reproducible between runs.
lst = sorted(
    entry for entry in os.listdir(folder_path)
    if os.path.isdir(os.path.join(folder_path, entry))
)

# Iterate through all directories (persons) in the folder
for counter, person in enumerate(lst, start=1):
    person_path = os.path.join(folder_path, person)

    for filename in sorted(os.listdir(person_path)):
        # Process only JPEG files; compare the extension case-insensitively so
        # names such as "IMG.JPG" or "photo.jpeg" are not silently skipped.
        if not filename.lower().endswith((".jpg", ".jpeg")):
            continue

        img_path = os.path.join(person_path, filename)
        img = imread(img_path)

        if img.ndim == 2:
            # Already single-channel: rgb2gray expects a trailing channel axis
            # of length 3 and would reject this image. Rescale integer pixels
            # to [0, 1] so the values are on the same scale as rgb2gray output.
            if np.issubdtype(img.dtype, np.integer):
                img = img / np.iinfo(img.dtype).max
        else:
            img = rgb2gray(img)
        img = img.astype(np.float32)

        # -------------------------------------------------------------
        # Compute the 2D Fourier Transform (FFT)
        # -------------------------------------------------------------
        f_transform = np.fft.fft2(img)
        f_shift = np.fft.fftshift(f_transform)  # Shift zero frequency to center
        magnitude_spectrum = np.log(np.abs(f_shift) + 1)  # Log-scaled magnitude, always >= 0

        # Summary statistics of the log-magnitude spectrum
        fourier_features.append({
            'Person': person,
            "Image": filename,
            "FFT Mean": np.mean(magnitude_spectrum),
            "FFT Variance": np.var(magnitude_spectrum),
            # The spectrum is non-negative, so squaring needs no abs()
            "FFT Energy": np.sum(magnitude_spectrum ** 2)
        })

        # -------------------------------------------------------------
        # Compute a 2D Wavelet Transform (Haar wavelet)
        # -------------------------------------------------------------
        cA, (cH, cV, cD) = pywt.dwt2(img, 'haar')

        # Mean / variance / energy for each of the four coefficient arrays
        wavelet_features_list.append({
            'Person': person,
            "Image": filename,
            "cA Mean": np.mean(cA), "cA Variance": np.var(cA), "cA Energy": np.sum(cA ** 2),
            "cH Mean": np.mean(cH), "cH Variance": np.var(cH), "cH Energy": np.sum(cH ** 2),
            "cV Mean": np.mean(cV), "cV Variance": np.var(cV), "cV Energy": np.sum(cV ** 2),
            "cD Mean": np.mean(cD), "cD Variance": np.var(cD), "cD Energy": np.sum(cD ** 2)
        })

    print(f'{person} has been extracted')
    print(f'{counter}/{len(lst)} completed.')

# Convert lists to DataFrames
fourier_df = pd.DataFrame(fourier_features)
wavelet_df = pd.DataFrame(wavelet_features_list)

# Save to Excel
with pd.ExcelWriter("features_fourier_test.xlsx") as writer:
    fourier_df.to_excel(writer, sheet_name="Fourier Features test", index=False)
with pd.ExcelWriter("features_wavelet_test.xlsx") as writer:
    wavelet_df.to_excel(writer, sheet_name="Wavelet Features test", index=False)

print("Feature extraction completed. Data saved to features_fourier_test.xlsx and features_wavelet_test.xlsx")


In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Read the wavelet statistics written by the training extraction step
file_path = "./features_wavelet_train.xlsx"
df = pd.read_excel(file_path)

# Keep the image identifiers so they can be re-attached to the PCA output
image_ids = df['Image']

# Column groups: one statistic (mean / variance / energy) across the four
# wavelet coefficient arrays
mean_columns = ['cA Mean', 'cH Mean', 'cV Mean', 'cD Mean']
variance_columns = ['cA Variance', 'cH Variance', 'cV Variance', 'cD Variance']
energy_columns = ['cA Energy', 'cH Energy', 'cV Energy', 'cD Energy']

mean_features = df[mean_columns]
variance_features = df[variance_columns]
energy_features = df[energy_columns]

# PCA helper: standardize, project, and report retained variance
def apply_pca(data, n_components=2):
    """Standardize ``data`` and reduce it with PCA.

    Args:
        data: Table of numeric features, one row per sample.
        n_components: Number of principal components to keep.

    Returns:
        Tuple ``(transformed_data, explained_variance)``: the projected
        samples and the sum of the kept components' explained-variance ratios.
    """
    standardized = StandardScaler().fit_transform(data)

    reducer = PCA(n_components=n_components)
    projected = reducer.fit_transform(standardized)

    return projected, np.sum(reducer.explained_variance_ratio_)

# Reduce each statistic family (mean / variance / energy) independently
pca_mean, var_mean = apply_pca(mean_features)
pca_variance, var_variance = apply_pca(variance_features)
pca_energy, var_energy = apply_pca(energy_features)

# Report how much variance the kept components retain per family
print(f"Explained Variance:")
print(f"Mean Features: {var_mean * 100:.2f}%")
print(f"Variance Features: {var_variance * 100:.2f}%")
print(f"Energy Features: {var_energy * 100:.2f}%")

# Assemble the output table: identifiers first, then the two components of
# each family, all aligned on the index of the loaded frame
pca_df = pd.concat(
    [
        pd.DataFrame({'Image': image_ids, 'Person': df['Person']}),
        pd.DataFrame(pca_mean, columns=['PCA_Mean_1', 'PCA_Mean_2'], index=df.index),
        pd.DataFrame(pca_variance, columns=['PCA_Variance_1', 'PCA_Variance_2'], index=df.index),
        pd.DataFrame(pca_energy, columns=['PCA_Energy_1', 'PCA_Energy_2'], index=df.index),
    ],
    axis=1,
)

# Write the result next to the notebook
output_file_path = "pca_wavelet_features_train.xlsx"
pca_df.to_excel(output_file_path, index=False)

print(f"PCA results saved to {output_file_path}")


In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Read the wavelet statistics written by the test extraction step
file_path = "./features_wavelet_test.xlsx"
df = pd.read_excel(file_path)

# Keep the image identifiers so they can be re-attached to the PCA output
image_ids = df['Image']

# Column groups: one statistic (mean / variance / energy) across the four
# wavelet coefficient arrays
mean_columns = ['cA Mean', 'cH Mean', 'cV Mean', 'cD Mean']
variance_columns = ['cA Variance', 'cH Variance', 'cV Variance', 'cD Variance']
energy_columns = ['cA Energy', 'cH Energy', 'cV Energy', 'cD Energy']

mean_features = df[mean_columns]
variance_features = df[variance_columns]
energy_features = df[energy_columns]

# Function to apply PCA
def apply_pca(data, n_components=2, fit_data=None):
    """Standardize ``data`` and project it onto principal components.

    Args:
        data: Table of numeric features (one row per sample) to transform.
        n_components: Number of principal components to keep.
        fit_data: Optional table with the same columns as ``data``. When
            given, the scaler and the PCA are fitted on ``fit_data`` and only
            applied to ``data``. When omitted, both are fitted on ``data``
            itself.

    Returns:
        Tuple ``(transformed_data, explained_variance)``. ``explained_variance``
        is the summed ``explained_variance_ratio_`` of the fitted PCA, i.e. it
        is measured on whichever table the PCA was fitted on.
    """
    scaler = StandardScaler()
    pca = PCA(n_components=n_components)

    if fit_data is None:
        transformed_data = pca.fit_transform(scaler.fit_transform(data))
    else:
        # Learn the scaling and the component axes from the reference table,
        # then reuse them unchanged for the rows being transformed.
        pca.fit(scaler.fit_transform(fit_data))
        transformed_data = pca.transform(scaler.transform(data))

    explained_variance = np.sum(pca.explained_variance_ratio_)
    return transformed_data, explained_variance

# Test samples must be projected with the scaler/PCA learned from the training
# features. Re-fitting on the test set would give components with different
# axes and scales, so the test projections could not be compared with the
# training ones.
train_df = pd.read_excel("./features_wavelet_train.xlsx")

# Apply PCA by feature type (fitted on train, applied to test).
# The var_* values therefore describe the training-fitted PCA.
pca_mean, var_mean = apply_pca(
    mean_features, fit_data=train_df[list(mean_features.columns)]
)
pca_variance, var_variance = apply_pca(
    variance_features, fit_data=train_df[list(variance_features.columns)]
)
pca_energy, var_energy = apply_pca(
    energy_features, fit_data=train_df[list(energy_features.columns)]
)

# Report how much variance the kept components retain per family
print(f"Explained Variance:")
print(f"Mean Features: {var_mean * 100:.2f}%")
print(f"Variance Features: {var_variance * 100:.2f}%")
print(f"Energy Features: {var_energy * 100:.2f}%")

# Assemble the output table: identifiers first, then the two components of
# each family, all aligned on the index of the loaded frame
pca_df = pd.concat(
    [
        pd.DataFrame({'Image': image_ids, 'Person': df['Person']}),
        pd.DataFrame(pca_mean, columns=['PCA_Mean_1', 'PCA_Mean_2'], index=df.index),
        pd.DataFrame(pca_variance, columns=['PCA_Variance_1', 'PCA_Variance_2'], index=df.index),
        pd.DataFrame(pca_energy, columns=['PCA_Energy_1', 'PCA_Energy_2'], index=df.index),
    ],
    axis=1,
)

# Write the result next to the notebook
output_file_path = "pca_wavelet_features_test.xlsx"
pca_df.to_excel(output_file_path, index=False)

print(f"PCA results saved to {output_file_path}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import torch.nn.functional as F

# --- Step 1: Load Excel Data ---
# Load the dataset from the Excel file
file_path = "./pca_wavelet_features_train.xlsx"  # Replace with your actual file path
data_df = pd.read_excel(file_path)

# 'Image' and 'Person' identify a sample; they are not features. Drop them
# explicitly instead of relying on the dtype filter alone: read_excel infers
# column types, so numeric-looking person folder names (e.g. "001") may come
# back as numbers and would otherwise be ranked as if they were a feature.
id_columns = ["Image", "Person"]
feature_df = (
    data_df
    .drop(columns=id_columns, errors="ignore")  # tolerate files without id columns
    .select_dtypes(include=["number"])          # keep numeric feature columns only
)
feature_names = feature_df.columns  # Feature names match the numeric columns

# Replace missing values with 0 and convert to a NumPy array
data = feature_df.fillna(0).values

# --- Step 2: Compute Feature Similarity ---
# Move the feature table into a float32 tensor
data_tensor = torch.tensor(data, dtype=torch.float32)

# Scale every column (feature) to unit L2 norm
data_tensor_n = F.normalize(data_tensor, p=2, dim=0)

# Pairwise dot products of the unit-norm columns
# Shape: (num_features, num_features)
similarity_matrix = torch.matmul(data_tensor_n.T, data_tensor_n)

# --- Step 3: Rank Features ---
# Map each feature to the list of (feature, score) pairs ordered from the
# most to the least similar
ranked_features = {}
for feature, similarity_row in zip(feature_names, similarity_matrix):
    row_scores = similarity_row.numpy()

    # argsort is ascending, so reverse it for a descending ranking
    descending = row_scores.argsort()[::-1]
    ranked_features[feature] = [(feature_names[j], row_scores[j]) for j in descending]

# --- Step 4: Save Ranked Features ---
# Flatten the per-feature rankings into rows of
# [feature, 1-based rank, ranked feature, score]
output_data = [
    [feature, rank, other_feature, score]
    for feature, rankings in ranked_features.items()
    for rank, (other_feature, score) in enumerate(rankings, start=1)
]

# Long-format table of the rankings
output_df = pd.DataFrame(output_data, columns=["Feature", "Rank", "Ranked Feature", "Score"])

# Persist the rankings to a new Excel file
output_file = "ranked_features_pca_wavelet.xlsx"
output_df.to_excel(output_file, index=False)

print(f"Feature ranking completed. Results saved to '{output_file}'.")


# Reshape the long ranking table into a Feature x Ranked Feature score matrix
heatmap_data = output_df.pivot(index="Feature", columns="Ranked Feature", values="Score")

# Create the figure and axes explicitly, then draw the heatmap on them
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    heatmap_data,
    cmap="coolwarm",
    center=0,
    square=True,
    linewidths=0.5,
    ax=ax,
)

# Title, tick orientation, and display
ax.set_title("Feature Similarity Heatmap (Cosine Similarity)")
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()
