# Heatmap

### Normalized Density:
It shows the density of fish positions in each area of the channel, which helps identify areas of high or low usage, i.e., where fish prefer to swim and which regions they avoid.

It can also be used to correlate fish movement with flow turbulence features and to evaluate how fish interact with obstacles, structures, or turbines in the channel.

The numbers represent a fraction or probability density.
For example a value of 0.1 means 10% of the total fish positions are in that bin.
Normalization is often done by dividing the count in each bin by the total count of data points.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Run labels to process; each one is loaded from '<label>.xlsx'
file_names = ['MO1', 'HO1', 'MS', 'HS']  # Add/remove as needed

# Plot extent along each axis (axis labels below give the units as cm)
x_min, x_max = 0, 120
y_min, y_max = 0, 30

# Configure bins (4 horizontal, 3 vertical)
x_bins = np.linspace(x_min, x_max, 5)
y_bins = np.linspace(y_min, y_max, 4)

# Global plot styling does not depend on the file, so set it once
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 22

for file_name in file_names:
    # Load data
    file_path = f'C:\\Users\\hseyyedzadeh\\Documents\\Heatmap\\{file_name}.xlsx'
    data = pd.read_excel(file_path)

    # Mirror both axes about the upper limits; using x_max / y_max keeps the
    # flip in sync with the bin edges if the extent is ever changed
    x = x_max - data['X0']
    y = y_max - data['Y0']

    # Count positions per bin; points outside the bin edges are not counted
    heatmap, x_edges, y_edges = np.histogram2d(x, y, bins=[x_bins, y_bins])

    # Guard the normalization: with no in-range points the division would
    # produce NaN in every bin and an unreadable figure
    total_count = heatmap.sum()
    if total_count == 0:
        print(f'{file_name} - no positions inside the plotted extent, skipping')
        continue
    heatmap_normalized = heatmap / total_count

    # Verify normalization (should print 1.0000)
    print(f'{file_name} - Total density: {np.sum(heatmap_normalized):.4f}')

    # Plot setup
    plt.figure(figsize=(20, 5))

    # histogram2d returns an array indexed [x_bin, y_bin]; imshow expects
    # [row (y), column (x)], hence the transpose. The colour scale is pinned
    # to start at 0 so it agrees with the colorbar ticks computed below.
    im = plt.imshow(
        heatmap_normalized.T,
        extent=[x_min, x_max, y_min, y_max],
        origin='lower',
        cmap='Reds',
        aspect='auto',
        vmin=0,
        vmax=heatmap_normalized.max()
    )

    # Annotate each bin with its fraction, placed at the bin centre
    x_centers = (x_edges[:-1] + x_edges[1:]) / 2
    y_centers = (y_edges[:-1] + y_edges[1:]) / 2
    for (i, j), value in np.ndenumerate(heatmap_normalized):
        plt.text(
            x_centers[i], y_centers[j],
            f'{value:.2f}',
            color='black',
            ha='center', va='center',
            fontsize=14
        )

    # Labels and formatting: ticks sit on the bin edges
    plt.xticks(x_bins, [f'{edge:.0f}' for edge in x_bins])
    plt.yticks(y_bins, [f'{edge:.0f}' for edge in y_bins])
    plt.xlabel('Channel length (cm)')
    plt.ylabel('Channel width (cm)')
    plt.title(f'Normalized Heatmap: {file_name}')

    # Colorbar with five evenly spaced ticks from 0 to the largest fraction
    cbar = plt.colorbar(im, label='Fraction of Fish Positions')
    cbar_ticks = np.linspace(0, heatmap_normalized.max(), 5)
    cbar.set_ticks(cbar_ticks)
    cbar.set_ticklabels([f'{tick:.2f}' for tick in cbar_ticks])

    # Save the figure next to the notebook
    plt.savefig(f'Final_Annotated_Normalized_Heatmap-{file_name}.png', dpi=300, bbox_inches='tight')
    # NOTE(review): the figure is left open (the close call was disabled),
    # presumably so it renders inline in the notebook. Add plt.close() here
    # when running as a plain script over many files.

print("All heatmaps generated successfully!")

This script divides the domain into a grid of bins (like tiny cells). It then counts how many data points fall into each bin, which represents the density of data points. `counts` is a 2D matrix where each element is the number of data points that fall into a specific bin (a small area defined by `xedges` and `yedges`).
For example, if the entry of `counts` in the 5th row and 10th column is 50, it means there were 50 fish positions in the bin corresponding to the 5th row and 10th column of the grid.

Data density can vary widely—some regions may have very high counts, while others are nearly empty. Logarithmic scaling compresses the range, making low-density regions visible without overwhelming the plot with high-density areas. The code then normalizes the result, mapping the logarithmic values to a 0 to 1 scale for consistent color representation, which ensures that color intensity reflects the actual data distribution accurately.

So, the difference between the following code and the previous code is that it uses logarithmic scaling and a larger number of bins.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

# Folder containing the .xlsx files
folder_path = r'C:\Users\hseyyedzadeh\Documents\Heatmap'  # Update with your folder path

# Set the font to Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 12  # Set global font size

# Per-bin counts that map to the bottom (0) and top (1) of the colour scale
min_threshold = 0
max_threshold = 300

# Target extent after rescaling, and the bin edges (400 x 100 bins).
# These do not depend on the file, so they are computed once.
max_width = 30
max_length = 120
bins_x = np.linspace(0, max_length, 401)
bins_y = np.linspace(0, max_width, 101)

# Thresholds in log space; the +1 mirrors the +1 applied to the counts
log_min_threshold = np.log10(min_threshold + 1)
log_max_threshold = np.log10(max_threshold + 1)

# Get list of .xlsx files in folder
file_list = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]

# Loop through each file to generate a separate heatmap
for file_name in file_list:
    # Read Excel file
    file_path = os.path.join(folder_path, file_name)
    data = pd.read_excel(file_path)

    # Both coordinate columns are required
    if 'X0' not in data.columns or 'Y0' not in data.columns:
        print(f"File {file_name} does not contain required columns (X0, Y0)")
        continue
    X = data['X0'].to_numpy(dtype=float)
    Y = data['Y0'].to_numpy(dtype=float)

    # Remove values above 125 before scaling. Non-finite coordinates are
    # dropped too: a single NaN in Y would turn the scale factor into NaN.
    valid_idx = np.isfinite(X) & np.isfinite(Y) & (X <= 125)
    X = X[valid_idx]
    Y = Y[valid_idx]

    # The rescaling divides by the per-file maxima, so they must exist and
    # be positive
    if X.size == 0 or X.max() <= 0 or Y.max() <= 0:
        print(f"File {file_name} has no usable positions after filtering, skipping")
        continue

    # Stretch the data so its maxima land exactly on the target extent
    X = X * (max_length / X.max())
    Y = Y * (max_width / Y.max())

    # Calculate 2D histogram (counts per bin)
    counts, xedges, yedges = np.histogram2d(X, Y, bins=[bins_x, bins_y])

    # Apply logarithmic scaling to density
    log_counts = np.log10(counts + 1)  # Add 1 to avoid log10(0)

    # Map the log counts onto [0, 1] between the two thresholds and clamp
    # anything outside that range
    normalized_density = (log_counts - log_min_threshold) / (log_max_threshold - log_min_threshold)
    normalized_density = np.clip(normalized_density, 0, 1)

    # Plot heatmap for the current file. vmin/vmax pin the colour scale to
    # [0, 1]; without them imshow rescales to each file's own range, so the
    # thresholds would not give comparable colours across files and the
    # colorbar's 0/1 ticks could fall outside the scale.
    plt.figure(figsize=(12, 3))
    plt.imshow(
        normalized_density.T,  # histogram2d is [x_bin, y_bin]; imshow wants [row, col]
        extent=[0, max_length, 0, max_width],
        origin='lower',
        aspect='auto',
        cmap='viridis',
        vmin=0,
        vmax=1
    )
    plt.colorbar(label='Density', ticks=[0, 1], format='%.1f')
    plt.title(f'Density Scatter Plot for {file_name} (Logarithmic Scaling)')
    plt.xlabel('$z (cm)$')
    plt.ylabel('$x (cm)$')

    # Save the heatmap as an image next to the input file
    output_file_name = os.path.splitext(file_name)[0] + '_density_scatter_log_scaled.png'
    output_file_path = os.path.join(folder_path, output_file_name)
    plt.savefig(output_file_path, bbox_inches='tight')
    # NOTE(review): the figure is left open (the close call was disabled),
    # presumably so it renders inline in the notebook. Add plt.close() here
    # when processing many files as a script.

The following code generates a scatter plot with density-based coloring, which is a different approach compared to the 2D histogram heatmap in the previous script.
Density is estimated using kernel density estimation (KDE) (scipy.stats.gaussian_kde), which smooths the data and provides a continuous density estimate. The result is a continuous density map, where each point is assigned a density value based on its proximity to other points.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Folder containing .xlsx files
folder_path = './'  # Replace with your folder path
file_list = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]

# Set the font to Times New Roman
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 14  # Set global font size

# Axis limits, used for the flip, the grid and the plot limits
channel_length = 120
channel_width = 30

# Define grid parameters (4 horizontal cells, 3 vertical cells)
x_bins = np.linspace(0, channel_length, 5)  # 4 cells (0-30, 30-60, 60-90, 90-120)
y_bins = np.linspace(0, channel_width, 4)   # 3 cells (0-10, 10-20, 20-30)

for file_name in file_list:
    # Load data
    file_path = os.path.join(folder_path, file_name)
    data = pd.read_excel(file_path)

    if 'X0' not in data.columns or 'Y0' not in data.columns:
        print(f"Skipping {file_name}: Missing X0/Y0 columns")
        continue

    # Subtracting from the axis maximum flips the data horizontally (x) and
    # vertically (y); drop the subtraction to keep the original orientation
    x = channel_length - data['X0'].to_numpy(dtype=float)
    y = channel_width - data['Y0'].to_numpy(dtype=float)

    # gaussian_kde cannot handle NaN/inf, so keep only finite pairs
    finite = np.isfinite(x) & np.isfinite(y)
    x = x[finite]
    y = y[finite]
    if x.size < 3:
        print(f"Skipping {file_name}: too few valid positions for a density estimate")
        continue

    # Estimate the density at every data point from all data points
    xy = np.vstack([x, y])
    try:
        density = gaussian_kde(xy)(xy)
    except np.linalg.LinAlgError:
        # Raised when the points are degenerate (e.g. all on one line)
        print(f"Skipping {file_name}: positions are degenerate, density estimate failed")
        continue

    # Min-max normalize to [0, 1]; a constant density would divide by zero
    density_span = density.max() - density.min()
    if density_span > 0:
        normalized_density = (density - density.min()) / density_span
    else:
        normalized_density = np.zeros_like(density)

    # Create plot; vmin/vmax make the 0-1 colour scale explicit
    plt.figure(figsize=(16, 4))
    scatter = plt.scatter(x, y, c=normalized_density, s=10,
                          cmap='viridis', edgecolor='none',
                          vmin=0, vmax=1)

    # Add grid lines matching 4x3 cells
    for x_edge in x_bins:
        plt.axvline(x=x_edge, color='black', linestyle='-', linewidth=0.5)
    for y_edge in y_bins:
        plt.axhline(y=y_edge, color='black', linestyle='-', linewidth=0.5)

    # File name without its extension, for the title and the output file
    base_name = os.path.splitext(file_name)[0]

    # Formatting
    plt.xlim(0, channel_length)
    plt.ylim(0, channel_width)
    plt.xticks(x_bins)
    plt.yticks(y_bins)
    plt.xlabel('$z (cm)$')
    plt.ylabel('$y (cm)$')
    plt.title(f'Fish Positions (Normalized Density) - {base_name}')

    # Colorbar with fixed scale
    cbar = plt.colorbar(scatter, label='Normalized Density')
    cbar.set_ticks([0, 1])
    cbar.set_ticklabels(['0.0', '1.0'])

    # Save the figure next to the input file
    output_path = os.path.join(folder_path, f"{base_name}_density_grid.png")
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    # NOTE(review): the figure is left open (the close call was disabled),
    # presumably so it renders inline in the notebook. Add plt.close() here
    # when processing many files as a script.

print("All density plots with grids generated!")