### Nearest neighbour

In [None]:
# Normalized nearest-neighbour distance grouped by the 'unique_id_count'
# column of nndID: box plot with the individual observations drawn on top.
nnd_count_kwargs = {
    'x': 'unique_id_count',
    'y': 'normalized_nnd',
    'data': nndID,
    'palette': 'Set1',
}
sns.boxplot(**nnd_count_kwargs)
# Jittered, semi-transparent points so overlapping observations stay visible
sns.stripplot(marker="o", alpha=0.6, jitter=True, **nnd_count_kwargs)
nnd_count_file = figures_path + 'nndcountboxplot.png'
plt.savefig(nnd_count_file)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Function to calculate averages for multiple metrics with filtering
def calculate_segment_averages(data, metrics, min_count=1):
    """Average each metric over every (Trial, ID, segment) group of *data*.

    Args:
        data: DataFrame with 'Trial', 'ID', 'segment' and 'carangids'
            columns plus one column per entry of *metrics*.
        metrics: Column names to average within each group.
        min_count: Minimum number of rows a group must have to be kept.

    Returns:
        DataFrame with one row per retained group holding the mean of each
        metric, the group's 'Trial', and 'Carangid' (the first 'carangids'
        value of the group). The columns exist even when no group
        qualifies, so callers can index 'Carangid' on an empty result.
    """
    # Declare the column layout up front so an empty result still exposes
    # every column; dict.fromkeys removes duplicates while keeping order.
    columns = list(dict.fromkeys([*metrics, 'Trial', 'Carangid']))
    records = []
    # A single composite-key groupby visits the groups in the same sorted
    # order as grouping by Trial, then ID, then segment in nested loops.
    grouped = data.groupby(['Trial', 'ID', 'segment'], observed=True)
    for (trial, _id, _segment), segment_data in grouped:
        if len(segment_data) < min_count:
            continue  # Too few rows in this segment
        record = {metric: segment_data[metric].mean() for metric in metrics}
        record['Trial'] = trial
        record['Carangid'] = segment_data['carangids'].iloc[0]
        records.append(record)
    return pd.DataFrame(records, columns=columns)

# Metrics to plot
metrics = ['normalized_nnd']

# One row per retained (Trial, ID, segment) group with the mean of each metric
avg_df = calculate_segment_averages(subset, metrics)

# Treat 'Carangid' as categorical so it is plotted as discrete groups
avg_df['Carangid'] = avg_df['Carangid'].astype('category')

# Named seaborn palette shared by the boxes, the points and the legend
palette = "Set1"

# Build the legend from the categories actually present and from the
# palette's own colours, instead of hard-coding 'red'/'blue' and 0/1.
# seaborn colours the x levels in category order from the same palette.
carangid_levels = list(avg_df['Carangid'].cat.categories)
level_colors = sns.color_palette(palette, n_colors=len(carangid_levels))
handles = [
    plt.Line2D([0], [0], marker='o', color=level_color, linestyle='',
               markersize=10, label=f'Carangid {level}')
    for level, level_color in zip(carangid_levels, level_colors)
]

# Create one figure per metric
for metric in metrics:
    plt.figure(figsize=(12, 8))

    # Box plot of the per-segment means, one box per Carangid level
    sns.boxplot(x="Carangid", y=metric, data=avg_df, palette=palette)

    # Overlay the individual per-segment means with the same palette
    sns.stripplot(
        x="Carangid", y=metric, data=avg_df, palette=palette, marker="o", alpha=0.6, jitter=True
    )

    # Legend placed outside the axes, to the upper right
    plt.legend(handles=handles, title='Carangid', bbox_to_anchor=(1.05, 1), loc='upper left')

    # Title and labels
    pretty_metric = metric.replace("_", " ").title()
    plt.title(f'{pretty_metric} Distribution by Carangid with Data Points', fontsize=16)
    plt.xlabel("Carangid", fontsize=14)
    plt.ylabel(pretty_metric, fontsize=14)

    # Adjust layout for better readability
    plt.tight_layout()
    # NOTE: fixed file name -- with more than one metric, later figures
    # would overwrite earlier ones.
    plt.savefig(figures_path + 'nndcarangidsboxplot.png')
    plt.show()


### Speed

In [None]:
# Standardized speed grouped by the 'unique_id_count' column of speedID:
# box plot with the individual observations drawn on top.
speed_count_kwargs = {
    'x': 'unique_id_count',
    'y': 'standardized_speed',
    'data': speedID,
    'palette': 'Set1',
}
sns.boxplot(**speed_count_kwargs)
# Jittered, semi-transparent points so overlapping observations stay visible
sns.stripplot(marker="o", alpha=0.6, jitter=True, **speed_count_kwargs)
speed_count_file = figures_path + 'speedcountboxplot.png'
plt.savefig(speed_count_file)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Function to calculate averages for multiple metrics with filtering
def calculate_segment_averages(data, metrics, min_count=100):
    """Average each metric over every (Trial, ID, segment) group of *data*.

    Args:
        data: DataFrame with 'Trial', 'ID', 'segment' and 'carangids'
            columns plus one column per entry of *metrics*.
        metrics: Column names to average within each group.
        min_count: Minimum number of rows a group must have to be kept.

    Returns:
        DataFrame with one row per retained group holding the mean of each
        metric, the group's 'Trial', and 'Carangid' (the first 'carangids'
        value of the group). The columns exist even when no group
        qualifies, so callers can index 'Carangid' on an empty result.
    """
    # Declare the column layout up front so an empty result still exposes
    # every column; dict.fromkeys removes duplicates while keeping order.
    columns = list(dict.fromkeys([*metrics, 'Trial', 'Carangid']))
    records = []
    # A single composite-key groupby visits the groups in the same sorted
    # order as grouping by Trial, then ID, then segment in nested loops.
    grouped = data.groupby(['Trial', 'ID', 'segment'], observed=True)
    for (trial, _id, _segment), segment_data in grouped:
        if len(segment_data) < min_count:
            continue  # Too few rows in this segment
        record = {metric: segment_data[metric].mean() for metric in metrics}
        record['Trial'] = trial
        record['Carangid'] = segment_data['carangids'].iloc[0]
        records.append(record)
    return pd.DataFrame(records, columns=columns)

# Metrics to plot
metrics = ['standardized_speed']

# One row per retained (Trial, ID, segment) group with the mean of each metric
avg_df = calculate_segment_averages(df_speed, metrics)

# Treat 'Carangid' as categorical so it is plotted as discrete groups
avg_df['Carangid'] = avg_df['Carangid'].astype('category')

# Named seaborn palette shared by the boxes, the points and the legend
palette = "Set1"

# Build the legend from the categories actually present and from the
# palette's own colours, instead of hard-coding 'red'/'blue' and 0/1.
# seaborn colours the x levels in category order from the same palette.
carangid_levels = list(avg_df['Carangid'].cat.categories)
level_colors = sns.color_palette(palette, n_colors=len(carangid_levels))
handles = [
    plt.Line2D([0], [0], marker='o', color=level_color, linestyle='',
               markersize=10, label=f'Carangid {level}')
    for level, level_color in zip(carangid_levels, level_colors)
]

# Create one figure per metric
for metric in metrics:
    plt.figure(figsize=(12, 8))

    # Box plot of the per-segment means, one box per Carangid level
    sns.boxplot(x="Carangid", y=metric, data=avg_df, palette=palette)

    # Overlay the individual per-segment means with the same palette
    sns.stripplot(
        x="Carangid", y=metric, data=avg_df, palette=palette, marker="o", alpha=0.6, jitter=True
    )

    # Legend placed outside the axes, to the upper right
    plt.legend(handles=handles, title='Carangid', bbox_to_anchor=(1.05, 1), loc='upper left')

    # Title and labels
    pretty_metric = metric.replace("_", " ").title()
    plt.title(f'{pretty_metric} Distribution by Carangid with Data Points', fontsize=16)
    plt.xlabel("Carangid", fontsize=14)
    plt.ylabel(pretty_metric, fontsize=14)

    # Adjust layout for better readability
    plt.tight_layout()
    # NOTE: fixed file name -- with more than one metric, later figures
    # would overwrite earlier ones.
    plt.savefig(figures_path + 'speedcarangidsboxplot.png')
    plt.show()


### Kinematics

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Function to calculate averages for multiple metrics with filtering
def calculate_segment_averages(data, metrics, min_count=100):
    """Average each metric over every (Trial, ID, segment) group of *data*.

    Args:
        data: DataFrame with 'Trial', 'ID', 'segment' and 'carangids'
            columns plus one column per entry of *metrics*.
        metrics: Column names to average within each group.
        min_count: Minimum number of rows a group must have to be kept.

    Returns:
        DataFrame with one row per retained group holding the mean of each
        metric, the group's 'Trial', and 'Carangid' (the first 'carangids'
        value of the group). The columns exist even when no group
        qualifies, so callers can index 'Carangid' on an empty result.
    """
    # Declare the column layout up front so an empty result still exposes
    # every column; dict.fromkeys removes duplicates while keeping order.
    columns = list(dict.fromkeys([*metrics, 'Trial', 'Carangid']))
    records = []
    # A single composite-key groupby visits the groups in the same sorted
    # order as grouping by Trial, then ID, then segment in nested loops.
    grouped = data.groupby(['Trial', 'ID', 'segment'], observed=True)
    for (trial, _id, _segment), segment_data in grouped:
        if len(segment_data) < min_count:
            continue  # Too few rows in this segment
        record = {metric: segment_data[metric].mean() for metric in metrics}
        record['Trial'] = trial
        record['Carangid'] = segment_data['carangids'].iloc[0]
        records.append(record)
    return pd.DataFrame(records, columns=columns)

# Metrics to plot
metrics = ['tailbeat_frequency', 'standardized_amplitude', 'strouhal_number']

# One row per retained (Trial, ID, segment) group with the mean of each metric
avg_df = calculate_segment_averages(data2, metrics)

# Treat 'Carangid' as categorical so it is plotted as discrete groups
avg_df['Carangid'] = avg_df['Carangid'].astype('category')

# Named seaborn palette shared by the boxes, the points and the legend
palette = "Set1"

# Build the legend from the categories actually present and from the
# palette's own colours, instead of hard-coding 'red'/'blue' and 0/1.
# seaborn colours the x levels in category order from the same palette.
carangid_levels = list(avg_df['Carangid'].cat.categories)
level_colors = sns.color_palette(palette, n_colors=len(carangid_levels))
handles = [
    plt.Line2D([0], [0], marker='o', color=level_color, linestyle='',
               markersize=10, label=f'Carangid {level}')
    for level, level_color in zip(carangid_levels, level_colors)
]

# Create one figure per metric
for metric in metrics:
    plt.figure(figsize=(12, 8))

    # Box plot of the per-segment means, one box per Carangid level
    sns.boxplot(x="Carangid", y=metric, data=avg_df, palette=palette)

    # Overlay the individual per-segment means with the same palette
    sns.stripplot(
        x="Carangid", y=metric, data=avg_df, palette=palette, marker="o", alpha=0.6, jitter=True
    )

    # Legend placed outside the axes, to the upper right
    plt.legend(handles=handles, title='Carangid', bbox_to_anchor=(1.05, 1), loc='upper left')

    # Title and labels
    pretty_metric = metric.replace("_", " ").title()
    plt.title(f'{pretty_metric} Distribution by Carangid with Data Points', fontsize=16)
    plt.xlabel("Carangid", fontsize=14)
    plt.ylabel(pretty_metric, fontsize=14)

    # Adjust layout for better readability
    plt.tight_layout()

    # Save each plot under a metric-specific file name
    filename = f"{figures_path}carangidsboxplot{metric}.png"
    plt.savefig(filename)

    # Show the plot
    plt.show()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Function to calculate averages for multiple metrics with filtering
def calculate_segment_averages(data, metrics, min_count=100):
    """Average each metric over every (Trial, ID, segment) group of *data*.

    Args:
        data: DataFrame with 'Trial', 'ID', 'segment' and 'carangids'
            columns plus one column per entry of *metrics*.
        metrics: Column names to average within each group.
        min_count: Minimum number of rows a group must have to be kept.

    Returns:
        DataFrame with one row per retained group holding the mean of each
        metric, the group's 'Trial', and 'Carangid' (the first 'carangids'
        value of the group). The columns exist even when no group
        qualifies, so callers can index 'Carangid' on an empty result.
    """
    # Declare the column layout up front so an empty result still exposes
    # every column; dict.fromkeys removes duplicates while keeping order.
    columns = list(dict.fromkeys([*metrics, 'Trial', 'Carangid']))
    records = []
    # A single composite-key groupby visits the groups in the same sorted
    # order as grouping by Trial, then ID, then segment in nested loops.
    grouped = data.groupby(['Trial', 'ID', 'segment'], observed=True)
    for (trial, _id, _segment), segment_data in grouped:
        if len(segment_data) < min_count:
            continue  # Too few rows in this segment
        record = {metric: segment_data[metric].mean() for metric in metrics}
        record['Trial'] = trial
        record['Carangid'] = segment_data['carangids'].iloc[0]
        records.append(record)
    return pd.DataFrame(records, columns=columns)

# Metrics to plot
metrics = ['heading','angular_velocity']

# One row per retained (Trial, ID, segment) group with the mean of each metric
avg_df = calculate_segment_averages(data, metrics)

# Treat 'Carangid' as categorical so it is plotted as discrete groups
avg_df['Carangid'] = avg_df['Carangid'].astype('category')

# Named seaborn palette shared by the boxes, the points and the legend
palette = "Set1"

# Build the legend from the categories actually present and from the
# palette's own colours, instead of hard-coding 'red'/'blue' and 0/1.
# seaborn colours the x levels in category order from the same palette.
carangid_levels = list(avg_df['Carangid'].cat.categories)
level_colors = sns.color_palette(palette, n_colors=len(carangid_levels))
handles = [
    plt.Line2D([0], [0], marker='o', color=level_color, linestyle='',
               markersize=10, label=f'Carangid {level}')
    for level, level_color in zip(carangid_levels, level_colors)
]

# Create one figure per metric (these figures are displayed, not saved)
for metric in metrics:
    plt.figure(figsize=(12, 8))

    # Box plot of the per-segment means, one box per Carangid level
    sns.boxplot(x="Carangid", y=metric, data=avg_df, palette=palette)

    # Overlay the individual per-segment means with the same palette
    sns.stripplot(
        x="Carangid", y=metric, data=avg_df, palette=palette, marker="o", alpha=0.6, jitter=True
    )

    # Legend placed outside the axes, to the upper right
    plt.legend(handles=handles, title='Carangid', bbox_to_anchor=(1.05, 1), loc='upper left')

    # Title and labels
    pretty_metric = metric.replace("_", " ").title()
    plt.title(f'{pretty_metric} Distribution by Carangid with Data Points', fontsize=16)
    plt.xlabel("Carangid", fontsize=14)
    plt.ylabel(pretty_metric, fontsize=14)

    # Adjust layout for better readability
    plt.tight_layout()
    plt.show()

In [None]:
# Box + strip plots of each kinematic metric grouped by 'unique_id_count'.
# Every metric is drawn on its own figure: plotting all three on the same
# implicit axes would leave earlier metrics overlaid in the later PNG files.
count_plots = [
    ('tailbeat_frequency', tailbeatID, 'tailbeatcountboxplot.png'),
    ('standardized_amplitude', amplitudeID, 'amplitudecountboxplot.png'),
    ('strouhal_number', strouhalID, 'strouhalcountboxplot.png'),
]
for count_metric, count_data, count_file in count_plots:
    plt.figure()  # Fresh figure so the saved image holds only this metric
    sns.boxplot(x='unique_id_count', y=count_metric, data=count_data, palette="Set1")
    # Jittered, semi-transparent points so overlapping observations stay visible
    sns.stripplot(x="unique_id_count", y=count_metric, data=count_data, palette='Set1', marker="o", alpha=0.6, jitter=True)
    plt.savefig(figures_path + count_file)
    plt.show()