In [None]:
import scipy.io
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import seaborn as sns
import pandas as pd
import scipy.stats
import numpy as np

# --- Fig 1B: predictive-subset size, spatial vs. cue ---
# NOTE(review): `data` is not defined in this cell; presumably it is the
# mapping loaded earlier (e.g. with scipy.io.loadmat) -- confirm.

# Collapse both arrays to 1-D so they can be stacked into a single column.
subset_sizes_spa_flat = data['subsetSizesSpa'].flatten()
subset_sizes_cue_flat = data['subsetSizesCue'].flatten()

# Long-format table: one row per value, tagged with the task it belongs to
# (all spatial rows first, then all cue rows).
n_spa = len(subset_sizes_spa_flat)
n_cue = len(subset_sizes_cue_flat)
df = pd.DataFrame({
    'Subset Size': np.concatenate([subset_sizes_spa_flat, subset_sizes_cue_flat]),
    'Task': ['Spatial'] * n_spa + ['Cue'] * n_cue,
})

# Narrow, tall canvas with a single axes.
fig, ax = plt.subplots(figsize=(3, 5))

# One black marker per value, grouped by task along x.
swarm_plot = sns.swarmplot(
    data=df, x='Task', y='Subset Size',
    color='black', size=10, ax=ax,
)

# Join the i-th spatial value to the i-th cue value with a gray segment.
# The segment runs between x = 0 and x = 1; the cue array is indexed by the
# spatial position, so it must be at least as long as the spatial array.
for i, spa_value in enumerate(subset_sizes_spa_flat):
    ax.plot([0, 1], [spa_value, subset_sizes_cue_flat[i]], 'gray')

# Label and limits; the x-range leaves half a unit of padding on each side.
ax.set(
    ylabel='Proportion of Neurons per Ensemble in Predictive Subset',
    xlim=(-0.5, 1.5),
    ylim=(-0.02, 0.5),
)
ax.yaxis.set_major_locator(MultipleLocator(0.05))  # major tick every 0.05

# Hide the top and right frame lines.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# Legend entries: two round markers plus a gray line.
# NOTE: these are only prepared here -- no legend is attached to the axes in
# this cell, and the blue/green marker faces differ from the black points
# drawn above.
handles = [
    plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=face, markersize=10)
    for face in ('blue', 'green')
]
handles.append(plt.Line2D([0], [0], linestyle='-', color='gray'))
labels = ['Spatial', 'Cue', 'Correspondence']

# Fit everything inside the figure, then render.
plt.tight_layout()

# Fig 1B
plt.show()

# --- Fig 1C: overlap between the spatial and cue predictive subsets ---
# NOTE(review): `data` is not defined in this cell; presumably it is the
# mapping loaded earlier (e.g. with scipy.io.loadmat) -- confirm.

# Collapse the overlap array to 1-D.
overlap_values = data['matchRateSpaCue'].flatten()

# seaborn wants a categorical column for x; every row gets the same label so
# all values land in a single swarm.
n_values = len(overlap_values)
df_overlap = pd.DataFrame({
    'Overlap': overlap_values,
    'Ensemble': ['Ensemble'] * n_values,
})

# Narrow, tall canvas with a single axes, handed to seaborn explicitly.
fig, ax = plt.subplots(figsize=(2, 5))
sns.swarmplot(
    data=df_overlap, x='Ensemble', y='Overlap',
    size=10, color='black', ax=ax,
)

# Blank x-label; the y-range starts slightly below 0.
ax.set(
    xlabel='',
    ylabel='Proportion of Spatial and Cue Predictive Subset Overlap',
    ylim=(-0.02, 0.50),
)
ax.yaxis.set_major_locator(MultipleLocator(0.05))  # major tick every 0.05

# Hide the top and right frame lines.
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

# Fit everything inside the figure, then render.
plt.tight_layout()

# Fig 1C
plt.show()