In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Demo of a "dumbbell" plot: for each indicator, one marker for R_squared, one
# for Correlation, and a vertical connector showing the gap between them.
#
# NOTE: this cell builds a synthetic 'comparison_df' (columns: Indicator,
# R_squared, Correlation) from random numbers, and it overwrites any existing
# 'indicators', 'r2_values', 'cor_values' and 'comparison_df' in the kernel.
np.random.seed(0)  # For reproducible outputs
indicators = ['Indicator ' + str(i) for i in range(1, 11)]
r2_values = np.random.rand(10) * 0.5  # uniform in [0, 0.5)
cor_values = np.random.rand(10) * 0.5 + 0.5  # uniform in [0.5, 1.0): ensures a visible gap
comparison_df = pd.DataFrame({'Indicator': indicators, 'R_squared': r2_values, 'Correlation': cor_values})

# Plot
fig, ax = plt.subplots(figsize=(10, 6))

# Scatter plot for R_squared values (zorder=5 keeps markers above the connectors)
ax.scatter(comparison_df['Indicator'], comparison_df['R_squared'], color='tab:blue', s=100, label='R_squared', zorder=5)

# Scatter plot for Correlation coefficients
ax.scatter(comparison_df['Indicator'], comparison_df['Correlation'], color='tab:red', s=100, label='Correlation', zorder=5)

# Connect R_squared and Correlation with one vertical segment per indicator.
# A single vectorized vlines call replaces a row-by-row iterrows() loop.
ax.vlines(
    comparison_df['Indicator'],
    comparison_df['R_squared'],
    comparison_df['Correlation'],
    colors='gray',
    linestyles='solid',
    linewidth=1,
    zorder=3,
)

# Improve layout. set_xticks is called before set_xticklabels so the tick
# positions are fixed before the rotated labels are attached to them.
ax.set_xticks(comparison_df['Indicator'])
ax.set_xticklabels(comparison_df['Indicator'], rotation=45, ha="right")
ax.set_ylabel('Value')
ax.set_title('Gap Between R_squared and Correlation Coefficients for Each Indicator')
ax.legend()
# linewidth is passed as a number (not the string '0.5'), as matplotlib documents.
ax.grid(True, which='major', linestyle='--', linewidth=0.5, color='grey', zorder=0)

plt.tight_layout()
plt.show()


In [None]:
# Scatter of R-squared vs. correlation coefficient per indicator, colored by group.
# Inputs expected from earlier cells: 'filtered_top_correlations_refined',
# 'r2_values_sorted', 'groups', and plotly.graph_objects imported as 'go'.

# Exclude PCE as it's a self-correlation. errors='ignore' keeps the cell
# runnable when 'PCE' is already absent from the input.
indicators = filtered_top_correlations_refined.drop('PCE', errors='ignore')

# 'r2_values_sorted' may be a dict or a pandas Series (pd.Series accepts both).
# 'filtered_top_correlations_refined' is assumed to be a Series indexed by
# indicator name -- TODO confirm against the cell that builds it.
r2_values = pd.Series(r2_values_sorted)
cor_values = indicators

# Merge the two Series into a DataFrame and add groups. The columns are aligned
# on their index labels; a label present in only one input gets NaN in the
# other column.
comparison_df = pd.DataFrame({'R_squared': r2_values, 'Correlation': cor_values})
comparison_df['Group'] = comparison_df.index.map(groups)

# Create the scatter plot using Plotly
fig = go.Figure()

# Add one trace per group. dropna=False keeps indicators that have no entry in
# 'groups' (their Group is NaN); by default groupby would silently drop them
# from the figure. They are shown under an explicit "Ungrouped" legend entry.
for group, group_df in comparison_df.groupby('Group', dropna=False):
    fig.add_trace(
        go.Scatter(
            x=group_df['R_squared'],
            y=group_df['Correlation'],
            mode='markers',
            text=group_df.index,  # This will show up on hover
            marker=dict(
                size=10,
                opacity=0.8,  # Slightly more opaque for better visibility
            ),
            name='Ungrouped' if pd.isna(group) else group  # This will be the legend entry
        )
    )

# Figure geometry: height is derived from the width and the aspect ratio.
fig_width = 800
aspect_ratio = 1.6  # Example aspect ratio, adjust as needed

# Set the layout for the plot, including the legend anchored inside the
# top-right corner of the plotting area.
fig.update_layout(
    title='Comparison of R-squared and Correlation Coefficients',
    xaxis_title='R-squared Values',
    yaxis_title='Correlation Coefficients',
    xaxis=dict(showgrid=True, range=[0, 1]),  # Adjust the x-axis range if needed
    yaxis=dict(showgrid=True),
    legend_title="Groups",
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="right",
        x=0.99
    ),
    width=fig_width,  # Control the width of the figure
    height=fig_width / aspect_ratio,  # Control the height based on the aspect ratio
)

# Show the figure
fig.show()