In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from matplotlib_venn import venn2, venn3
import seaborn as sns

# Trend variables analysed for every station (column names in the input sheets)
parameters = [
    'LE_CORR', 'H_CORR', 'TA_F', 'VPD_F', 'P_F', 'H_F_MDS', 
    'GPP_DT_VUT_MEAN', 'LE_F_MDS', 'G_F_MDS', 'TS_F_MDS_1', 
    'NETRAD', 'SWC_F_MDS_1'
]

# Where the per-station Excel workbooks live, and where results are written
input_directory = r'C:\Deepak\stations\MM\Final1\yearly'
output_directory = r'C:\Deepak\stations\MM\Final1\yearly\commonstats_output_final'

# Make sure the output folder exists before anything tries to write into it
os.makedirs(output_directory, exist_ok=True)

# Accumulates one record per (station, parameter, season) for the selected period only
all_stations_selected_data = []

# Split the directory listing into yearly and JJA workbooks by file-name suffix
yearly_files = [name for name in os.listdir(input_directory) if name.endswith('_yearly.xlsx')]
jja_files = [name for name in os.listdir(input_directory) if name.endswith('_JJA.xlsx')]

# Pair each station's yearly workbook with its JJA workbook (None when one is missing).
# Yearly workbooks are registered first; JJA-only stations are appended afterwards.
jja_lookup = set(jja_files)
file_pairs = {}
for yearly_name in yearly_files:
    station_key = yearly_name.replace('_yearly.xlsx', '')
    jja_candidate = station_key + '_JJA.xlsx'
    file_pairs[station_key] = {
        'yearly': yearly_name,
        'jja': jja_candidate if jja_candidate in jja_lookup else None,
    }

for jja_name in jja_files:
    # setdefault leaves stations that already have a yearly entry untouched
    file_pairs.setdefault(jja_name.replace('_JJA.xlsx', ''),
                          {'yearly': None, 'jja': jja_name})

def calculate_score_and_selection(p_values_df, slopes_df, parameters):
    """Score every period and pick the one with the highest score.

    Each parameter that has a non-missing p-value and slope in a period
    contributes 0.9 (p < 0.1), 0.5 (p < 0.2) or 0.1 (otherwise) to that
    period's score; the first two cases also count as "significant".

    Args:
        p_values_df: DataFrame with one row per period and one p-value column
            per parameter.
        slopes_df: DataFrame with the matching slopes; its optional 'Years'
            column supplies the period names.
        parameters: Iterable of parameter (column) names to consider.
            Names missing from either table are ignored.

    Returns:
        A tuple ``(scores_by_period, selected_period)`` where
        ``scores_by_period`` maps the positional row index to a dict with
        keys 'score', 'sig_count' and 'period_name', and ``selected_period``
        is the row index with the highest (score, sig_count); on an exact tie
        the earliest period wins. Returns ``({}, None)`` when there is no
        period to score.
    """
    # Column membership does not change between periods, so resolve it once.
    usable_params = [
        param for param in parameters
        if param in p_values_df.columns and param in slopes_df.columns
    ]
    has_years = 'Years' in slopes_df.columns

    # Only rows present in BOTH tables can be scored. The two sheets are
    # cleaned independently by the caller, so their lengths may differ;
    # indexing past the shorter one would raise IndexError.
    n_periods = min(len(p_values_df), len(slopes_df))

    scores_by_period = {}
    for period_idx in range(n_periods):
        period_score = 0
        sig_count = 0

        for param in usable_params:
            p_val = p_values_df[param].iloc[period_idx]
            slope = slopes_df[param].iloc[period_idx]

            # A parameter without both a p-value and a slope contributes nothing
            if pd.isna(p_val) or pd.isna(slope):
                continue

            if p_val < 0.1:
                period_score += 0.9
                sig_count += 1
            elif p_val < 0.2:
                period_score += 0.5
                sig_count += 1
            else:
                period_score += 0.1

        scores_by_period[period_idx] = {
            'score': period_score,
            'sig_count': sig_count,
            'period_name': (slopes_df['Years'].iloc[period_idx]
                            if has_years else f'Period_{period_idx}'),
        }

    if not scores_by_period:
        return {}, None

    # Highest score wins; the number of significant parameters breaks ties.
    selected_period = max(
        scores_by_period,
        key=lambda idx: (scores_by_period[idx]['score'],
                         scores_by_period[idx]['sig_count']),
    )
    return scores_by_period, selected_period

def get_selected_period_data(station_name, p_values_df, slopes_df, parameters, selected_period_idx, season):
    """Build one record per parameter for a single, already-selected period.

    Parameters that are missing from either table, or whose p-value or slope
    is missing in the selected row, produce no record.

    Args:
        station_name: Value stored under 'station' in every record.
        p_values_df: DataFrame of p-values (one row per period).
        slopes_df: DataFrame of slopes (one row per period).
        parameters: Iterable of parameter (column) names to look up.
        selected_period_idx: Positional row index of the selected period.
        season: Value stored under 'season' in every record.

    Returns:
        List of dicts with keys 'station', 'parameter', 'season',
        'significance' ('high' / 'medium' / 'insig'), 'p_value', 'slope',
        'score_contribution' and 'period'.
    """
    # (exclusive upper p-value bound, significance label, score contribution)
    tiers = ((0.1, 'high', 0.9), (0.2, 'medium', 0.5))
    fallback_tier = ('insig', 0.1)
    has_years = 'Years' in p_values_df.columns

    rows = []
    for name in parameters:
        if not (name in p_values_df.columns and name in slopes_df.columns):
            continue

        p_here = p_values_df[name].iloc[selected_period_idx]
        slope_here = slopes_df[name].iloc[selected_period_idx]
        if pd.isna(p_here) or pd.isna(slope_here):
            continue

        # First tier whose bound exceeds the p-value; otherwise not significant
        label, contribution = next(
            ((tier_label, points) for bound, tier_label, points in tiers if p_here < bound),
            fallback_tier,
        )

        if has_years:
            period_label = p_values_df['Years'].iloc[selected_period_idx]
        else:
            period_label = f'Period_{selected_period_idx}'

        rows.append({
            'station': station_name,
            'parameter': name,
            'season': season,
            'significance': label,
            'p_value': p_here,
            'slope': slope_here,
            'score_contribution': contribution,
            'period': period_label,
        })

    return rows

def create_comprehensive_bar_plots_separate_seasons():
    """Save one stacked significance bar chart per season (yearly and JJA).

    Reads the module-level ``all_stations_selected_data`` records and, for
    each season that has data, draws one bar per entry of the module-level
    ``parameters`` list. Each bar stacks the number of records that are not
    significant (bottom), moderately significant (middle) and highly
    significant (top). A summary box is placed to the right of the axes and
    the figure is written to
    ``<output_directory>/comprehensive_significance_<season>.png``.

    Seasons without any record are reported on stdout and skipped.
    """
    # Stack layers listed bottom-to-top: (record label, legend text, colour)
    layers = (
        ('insig', 'Not Significant (p ≥ 0.2)', '#d62728'),
        ('medium', 'Moderately Significant (0.1 ≤ p < 0.2)', '#ff7f0e'),
        ('high', 'Highly Significant (p < 0.1)', '#2ca02c'),
    )
    layer_keys = [key for key, _, _ in layers]
    bar_width = 0.8

    for season, season_name in (('yearly', 'Yearly'), ('JJA', 'JJA (Summer)')):
        season_data = [record for record in all_stations_selected_data
                       if record['season'] == season]
        if not season_data:
            print(f"No data available for {season_name}")
            continue

        # Tally records per parameter and significance level
        tallies = {param: dict.fromkeys(layer_keys, 0) for param in parameters}
        for record in season_data:
            level = record['significance']
            if level not in ('high', 'medium'):
                level = 'insig'  # anything else is counted as not significant
            tallies[record['parameter']][level] += 1

        params = list(tallies)
        layer_counts = {
            key: np.array([tallies[param][key] for param in params], dtype=int)
            for key in layer_keys
        }

        fig, ax = plt.subplots(figsize=(16, 10))
        x_pos = np.arange(len(params))

        # Draw each layer on top of the running total of the layers below it,
        # and write the segment's count in the middle of the segment.
        running = np.zeros(len(params), dtype=int)
        for key, legend_label, colour in layers:
            heights = layer_counts[key]
            ax.bar(x_pos, heights, bar_width, bottom=running, label=legend_label,
                   color=colour, edgecolor='black', linewidth=1)
            for i, (base, count) in enumerate(zip(running, heights)):
                if count > 0:
                    ax.text(i, base + count / 2, f'{count}', ha='center', va='center',
                            fontweight='bold', fontsize=9, color='white')
            running = running + heights

        # Total count just above each non-empty bar
        for i, total in enumerate(running):
            if total > 0:
                ax.text(i, total + 0.1, f'{total}', ha='center', va='bottom',
                        fontweight='bold', fontsize=10)

        # Customize plot
        ax.set_xlabel('Parameters', fontsize=14, fontweight='bold')
        ax.set_ylabel('Number of Stations', fontsize=14, fontweight='bold')
        ax.set_title(f'Significance Analysis - {season_name} Season',
                     fontsize=16, fontweight='bold', pad=20)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(params, rotation=45, ha='right', fontsize=12)
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)
        ax.grid(True, axis='y', alpha=0.3)

        # Summary statistics; season_data is non-empty here, so total_all >= 1
        total_stations = len({record['station'] for record in season_data})
        total_high = int(layer_counts['high'].sum())
        total_medium = int(layer_counts['medium'].sum())
        total_insig = int(layer_counts['insig'].sum())
        total_all = total_high + total_medium + total_insig
        sig_percentage = (total_high + total_medium) / total_all * 100

        stats_text = (
            f'Total Stations: {total_stations}\n'
            f'Total Parameters: {total_all}\n'
            f'Highly Significant: {total_high} ({total_high/total_all*100:.1f}%)\n'
            f'Moderately Significant: {total_medium} ({total_medium/total_all*100:.1f}%)\n'
            f'Not Significant: {total_insig} ({total_insig/total_all*100:.1f}%)\n'
            f'Overall Significant: {total_high + total_medium}/{total_all} ({sig_percentage:.1f}%)'
        )

        # Place statistics on the right side outside the plot
        ax.text(1.02, 0.5, stats_text, transform=ax.transAxes, fontsize=11,
                fontweight='bold', verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.8),
                linespacing=1.5)

        # Reserve the right quarter of the figure for the legend and the
        # statistics box. A plain tight_layout() call would discard a margin
        # set with subplots_adjust(), so the margin is passed via rect instead.
        fig.tight_layout(rect=(0, 0, 0.75, 1))
        fig.savefig(os.path.join(output_directory, f'comprehensive_significance_{season}.png'),
                    dpi=300, bbox_inches='tight')
        plt.close(fig)

        print(f"Comprehensive bar plot created for {season_name} season")

# The six panels of the 2x3 grid, in row-major order. Each entry is
# (panel title, three (parameter column, circle label) pairs); a circle's
# label always travels with its set so the two cannot get out of step.
_VENN_PANELS = (
    ('LE_MDS vs H_MDS vs SWC',
     (('LE_F_MDS', 'LE_MDS'), ('H_F_MDS', 'H_MDS'), ('SWC_F_MDS_1', 'SWC'))),
    ('LE_MDS vs TA_F vs VPD_F',
     (('LE_F_MDS', 'LE_MDS'), ('TA_F', 'TA_F'), ('VPD_F', 'VPD_F'))),
    ('H_MDS vs TA_F vs TS_MDS',
     (('H_F_MDS', 'H_MDS'), ('TA_F', 'TA_F'), ('TS_F_MDS_1', 'TS_MDS'))),
    ('LE_MDS vs H_MDS vs NETRAD',
     (('LE_F_MDS', 'LE_MDS'), ('H_F_MDS', 'H_MDS'), ('NETRAD', 'NETRAD'))),
    ('H_MDS vs G_MDS vs SWC_MDS',
     (('H_F_MDS', 'H_MDS'), ('G_F_MDS', 'G_MDS'), ('SWC_F_MDS_1', 'SWC_MDS'))),
    ('LE_MDS vs H_MDS vs GPP',
     (('LE_F_MDS', 'LE_MDS'), ('H_F_MDS', 'H_MDS'), ('GPP_DT_VUT_MEAN', 'GPP'))),
)


def _significant_station_sets(season_records, alpha=0.2):
    """Map each parameter to the set of stations whose p-value is below alpha."""
    station_sets = {}
    for record in season_records:
        if record['p_value'] < alpha:
            station_sets.setdefault(record['parameter'], set()).add(record['station'])
    return station_sets


def _draw_venn_grid(station_sets, season_label, file_name):
    """Draw the six-panel Venn figure for one season and save it as a PNG."""
    fig, axes = plt.subplots(2, 3, figsize=(24, 16))

    for number, (ax, (title, circles)) in enumerate(zip(axes.flat, _VENN_PANELS), start=1):
        subsets = [station_sets.get(column, set()) for column, _ in circles]
        labels = [label for _, label in circles]

        if any(subsets):
            venn3(subsets, labels, ax=ax)
        else:
            # All three sets are empty, so there is nothing to draw
            ax.text(0.5, 0.5, 'No significant data', ha='center', va='center',
                    transform=ax.transAxes, fontsize=12)
        ax.set_title(f'{number}. {title}\n({season_label})', fontsize=14, fontweight='bold')

    fig.suptitle(f'Combined Venn Diagrams - {season_label} Season (p < 0.2)\n'
                 '6 Key Parameter Combinations',
                 fontsize=18, fontweight='bold', y=0.98)

    # Leave the top 4% of the figure for the suptitle
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(os.path.join(output_directory, file_name), dpi=300, bbox_inches='tight')
    plt.close(fig)


def create_combined_venn_diagrams_2x3():
    """Save a 2x3 grid of three-set Venn diagrams for each season.

    For the yearly and the JJA records in the module-level
    ``all_stations_selected_data``, each circle holds the stations whose
    selected-period p-value for one parameter is below 0.2. Six fixed
    parameter triples are drawn per season and written to
    ``combined_venn_diagrams_2x3_yearly.png`` and
    ``combined_venn_diagrams_2x3_jja.png`` inside ``output_directory``.

    A panel whose three sets are all empty shows 'No significant data'.
    Nothing is drawn when no records have been collected at all.
    """
    if len(all_stations_selected_data) == 0:
        print("No selected period data found for Venn diagrams")
        return

    # Build the per-parameter station sets for both seasons up front
    season_sets = {}
    for season in ('yearly', 'JJA'):
        season_records = [record for record in all_stations_selected_data
                          if record['season'] == season]
        season_sets[season] = _significant_station_sets(season_records)
        if not season_sets[season]:
            print(f"No significant records found for {season} season")

    _draw_venn_grid(season_sets['yearly'], 'Yearly',
                    'combined_venn_diagrams_2x3_yearly.png')
    print("Combined Venn diagrams created for Yearly season (2x3 layout)")

    _draw_venn_grid(season_sets['JJA'], 'JJA Summer',
                    'combined_venn_diagrams_2x3_jja.png')
    print("Combined Venn diagrams created for JJA season (2x3 layout)")


        

def save_comprehensive_selection_file():
    """Write the collected selected-period records to a multi-sheet workbook.

    Reads the module-level ``all_stations_selected_data`` and writes
    ``comprehensive_selected_periods_analysis.xlsx`` into
    ``output_directory`` with these sheets:

    * ``All_Selected_Periods_Data`` - every record;
    * ``Yearly_Data`` / ``JJA_Summer_Data`` - records of one season
      (only written when that season has records);
    * ``Station_Summary`` - one row per station with per-season counts;
    * ``Parameter_Summary_Yearly`` / ``Parameter_Summary_JJA`` - one row per
      entry of ``parameters`` (only written when that season has records).

    Prints a message and returns without writing when there are no records.
    """
    if len(all_stations_selected_data) == 0:
        print("No selected period data to save")
        return

    def tally(frame, level):
        # Number of rows in *frame* carrying the given significance label
        return len(frame[frame['significance'] == level])

    def season_columns(frame, prefix):
        # Summary columns of one station for one season; zeros when it has no rows
        if len(frame) == 0:
            return {
                f'{prefix}_Period': 'N/A',
                f'{prefix}_Total_Params': 0,
                f'{prefix}_High_Sig': 0,
                f'{prefix}_Medium_Sig': 0,
                f'{prefix}_Not_Sig': 0,
                f'{prefix}_Total_Score': 0,
            }
        return {
            # Every record of a station/season shares the same selected period
            f'{prefix}_Period': frame['period'].iloc[0],
            f'{prefix}_Total_Params': len(frame),
            f'{prefix}_High_Sig': tally(frame, 'high'),
            f'{prefix}_Medium_Sig': tally(frame, 'medium'),
            f'{prefix}_Not_Sig': tally(frame, 'insig'),
            f'{prefix}_Total_Score': frame['score_contribution'].sum(),
        }

    all_records = pd.DataFrame(all_stations_selected_data)
    output_file = os.path.join(output_directory, 'comprehensive_selected_periods_analysis.xlsx')

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        # Raw records, all seasons together
        all_records.to_excel(writer, sheet_name='All_Selected_Periods_Data', index=False)

        # Raw records, one sheet per season
        for season, sheet_prefix in (('yearly', 'Yearly'), ('JJA', 'JJA_Summer')):
            subset = all_records[all_records['season'] == season]
            if len(subset) > 0:
                subset.to_excel(writer, sheet_name=f'{sheet_prefix}_Data', index=False)

        # One summary row per station, yearly columns first, then JJA
        station_rows = []
        for station in all_records['station'].unique():
            per_station = all_records[all_records['station'] == station]
            row = {'Station': station}
            row.update(season_columns(per_station[per_station['season'] == 'yearly'], 'Yearly'))
            row.update(season_columns(per_station[per_station['season'] == 'JJA'], 'JJA'))
            station_rows.append(row)

        pd.DataFrame(station_rows).to_excel(writer, sheet_name='Station_Summary', index=False)

        # Per-parameter significance counts, one sheet per season
        for season, sheet_suffix in (('yearly', 'Yearly'), ('JJA', 'JJA')):
            subset = all_records[all_records['season'] == season]
            if len(subset) == 0:
                continue

            param_rows = []
            for param in parameters:
                per_param = subset[subset['parameter'] == param]
                n_records = len(per_param)
                n_high = tally(per_param, 'high')
                n_medium = tally(per_param, 'medium')
                n_insig = tally(per_param, 'insig')

                if n_records > 0:
                    sig_share = (n_high + n_medium) / n_records * 100
                else:
                    sig_share = 0

                param_rows.append({
                    'Parameter': param,
                    'Total_Stations': n_records,
                    'Highly_Significant': n_high,
                    'Moderately_Significant': n_medium,
                    'Not_Significant': n_insig,
                    'Significant_Count': n_high + n_medium,
                    'Significant_Percentage': sig_share,
                })

            pd.DataFrame(param_rows).to_excel(
                writer, sheet_name=f'Parameter_Summary_{sheet_suffix}', index=False)

    print(f"Comprehensive selection file saved: {output_file}")

# Main processing - collect data from all stations
#
# For every station in file_pairs, each available workbook (yearly and/or JJA)
# is read, the best-scoring period is selected, and the per-parameter records
# of that period are appended to all_stations_selected_data. Afterwards the
# Excel summary, the bar plots and the Venn diagrams are produced from that list.
print("Collecting data from all stations...")

for base_name, files in file_pairs.items():
    # The inner try blocks already catch every Exception raised while reading
    # and scoring a workbook; this outer handler only sees errors raised
    # outside of them (for example while building a file path).
    try:
        # Process yearly data
        if files['yearly']:
            yearly_file_path = os.path.join(input_directory, files['yearly'])
            try:
                # Each workbook is expected to contain 'P-values' and 'Slopes' sheets
                p_values_df_yearly = pd.read_excel(yearly_file_path, sheet_name='P-values')
                slopes_df_yearly = pd.read_excel(yearly_file_path, sheet_name='Slopes')
                
                # Clean the data
                # NOTE(review): all-NaN rows are dropped from each sheet
                # independently, so the two frames can end up with different
                # lengths or misaligned rows - confirm the sheets never differ.
                p_values_df_yearly = p_values_df_yearly.dropna(how='all')
                slopes_df_yearly = slopes_df_yearly.dropna(how='all')
                p_values_df_yearly = p_values_df_yearly.reset_index(drop=True)
                slopes_df_yearly = slopes_df_yearly.reset_index(drop=True)
                
                # Calculate scores and selection for yearly data
                scores_data_yearly, selected_period_yearly = calculate_score_and_selection(
                    p_values_df_yearly, slopes_df_yearly, parameters)
                
                # Store selected period data for comprehensive analysis
                if selected_period_yearly is not None:
                    selected_data = get_selected_period_data(
                        base_name, p_values_df_yearly, slopes_df_yearly, 
                        parameters, selected_period_yearly, 'yearly')
                    all_stations_selected_data.extend(selected_data)
                    # The value printed as "Period" is the positional row index
                    # of the selected period, not its 'Years' label.
                    print(f"Collected yearly data for {base_name} - Period: {selected_period_yearly}")
                
            except Exception as e:
                # Best effort: report the failure and carry on with the next file
                print(f"Error loading yearly data for {base_name}: {str(e)}")
        
        # Process JJA data
        if files['jja']:
            jja_file_path = os.path.join(input_directory, files['jja'])
            try:
                # Same sheet layout as the yearly workbook
                p_values_df_jja = pd.read_excel(jja_file_path, sheet_name='P-values')
                slopes_df_jja = pd.read_excel(jja_file_path, sheet_name='Slopes')
                
                # Clean the data
                p_values_df_jja = p_values_df_jja.dropna(how='all')
                slopes_df_jja = slopes_df_jja.dropna(how='all')
                p_values_df_jja = p_values_df_jja.reset_index(drop=True)
                slopes_df_jja = slopes_df_jja.reset_index(drop=True)
                
                # Calculate scores and selection for JJA data
                scores_data_jja, selected_period_jja = calculate_score_and_selection(
                    p_values_df_jja, slopes_df_jja, parameters)
                
                # Store selected period data for comprehensive analysis
                if selected_period_jja is not None:
                    selected_data = get_selected_period_data(
                        base_name, p_values_df_jja, slopes_df_jja, 
                        parameters, selected_period_jja, 'JJA')
                    all_stations_selected_data.extend(selected_data)
                    # As above, the printed period is a row index
                    print(f"Collected JJA data for {base_name} - Period: {selected_period_jja}")
                
            except Exception as e:
                # Best effort: report the failure and carry on with the next station
                print(f"Error loading JJA data for {base_name}: {str(e)}")
                
    except Exception as e:
        print(f"Error processing {base_name}: {str(e)}")

print(f"Data collection complete. Total records: {len(all_stations_selected_data)}")

# Create comprehensive plots and save file
print("Creating comprehensive analysis plots...")

# Save comprehensive selection file
save_comprehensive_selection_file()

# Create comprehensive bar plots (separate for yearly and JJA)
# Note: the confirmation messages below are printed unconditionally, even if
# a season had no data and its figure was skipped.
create_comprehensive_bar_plots_separate_seasons()
print("Comprehensive bar plots created for both seasons!")

# Create Venn diagrams (separate for yearly and JJA)
create_combined_venn_diagrams_2x3()
print("Venn diagrams created for both seasons!")

print("All comprehensive analysis completed!")

Collecting data from all stations...
Collected yearly data for progressive_trend_tables_AT-Neu - Period: 0
Collected JJA data for progressive_trend_tables_AT-Neu - Period: 0
Collected yearly data for progressive_trend_tables_BE-Bra - Period: 4
Collected JJA data for progressive_trend_tables_BE-Bra - Period: 8
Collected yearly data for progressive_trend_tables_BE-Lon - Period: 1
Collected JJA data for progressive_trend_tables_BE-Lon - Period: 7
Collected yearly data for progressive_trend_tables_BE-Vie - Period: 3
Collected JJA data for progressive_trend_tables_BE-Vie - Period: 6
Collected yearly data for progressive_trend_tables_CH-Cha - Period: 1
Collected JJA data for progressive_trend_tables_CH-Cha - Period: 2
Collected yearly data for progressive_trend_tables_CH-Dav - Period: 5
Collected JJA data for progressive_trend_tables_CH-Dav - Period: 4
Collected yearly data for progressive_trend_tables_CH-Fru - Period: 0
Collected JJA data for progressive_trend_tables_CH-Fru - Period: 0
Coll



Combined Venn diagrams created for Yearly season (2x3 layout)
Combined Venn diagrams created for JJA season (2x3 layout)
Venn diagrams created for both seasons!
All comprehensive analysis completed!


In [5]:
# Parameter triplets shown in the 2x3 grid, in panel order (row-major).
# 'params' are the values matched against each record's 'parameter' field (in
# the order the circles are drawn), 'labels' are the circle captions and
# 'title' is the panel heading (the season is appended when drawing).
_COMMON_VENN_COMBOS = {
    'combo1': {'params': ['LE_F_MDS', 'H_F_MDS', 'SWC_F_MDS_1'],
               'labels': ['LE_MDS', 'H_MDS', 'SWC'],
               'title': '1. LE_MDS vs H_MDS vs SWC'},
    'combo2': {'params': ['LE_F_MDS', 'TA_F', 'VPD_F'],
               'labels': ['LE_MDS', 'TA_F', 'VPD_F'],
               'title': '2. LE_MDS vs TA_F vs VPD_F'},
    'combo3': {'params': ['H_F_MDS', 'TA_F', 'TS_F_MDS_1'],
               'labels': ['H_MDS', 'TA_F', 'TS_MDS'],
               'title': '3. H_MDS vs TA_F vs TS_MDS'},
    'combo4': {'params': ['LE_F_MDS', 'H_F_MDS', 'NETRAD'],
               'labels': ['LE_MDS', 'H_MDS', 'NETRAD'],
               'title': '4. LE_MDS vs H_MDS vs NETRAD'},
    'combo5': {'params': ['H_F_MDS', 'G_F_MDS', 'SWC_F_MDS_1'],
               'labels': ['H_MDS', 'G_MDS', 'SWC_MDS'],
               'title': '5. H_MDS vs G_MDS vs SWC_MDS'},
    'combo6': {'params': ['LE_F_MDS', 'H_F_MDS', 'GPP_DT_VUT_MEAN'],
               'labels': ['LE_MDS', 'H_MDS', 'GPP'],
               'title': '6. LE_MDS vs H_MDS vs GPP'},
}

# Season key (as stored in each record's 'season' field) -> text used for it.
# 'label' appears in panel titles and the figure title, 'summary' in the
# console summary, 'file' is the PNG name written to output_directory.
_COMMON_VENN_SEASONS = {
    'yearly': {'label': 'Yearly', 'summary': 'Yearly',
               'file': 'venn_diagrams_common_stations_yearly.png'},
    'JJA': {'label': 'JJA Summer', 'summary': 'JJA',
            'file': 'venn_diagrams_common_stations_jja.png'},
}


def _summarise_season_for_common_venn(season_records, p_threshold):
    """Return per-combination common stations and significance sets for one season."""
    # Index the records once instead of rebuilding a DataFrame per combination.
    params_by_station = {}
    significant_by_param = {}
    for record in season_records:
        station = record['station']
        if pd.isna(station):
            # Records without a station id cannot be attributed to any circle.
            continue
        parameter = record.get('parameter')
        params_by_station.setdefault(station, set()).add(parameter)

        p_value = record.get('p_value')
        # Missing / NaN p-values never count as significant.
        if pd.notna(p_value) and p_value < p_threshold:
            significant_by_param.setdefault(parameter, set()).add(station)

    results = {}
    for combo_name, spec in _COMMON_VENN_COMBOS.items():
        required = set(spec['params'])
        # A station qualifies when it has a record for every parameter of the
        # combination, irrespective of significance.
        common_stations = {
            station for station, available in params_by_station.items()
            if required <= available
        }
        results[combo_name] = {
            'common_stations': common_stations,
            # Significance is only reported for the qualifying stations.
            'significance_sets': {
                param: significant_by_param.get(param, set()) & common_stations
                for param in spec['params']
            },
            'total_common': len(common_stations),
        }
    return results


def _draw_common_venn_panel(ax, combo_result, spec, season_label):
    """Draw one three-set Venn panel, or a placeholder when no station qualifies."""
    total_common = combo_result['total_common']
    if total_common > 0:
        sig_sets = combo_result['significance_sets']
        venn3([sig_sets[param] for param in spec['params']],
              spec['labels'], ax=ax)
        # Annotate with the number of stations the diagram is based on.
        ax.text(0.5, -0.1, f'Stations with all params: {total_common}',
                transform=ax.transAxes, ha='center', va='top',
                fontsize=11, fontweight='bold', color='darkblue')
    else:
        ax.text(0.5, 0.5, 'No stations with all parameters', ha='center', va='center',
                transform=ax.transAxes, fontsize=12)
    ax.set_title(f"{spec['title']}\n({season_label})", fontsize=14, fontweight='bold')


def _save_common_venn_figure(combo_results, season_text, p_threshold):
    """Render the 2x3 grid for one season and save it as a PNG in output_directory."""
    fig, axes = plt.subplots(2, 3, figsize=(24, 16))

    # axes.flat walks the grid row by row, matching the combo1..combo6 order.
    for ax, (combo_name, spec) in zip(axes.flat, _COMMON_VENN_COMBOS.items()):
        _draw_common_venn_panel(ax, combo_results[combo_name], spec, season_text['label'])

    fig.suptitle(
        f"Venn Diagrams - {season_text['label']} Season\n"
        f"(Only stations with data for all 3 parameters, p < {p_threshold})",
        fontsize=18, fontweight='bold', y=0.98)

    # Leave room at the top for the two-line figure title.
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(os.path.join(output_directory, season_text['file']),
                dpi=300, bbox_inches='tight')
    plt.close(fig)


def _common_venn_detail_rows(season_results):
    """Build one CSV row per (season, combination, qualifying station)."""
    # Every parameter used by any combination, in first-appearance order; this
    # fixes the order of the '<param>_sig' columns.
    all_params = list(dict.fromkeys(
        param for spec in _COMMON_VENN_COMBOS.values() for param in spec['params']
    ))

    rows = []
    for season_name, combo_results in season_results.items():
        for combo_name, combo_info in combo_results.items():
            params = _COMMON_VENN_COMBOS[combo_name]['params']
            sig_sets = combo_info['significance_sets']

            # Sorted so the CSV row order is reproducible between runs
            # (iterating the set directly is not).
            for station in sorted(combo_info['common_stations'], key=str):
                sig_status = {param: station in sig_sets[param] for param in params}
                row = {
                    'Season': season_name,
                    'Combination': combo_name,
                    'Station': station,
                    'Parameters': ', '.join(params),
                    'Significant_Count': sum(sig_status.values()),
                }
                # Parameters outside this combination are reported as False.
                for param in all_params:
                    row[f'{param}_sig'] = sig_status.get(param, False)
                rows.append(row)
    return rows


def create_combined_venn_diagrams_2x3_common_stations_only(*, p_threshold=0.2):
    """Create 2x3 Venn diagram grids restricted to stations with complete data.

    For each season ('yearly' and 'JJA') and each parameter triplet in
    ``_COMMON_VENN_COMBOS``, only stations that have a record for all three
    parameters are considered. Within those stations, each circle holds the
    stations whose ``p_value`` for that parameter is below ``p_threshold``.

    Reads the module-level ``all_stations_selected_data`` (records with
    'season', 'station', 'parameter' and 'p_value' entries) and writes into
    ``output_directory``:

    * ``venn_diagrams_common_stations_yearly.png``
    * ``venn_diagrams_common_stations_jja.png``
    * ``common_stations_significance_details.csv`` (only if any station
      qualifies for at least one combination)

    A per-season summary of qualifying station counts is printed.

    Args:
        p_threshold: Significance cut-off applied to ``p_value`` (strictly
            less than). Defaults to 0.2 and is echoed in the figure titles.

    Returns:
        None.
    """
    if not all_stations_selected_data:
        print("No data found for Venn diagrams")
        return

    # Step 1: per season, find qualifying stations and their significance sets.
    season_results = {}
    for season_key in _COMMON_VENN_SEASONS:
        season_records = [
            record for record in all_stations_selected_data
            if record['season'] == season_key
        ]
        season_results[season_key] = _summarise_season_for_common_venn(
            season_records, p_threshold)

    # Step 2: one figure plus console summary per season.
    for season_key, season_text in _COMMON_VENN_SEASONS.items():
        combo_results = season_results[season_key]
        _save_common_venn_figure(combo_results, season_text, p_threshold)

        print(f"\n{season_text['summary']} Season - Stations with all parameters:")
        for combo_name, combo_data in combo_results.items():
            print(f"  {combo_name}: {combo_data['total_common']} stations")

    # Step 3: detailed per-station significance table.
    detailed_results = _common_venn_detail_rows(season_results)
    if detailed_results:
        detailed_df = pd.DataFrame(detailed_results)
        detailed_df.to_csv(os.path.join(output_directory, 'common_stations_significance_details.csv'), index=False)
        print("\nDetailed significance data saved to CSV file")

    print("\nVenn diagrams created for common stations only")


# Combined 2x3 Venn diagrams, restricted to stations that have data for all parameters of a combination
# Announce the step before running it so the console log reads in execution
# order (the message used to appear only after the diagrams were finished).
print("Creating all Venn diagram variations...")
create_combined_venn_diagrams_2x3_common_stations_only()





Yearly Season - Stations with all parameters:
  combo1: 3 stations
  combo2: 5 stations
  combo3: 6 stations
  combo4: 4 stations
  combo5: 1 stations
  combo6: 10 stations

JJA Season - Stations with all parameters:
  combo1: 14 stations
  combo2: 23 stations
  combo3: 22 stations
  combo4: 22 stations
  combo5: 11 stations
  combo6: 33 stations

Detailed significance data saved to CSV file

Venn diagrams created for common stations only
Creating all Venn diagram variations...
