In [8]:
import pandas as pd
import scipy.stats

def get_significant_correlations(df, variable, threshold=0.7):
    """
    Returns significant correlations for a given variable in a visually appealing format,
    avoiding repetitive pairs.

    :param df: pandas DataFrame containing the data
    :param variable: the variable for which to find significant correlations
    :param threshold: the threshold for significant correlations (default is 0.7)
    :return: a styled DataFrame with the significant correlations
    """
    if variable not in df.columns:
        raise ValueError(f"Variable '{variable}' not found in DataFrame.")

    # Calculate the correlation matrix
    corr_matrix = df.corr()

    # Create a DataFrame from the correlation matrix
    corr_pairs = corr_matrix.stack().reset_index()
    corr_pairs.columns = ['Variable 1', 'Variable 2', 'Correlation']

    # Calculate p-values
    def calculate_p_value(row):
        return scipy.stats.pearsonr(df[row['Variable 1']], df[row['Variable 2']])[1]

    corr_pairs['P-Value'] = corr_pairs.apply(calculate_p_value, axis=1)

    # Filter out correlations involving the specified variable and above the threshold
    significant_corrs = corr_pairs[((corr_pairs['Variable 1'] == variable) | 
                                   (corr_pairs['Variable 2'] == variable)) &
                                  (corr_pairs['Correlation'].abs() >= threshold)]

    # Remove mirrored duplicate pairs
    significant_corrs = significant_corrs[significant_corrs['Variable 1'] <= significant_corrs['Variable 2']]

    # Sort by the strength of correlation
    significant_corrs['Abs Correlation'] = significant_corrs['Correlation'].abs()
    significant_corrs = significant_corrs.sort_values(by='Abs Correlation', ascending=False)

    # Define a function for coloring
    def color_corr_value(val):
        color = 'green' if val > 0 else 'red'
        return f'color: {color}'

    # Apply styling
    return significant_corrs.style.applymap(color_corr_value, subset=['Correlation'])\
                                  .hide_columns('Abs Correlation')\
                                  .format("{:.2f}", subset=['Correlation', 'P-Value'])\
                                  .set_table_styles([{'selector': 'th', 'props': [('font-size', '12pt')]}])\
                                  .set_caption(f"Significant Correlations with {variable}")

# Usage example: read the spreadsheet, compute the strong correlations of the
# 'ph' column and show the resulting styled table.
# NOTE(review): display() is not imported in the visible code; presumably this
# runs in an IPython/Jupyter session where it is available. Confirm before
# running this as a plain script.
try:
    df = pd.read_excel('solos_fitólitos2.xlsx')
    results = get_significant_correlations(df, 'ph')
    display(results)
except Exception as e:
    # Any failure in loading, computing or displaying is reduced to a
    # one-line message instead of a traceback.
    print(f"An error occurred: {e}")


Unnamed: 0,Variable 1,Variable 2,Correlation,P-Value
138,ph,ph,1.0,0.0
1173,base_al,ph,0.89,0.0
723,ca_k,ph,0.89,0.0
179,ph,redness,-0.88,0.0
1308,calmag,ph,-0.87,0.0
1488,map,ph,-0.87,0.0
149,ph,sat_al,-0.87,0.0
1263,ciw,ph,-0.87,0.0
1218,cia,ph,-0.86,0.0
318,mo,ph,0.85,0.0
