In [23]:
import pandas as pd
from scipy.stats import pearsonr

In [62]:
def hypothesis_test(column1, column2, data_file='after_fe_outlier.csv', alpha=0.05):
    """
    Performs a Pearson correlation test between two columns of a CSV data file.

    Rows in which either column has a missing value are excluded before the
    test, so the statistic is computed from complete pairs only. The results
    and the significance verdict are printed as well as returned.

    Args:
        column1 (str): the name of the first column to use in the test.
        column2 (str): the name of the second column to use in the test.
        data_file (str): the path of the CSV file to load
            (default 'after_fe_outlier.csv').
        alpha (float): the significance level for the test; must lie strictly
            between 0 and 1 (default 0.05).

    Returns:
        A tuple ``(corr, pval)`` containing the correlation coefficient and
        the p-value produced by ``pearsonr``.

    Raises:
        KeyError: if ``column1`` or ``column2`` is not a column of the file.
        ValueError: if ``alpha`` is outside (0, 1), or if fewer than two rows
            have a value in both columns.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be between 0 and 1 (exclusive), got {alpha!r}")

    # Load the data
    df = pd.read_csv(data_file)

    # Fail early with a message that names the offending column(s)
    missing = [name for name in (column1, column2) if name not in df.columns]
    if missing:
        raise KeyError(f"Column(s) not found in {data_file}: {missing}")

    # Keep only the rows where both values are present. A boolean mask is used
    # (rather than df[[column1, column2]].dropna()) so that passing the same
    # column twice still works.
    complete = df[column1].notna() & df[column2].notna()
    n_dropped = int(len(df) - complete.sum())
    if n_dropped:
        print(f"Note: excluded {n_dropped} row(s) with missing values.")

    x = df.loc[complete, column1]
    y = df.loc[complete, column2]
    if len(x) < 2:
        raise ValueError(
            f"Need at least 2 complete rows to correlate {column1} and {column2}, "
            f"found {len(x)}."
        )

    # Perform the correlation test
    corr, pval = pearsonr(x, y)

    # Print the results
    print(f"The correlation coefficient between {column1} and {column2} is: {corr:.4f}")
    print(f"The p-value for this test is: {pval:.4f}")

    # Check for significance
    if pval < alpha:
        print(f"Alternative hypothesis (Ha): correlation is statistically significant at the {alpha} level.")
    else:
        print(f"Null hypothesis (H0): correlation is not statistically significant at the {alpha} level.")

    return corr, pval

In [63]:
# Test the linear association between outstanding debt and credit score,
# using the function's default data file and significance level.
corr, pval = hypothesis_test(column1='Outstanding_Debt', column2='Credit_Score')

The correlation coefficient between Outstanding_Debt and Credit_Score is: -0.3865
The p-value for this test is: 0.0000
Alternative hypothesis (Ha): correlation is statistically significant at the 0.05 level.


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=762520ba-b8cf-4151-96d5-65e8763600e9' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>