In [35]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr, kendalltau
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Read the workbook into a DataFrame whose row index is the
# 'Downstream Lot Number' column.
data = pd.read_excel(
    io='Moderna CTAC Load vs gDNA.xlsx',
    index_col='Downstream Lot Number',
)

In [None]:
# Show the loaded table so the exact column labels used by the later cells
# (including their spacing) can be checked against the workbook.
display(data)

In [37]:
def filter_data(data, column1, column2):
    """Return just ``column1`` and ``column2`` with incomplete rows removed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains both columns.
    column1, column2 : str
        Labels of the two columns to keep, in that order.

    Returns
    -------
    pandas.DataFrame
        A new two-column frame holding only the rows in which neither
        column is missing. ``data`` itself is left untouched.
    """
    wanted_columns = [column1, column2]
    return data[wanted_columns].dropna()

In [38]:
# One cleaned two-column frame per conductivity column: first paired with its
# COA column, then with the 'Post-CTAC ' column. Labels are reproduced with
# their exact spacing because they must match the workbook headers.
# NOTE(review): the first pairing spells its column 'COA %' (one space) while
# the other two use 'COA  %' (two spaces) - confirm both headers really exist.
cond_1, cond_2, cond_3 = (
    filter_data(data, conductivity_col, coa_col)
    for conductivity_col, coa_col in (
        ('CTAC Final Load Conductivity1', 'COA %'),
        ('CTAC Final Load Conductivity 2', 'COA  %'),
        ('CTAC Final Load Conductivity 3', 'COA  %'),
    )
)
cond_1_post, cond_2_post, cond_3_post = (
    filter_data(data, conductivity_col, 'Post-CTAC ')
    for conductivity_col in (
        'CTAC Final Load Conductivity1',
        'CTAC Final Load Conductivity 2',
        'CTAC Final Load Conductivity 3',
    )
)

In [39]:
def correlation_analysis(data, column1, column2):
    """Plot ``column2`` against ``column1`` and print their correlations.

    Draws a scatter plot with a fitted linear-regression line, then prints
    Pearson's, Spearman's and Kendall's coefficients with their p-values.

    Rows with a missing value in either column are dropped before anything
    is computed, so the function works on an unfiltered frame as well as on
    one that was already cleaned (where the drop is a no-op).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    column1 : str
        Label of the column used for the x-axis and as the regressor.
    column2 : str
        Label of the column used for the y-axis and as the response.

    Returns
    -------
    None
        Results are shown as a figure and printed text only.

    Raises
    ------
    KeyError
        If either label is not a column of ``data``.
    ValueError
        If fewer than two complete rows remain after dropping missing
        values; the correlation tests need at least two observations.
    """
    # Keep only complete pairs: the regression fit does not accept missing
    # values, so unfiltered input would otherwise fail part-way through.
    selected_data = data[[column1, column2]].dropna()
    if len(selected_data) < 2:
        raise ValueError(
            f"Need at least 2 complete rows for {column1!r} and {column2!r}, "
            f"got {len(selected_data)}"
        )

    x_values = selected_data[column1]
    y_values = selected_data[column2]

    # Linear (Pearson) and rank-based (Spearman, Kendall) association measures.
    pearson_corr, pearson_pvalue = pearsonr(x_values, y_values)
    spearman_corr, spearman_pvalue = spearmanr(x_values, y_values)
    kendall_corr, kendall_pvalue = kendalltau(x_values, y_values)

    # Scatter plot of the raw observations.
    plt.figure(figsize=(10, 6))
    plt.scatter(x_values, y_values, alpha=0.7, label='Data')

    # Fit y on x; the estimator expects a 2-D feature matrix, hence the
    # double-bracket selection that keeps X as a one-column frame.
    X = selected_data[[column1]]
    reg_model = LinearRegression()
    reg_model.fit(X, y_values)
    y_pred = reg_model.predict(X)

    # Overlay the fitted line on the scatter plot.
    plt.plot(X, y_pred, color='red', label='Regression Line')

    plt.title(f'Scatter Plot of {column1} vs {column2}')
    plt.xlabel(column1)
    plt.ylabel(column2)
    plt.legend()
    plt.grid(True)
    plt.show()

    # Report the coefficients after the figure so they appear beneath it.
    print(f"Pearson's Correlation: {pearson_corr:.3f} (p-value: {pearson_pvalue:.3f})")
    print(f"Spearman's Correlation: {spearman_corr:.3f} (p-value: {spearman_pvalue:.3f})")
    print(f"Kendall's Tau Correlation: {kendall_corr:.3f} (p-value: {kendall_pvalue:.3f})")

In [None]:
# Conductivity vs. COA for each of the three conductivity columns.
# The second label must be the one its frame was filtered on: cond_1 holds
# 'COA %' (the previous 'COAA %' is not a column of cond_1 and raised KeyError).
correlation_analysis(cond_1, 'CTAC Final Load Conductivity1', 'COA %')
correlation_analysis(cond_2, 'CTAC Final Load Conductivity 2', 'COA  %')
correlation_analysis(cond_3, 'CTAC Final Load Conductivity 3', 'COA  %')

In [None]:
# Conductivity vs. the 'Post-CTAC ' column, one analysis per conductivity
# column, run in the same order as the frames were built.
for post_frame, conductivity_col in (
    (cond_1_post, 'CTAC Final Load Conductivity1'),
    (cond_2_post, 'CTAC Final Load Conductivity 2'),
    (cond_3_post, 'CTAC Final Load Conductivity 3'),
):
    correlation_analysis(post_frame, conductivity_col, 'Post-CTAC ')