In [None]:
import pandas as pd
from helper.database_connection_helper import get_collection
from scipy.stats import pearsonr, spearmanr


# Query the collection for every document recorded with this
# configuration / instrument pair.
collection = get_collection()
collection_filter = {
    'configuration': 'Africa',
    'instrument': 'Pufferfish',
}

# Per-sample arrays copied out of each document; one DataFrame column each.
# NOTE(review): 'velocity' was not loaded originally, although the correlation
# step of this script needs a 'velocity' column (it failed with KeyError).
# This assumes every document stores a 'velocity' array parallel to the other
# fields -- confirm against the collection schema.
FIELDS = ('defect_channel', 'distance', 'magnetization', 'timestamp', 'velocity')

documents = collection.find(collection_filter)

all_data = []

for doc in documents:
    # Progress output: shows which document is currently being flattened.
    print(doc['_id'])

    # Turn the parallel arrays into one record per sample index. zip() stops
    # at the shortest array, so trailing samples of longer arrays are dropped.
    samples = zip(*(doc[field] for field in FIELDS))
    all_data.extend(dict(zip(FIELDS, sample)) for sample in samples)

# Passing the column list keeps the expected columns even when no document
# matched the filter (otherwise the frame would have no columns at all).
df = pd.DataFrame(all_data, columns=list(FIELDS))

# Outlier removal using 1.5 * IQR fences, evaluated column by column.
# Only numeric columns take part: quantiles and the fence arithmetic are not
# defined for string/object columns, and with pandas >= 2.0 DataFrame.quantile
# no longer skips such columns silently (numeric_only defaults to False).
numeric_df = df.select_dtypes(include='number')

Q1 = numeric_df.quantile(0.25)
Q3 = numeric_df.quantile(0.75)
IQR = Q3 - Q1

# A row is discarded as soon as ANY numeric column lies outside its fence.
# Comparisons against NaN are False, so rows with missing values are kept.
is_outlier = (numeric_df < (Q1 - 1.5 * IQR)) | (numeric_df > (Q3 + 1.5 * IQR))
df_clean = df[~is_outlier.any(axis=1)]

# Both correlation coefficients need at least two observations; fail with an
# explicit message instead of an opaque error from inside scipy.
if len(df_clean) < 2:
    raise ValueError(
        f"Need at least 2 rows after outlier removal to compute a correlation, "
        f"got {len(df_clean)}"
    )

# Requires df to provide 'magnetization' and 'velocity' columns.
# The second return value (p-value) is intentionally ignored.
pearson_corr, _ = pearsonr(df_clean['magnetization'], df_clean['velocity'])
print(f"Pearson Korrelation: {pearson_corr}")

spearman_corr, _ = spearmanr(df_clean['magnetization'], df_clean['velocity'])
print(f"Spearman Korrelation: {spearman_corr}")