In [27]:
import pandas as pd
from collections import Counter

def count_unique_and_common_ids(paths):
    """
    Count distinct participant IDs over a set of CSV files and report repeats.

    Each CSV is read with pandas. For the columns 'Participant Public ID' and
    'Participant Private ID', missing values are dropped and the remaining
    values are de-duplicated per file. A file that lacks one of the columns
    simply contributes nothing for that column.

    Parameters:
        paths (list): List of paths to the CSV files.

    Returns:
        tuple: (number of distinct public IDs over all files,
                number of distinct private IDs over all files,
                set of public IDs found in more than one file,
                set of private IDs found in more than one file).
               Both sets are empty when fewer than two files carry the column.
    """
    public_column = 'Participant Public ID'
    private_column = 'Participant Private ID'

    # For every ID, the number of *files* it occurs in (not the number of rows).
    files_per_id = {public_column: Counter(), private_column: Counter()}

    for path in paths:
        df = pd.read_csv(path)
        for column, counts in files_per_id.items():
            if column in df.columns:
                # De-duplicate within the file first so an ID that fills many
                # rows of one file still counts as a single occurrence.
                counts.update(set(df[column].dropna().tolist()))

    public_counts = files_per_id[public_column]
    private_counts = files_per_id[private_column]

    # An ID is "common" as soon as it shows up in at least two files. Requiring
    # it in every file (a plain set intersection) would miss participants who
    # repeat in only some files once three or more paths are given.
    common_public_ids = {pid for pid, n_files in public_counts.items() if n_files > 1}
    common_private_ids = {pid for pid, n_files in private_counts.items() if n_files > 1}

    return len(public_counts), len(private_counts), common_public_ids, common_private_ids

# Task CSV exports for the two experiment versions (141161-v2 and 141875-v2)
pth1 = '/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/Participants count/PA:SD task/data/data_exp_141161-v2/data_exp_141161-v2_task-dew6.csv'
pth2 = '/Volumes/T7/CMU LiMN Research/Perceptual Organization/Result & Analysis/Participants count/PA:SD task/data/data_exp_141875-v2/data_exp_141875-v2_task-dew6.csv'

# Report each file on its own first, then both files together.
for paths in ([pth1], [pth2], [pth1, pth2]):
    (
        unique_public_count,
        unique_private_count,
        common_public_ids,
        common_private_ids,
    ) = count_unique_and_common_ids(paths)

    # The combined report is set apart from the single-file ones by a blank line.
    if len(paths) > 1:
        print()
    print(f"Number of unique Participant Public IDs: {unique_public_count}")
    print(f"Number of unique Participant Private IDs: {unique_private_count}")
    print(f"Common Participant Public IDs across files: {common_public_ids}")
    print(f"Common Participant Private IDs across files: {common_private_ids}")


Number of unique Participant Public IDs: 41
Number of unique Participant Private IDs: 41
Common Participant Public IDs across files: set()
Common Participant Private IDs across files: set()
Number of unique Participant Public IDs: 40
Number of unique Participant Private IDs: 40
Common Participant Public IDs across files: set()
Common Participant Private IDs across files: set()

Number of unique Participant Public IDs: 79
Number of unique Participant Private IDs: 81
Common Participant Public IDs across files: {'5b222aff59f9620001c109cb', '617305abc8cde4cf0e08255b'}
Common Participant Private IDs across files: set()


Repeated Public ID 5b… (5b222aff59f9620001c109cb) is male,   30
Repeated Public ID 61… (617305abc8cde4cf0e08255b) is female, 35

In [28]:
# Summary statistics as originally computed over all 81 entries
old_mean = 31.753086419753085
old_std = 9.711502026202655
count = 81

# Values contributed by the two duplicated entries that must be removed
# (presumably the 35 and 30 noted for the two repeated Public IDs; confirm)
repeat1 = 35
repeat2 = 30

# Two entries are dropped, so the sample shrinks by two
corrected_total_subjects = count - 2

# Rebuild the original sum from the mean, take out the duplicated values,
# and average over the reduced sample
corrected_mean = (old_mean * count - repeat1 - repeat2) / corrected_total_subjects

corrected_mean, corrected_total_subjects


(31.734177215189874, 79)