In [5]:
import pandas as pd
import os

# Directories
# Both values are later joined to file names by plain string concatenation,
# so the trailing "/" on each path is required.
# Folder the per-chromosome "*_matching_combined.parquet" inputs are read from.
output_dir_combined = "../../../Data/Filtered_unphased_training_data_union_final/"
# Folder the MAF CSV files are written to.
maf_output_dir = "../../../Data/MAF_calculations/"

# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(maf_output_dir, exist_ok=True)

def calculate_maf(combined_data_df):
    """Return the minor allele frequency (MAF) of every column in a genotype table.

    Each column is treated as one SNP. A value of 1 counts as one copy of the
    tallied allele and a value of 2 as two copies; every other non-missing
    value counts as zero copies. Missing values are excluded from the
    denominator.
    NOTE(review): assumes genotypes are coded as 0/1/2 allele counts -- confirm
    against the code that produces the parquet files.

    Args:
        combined_data_df: DataFrame with one column per SNP and one row per
            sample.

    Returns:
        dict mapping each column label to its MAF as a float in [0, 0.5].
        Columns without any non-missing value get 0.0.
    """
    maf_dict = {}
    for col in combined_data_df.columns:
        # value_counts() drops NaN by default, so only observed genotypes count.
        allele_counts = combined_data_df[col].value_counts()
        total_alleles = allele_counts.sum() * 2  # two alleles per sample

        if total_alleles == 0:
            # Nothing observed for this SNP; avoid dividing by zero.
            maf_dict[col] = 0.0
            continue

        counted_allele_count = allele_counts.get(1, 0) + allele_counts.get(2, 0) * 2
        counted_allele_freq = counted_allele_count / total_alleles

        # The tallied allele is not necessarily the minor one: when its
        # frequency exceeds 0.5 the *other* allele is the minor allele, so the
        # frequency is folded to keep the result within [0, 0.5].
        maf_dict[col] = float(min(counted_allele_freq, 1 - counted_allele_freq))
    return maf_dict

# Per-chromosome results: "chr<N>" -> {SNP column: MAF}.
maf_results = {}

# Process the 22 autosomes one at a time.
for chromosome_number in range(1, 23):
    # Input genotypes and output CSV location for this chromosome.
    file_dir = f"{output_dir_combined}23AndMe_PRS313_merged_chr{chromosome_number}_matching_combined.parquet"
    maf_output_file = f"{maf_output_dir}23AndMe_PRS313_merged_chr{chromosome_number}_MAF.csv"

    # Read the combined genotype table and derive one MAF value per column.
    combined_data_df = pd.read_parquet(file_dir)
    maf_dict = calculate_maf(combined_data_df)

    # Keep the result in memory for the combined export further down.
    maf_results[f'chr{chromosome_number}'] = maf_dict

    # Write a two-column (SNP, MAF) table for this chromosome.
    maf_df = pd.DataFrame(list(maf_dict.items()), columns=['SNP', 'MAF'])
    maf_df.to_csv(maf_output_file, index=False)

    print(f"Chromosome {chromosome_number} MAF calculated and saved.")

# Optionally, stack every chromosome's results into a single CSV.
maf_combined_output_file = f"{maf_output_dir}23AndMe_PRS313_all_chromosomes_MAF.csv"

# One (SNP, MAF) frame per chromosome, in the order the results were stored.
per_chromosome_frames = []
for snp_to_maf in maf_results.values():
    frame = pd.DataFrame.from_dict(snp_to_maf, orient='index', columns=['MAF'])
    # The SNP labels come back as the index; turn them into a regular column.
    frame = frame.reset_index().rename(columns={'index': 'SNP'})
    per_chromosome_frames.append(frame)

maf_combined_df = pd.concat(per_chromosome_frames)
maf_combined_df.to_csv(maf_combined_output_file, index=False)

print("All chromosomes MAF calculated and combined file saved.")


Chromosome 1 MAF calculated and saved.
Chromosome 2 MAF calculated and saved.
Chromosome 3 MAF calculated and saved.
Chromosome 4 MAF calculated and saved.
Chromosome 5 MAF calculated and saved.
Chromosome 6 MAF calculated and saved.
Chromosome 7 MAF calculated and saved.
Chromosome 8 MAF calculated and saved.
Chromosome 9 MAF calculated and saved.
Chromosome 10 MAF calculated and saved.
Chromosome 11 MAF calculated and saved.
Chromosome 12 MAF calculated and saved.
Chromosome 13 MAF calculated and saved.
Chromosome 14 MAF calculated and saved.
Chromosome 15 MAF calculated and saved.
Chromosome 16 MAF calculated and saved.
Chromosome 17 MAF calculated and saved.
Chromosome 18 MAF calculated and saved.
Chromosome 19 MAF calculated and saved.
Chromosome 20 MAF calculated and saved.
Chromosome 21 MAF calculated and saved.
Chromosome 22 MAF calculated and saved.
All chromosomes MAF calculated and combined file saved.
