In [2]:
import pandas as pd
import os
import pickle

## Download and prepare the IEDB Population coverage tool

1. Download the population coverage tool from [here](http://tools.iedb.org/population/download/).
2. Untar the contents of the downloaded file to the root folder of this notebook.
3. The root folder of this notebook will contain the `population_coverage` folder.
4. Run the cell below to make sure you have a working version of the population coverage tool (you should see the usage output).



In [4]:
# Sanity check: ask the downloaded tool for its usage text. os.system returns
# the command's exit status, which the notebook shows as the cell value.
usage_check_command = "python ./population_coverage/calculate_population_coverage.py --help"
os.system(usage_check_command)

usage: python calculate_population_coverage.py [-h] -p [POPULATION] -c [MHC_CLASS] -f [FILE]

Created on: 03/06/2017 @author: Dorjee Gyaltsen @brief: calculates population
coverage - standalone version

options:
  -h, --help            show this help message and exit
  --list                list all population and ethnicity
  --plot PATH           generate a plot.
  --version             show program's version number and exit

required arguments:
  -p POPULATION [POPULATION ...], --population POPULATION [POPULATION ...]
                        select comma-separated area(s) or population(s)
  -c MHC_CLASS [MHC_CLASS ...], --mhc_class MHC_CLASS [MHC_CLASS ...]
                        select one or more comma-separated mhc class option -
                        I, II, combined
  -f FILE, --file FILE  a file containing a list of epitopes and associated
                        alleles (comma-separated)


0

#### Format the AFND frequencies as a pickle that the IEDB population coverage tool can load

In [5]:
from collections import OrderedDict

# Load the AFND allele-frequency table. The code below relies on the columns
# "Population", "Locus", "Allele" and "Allele Frequency".
afnd_df = pd.read_csv("AFND_data_locus_all.csv")

# Replace the accented "á" in population names with a plain "a".
# The .str accessor leaves missing values untouched instead of raising.
afnd_df["Population"] = afnd_df["Population"].str.replace("á", "a", regex=False)

# Write the cleaned table back over the input file. index=False keeps the
# round trip idempotent: with the default index=True every re-run of this cell
# would add one more "Unnamed: ..." column to the CSV.
afnd_df.to_csv("AFND_data_locus_all.csv", index=False)

# Build {population: {"HLA-<locus>": [("HLA-<allele>", frequency), ...]}}.
# Every population gets an entry for every locus present in the table; loci
# with no rows for that population map to an empty list. (The previous
# DataFrame.apply(axis=1) call returned an empty DataFrame for such empty
# selections, so list(...) stored the column names instead of an empty list.)
all_loci = afnd_df["Locus"].unique()
full_dict = OrderedDict()
# sort=False keeps populations in order of first appearance, like unique().
for population, population_df in afnd_df.groupby("Population", sort=False):
    pop_dict = OrderedDict()
    for locus in all_loci:
        locus_df = population_df[population_df["Locus"] == locus]
        pop_dict["HLA-{}".format(locus)] = list(
            zip("HLA-" + locus_df["Allele"], locus_df["Allele Frequency"])
        )
    full_dict[population] = pop_dict

# All loci are stored under the single top-level key "I".
final_dict = {"I": full_dict}
with open("our_frequencies_pickle.p", "wb") as pickle_file:
    pickle.dump(final_dict, pickle_file)

#### Copy this file to `./population_coverage/population_coverage_pickle/population_genotype_map.p`, replacing the tool's original pickle

In [23]:
import shutil

# Overwrite the pickle shipped with the IEDB tool with the AFND-based one built
# above. shutil.copyfile works on every platform, unlike shelling out to `cp`,
# and raises an exception if the copy fails instead of returning a status code.
shutil.copyfile(
    "./our_frequencies_pickle.p",
    "./population_coverage/population_coverage_pickle/population_genotype_map.p",
)

0

#### Rewrite the tool's `population_coverage_pickle/__init__.py` so that it loads the single object stored in our pickle

In [35]:
# Target: the __init__.py of the tool's population_coverage_pickle package.
init_file_location = "./population_coverage/population_coverage_pickle/__init__.py"

# Replacement module source. It unpickles one object and binds it to all three
# names (population_coverage, country_ethnicity, ethnicity); the two extra
# pickle.load calls are left commented out inside the written file.
init_file_content = '''
import pickle
from pkg_resources import resource_filename  # @UnresolvedImport

package_name = "population_coverage_pickle"
pickle_filename = "population_genotype_map.p"
pickle_file_path = resource_filename(package_name, pickle_filename)

with open(pickle_file_path, "rb") as pickle_file:
    population_coverage = pickle.load(pickle_file)
    #country_ethnicity = pickle.load(pickle_file)
    #ethnicity = pickle.load(pickle_file)
    country_ethnicity = population_coverage
    ethnicity = population_coverage
'''

# Overwrite the existing file with the replacement source.
with open(init_file_location, mode="w") as init_handle:
    init_handle.write(init_file_content)

## Run population coverage for each of our populations

In [3]:
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
from numpy import trapz
from scipy.spatial import distance
import numpy as np

In [11]:

def append_n_peptides(ideal_n_peptides, allele, ideal_dataset):
    """Append placeholder peptides for one allele to an ideal dataset.

    Args:
        ideal_n_peptides: Number of peptides to add; converted with ``int()``.
        allele: Allele label repeated once per added peptide.
        ideal_dataset: Dict with list values under the keys ``"peptide"`` and
            ``"allele"``. It is extended in place.

    Returns:
        The same ``ideal_dataset`` object, after extension.
    """
    ideal_n_peptides = int(ideal_n_peptides)
    # Placeholder peptide ids are simply 0 .. n-1.
    ideal_dataset["peptide"].extend(range(ideal_n_peptides))
    # Use the `allele` argument itself. The previous version overwrote it with
    # `row.Allele`, where `row` is not defined in this function, so the call
    # raised NameError (or silently picked up a stale global named `row`).
    ideal_dataset["allele"].extend([allele] * ideal_n_peptides)
    return ideal_dataset

def _run_iedb_tool(exec_file, population, epitope_file, output_file):
    """Run the IEDB population-coverage script for MHC class I and save its stdout.

    The command is handed to ``subprocess`` as an argument list instead of a
    shell string, so population names and file paths that contain spaces,
    parentheses or quotes (e.g. "Bolivia/Chile Aymara NA-DHS_13 (G)") need no
    escaping and cannot break the redirection.

    Args:
        exec_file: Path to ``calculate_population_coverage.py``.
        population: Population name passed to the tool's ``-p`` option.
        epitope_file: File passed to the tool's ``-f`` option.
        output_file: File that receives the tool's standard output.

    Returns:
        The tool's exit status (0 means success).
    """
    # Function-scope imports keep this cell independent of the import cells.
    import subprocess
    import sys

    # Run the tool with the interpreter that runs this kernel rather than
    # whatever "python" happens to be first on PATH.
    command = [sys.executable or "python", str(exec_file),
               "-p", population, "-c", "I", "-f", str(epitope_file)]
    print("{} > {}".format(" ".join(command), output_file))
    with open(output_file, "wb") as output_handle:
        exit_status = subprocess.run(command, stdout=output_handle).returncode
    if exit_status != 0:
        # Carry on as the former os.system() call did, but make the failure visible.
        print("WARNING: IEDB tool exited with status {} (output file: {})".format(exit_status, output_file))
    return exit_status


def run_population_coverage(population, afnd_df, dataset_name, exec_file, output_directory="."):
    """Compare population coverage of a real peptide dataset with an "ideal" one.

    Steps:
      1. Run the IEDB tool on ``./datasets/<dataset_name>.csv`` for ``population``.
      2. Build an ideal dataset in which each HLA-A/B/C allele of the population
         receives ``int(n_peptides * allele_frequency / n_loci)`` placeholder
         peptides, and run the tool on it.
      3. Save the combined result table and a scatter plot (SVG), then compare
         the two ``cumulative_coverage`` columns.

    Args:
        population: Population name as it appears in ``afnd_df.Population``.
        afnd_df: DataFrame with the columns ``Population``, ``Locus``,
            ``Allele`` and ``Allele Frequency``.
        dataset_name: Base name of a headerless, tab-separated file in
            ``./datasets`` (relative to the current working directory) whose
            first two columns are peptide and allele.
        exec_file: Path to ``calculate_population_coverage.py``.
        output_directory: Directory for all generated files; created if missing.

    Returns:
        ``scipy.spatial.distance.jensenshannon`` of the two zero-padded
        cumulative-coverage lists. Note that scipy returns the Jensen-Shannon
        *distance* (square root of the divergence), although the log messages
        call it "JS divergence".
    """
    class_one_loci = ["A", "B", "C"]

    # ---- real dataset ----------------------------------------------------
    print("===============================")
    print("Calculating PC for {} for dataset {}".format(population, dataset_name))
    print("===============================")
    root = Path(output_directory)
    print("Saving outputs to: {}".format(root))
    root.mkdir(parents=True, exist_ok=True)

    # Spaces and slashes in population names are replaced for use in file names.
    population_tmp_name = population.replace(" ", "_").replace("/", "_")

    dataset_real = os.path.join(Path(os.getcwd()), Path("./datasets/{}.csv".format(dataset_name)))
    dataset_real_pd = pd.read_csv(dataset_real, header=None, sep="\t")
    dataset_real_pd.columns = ["peptide", "allele", "locus"]
    # Locus = text between the first "-" and the "*", e.g. "HLA-A*01:01" -> "A".
    dataset_real_pd["locus"] = dataset_real_pd["allele"].apply(lambda x: x[x.find("-")+1:x.find("*")])
    # NOTE: this filter only affects the statistics below and the size of the
    # ideal dataset; the tool itself is run on the unfiltered file.
    dataset_real_pd = dataset_real_pd[dataset_real_pd.locus.isin(class_one_loci)]
    dataset_peptide_n = dataset_real_pd.shape[0]
    dataset_allele_n = dataset_real_pd["allele"].unique().shape[0]
    dataset_locus_n = dataset_real_pd["locus"].unique().shape[0]
    print(dataset_real_pd)
    print("Looking at dataset {} with total {} peptides across {} alleles {} loci".format(
        dataset_name, dataset_peptide_n, dataset_allele_n, dataset_locus_n))

    output_file_real = os.path.join(root, "{}_{}_real.txt".format(dataset_name, population_tmp_name))
    print("Running IEDB tool for population coverage and saving output to: {}".format(output_file_real))
    _run_iedb_tool(exec_file, population, dataset_real, output_file_real)
    print(output_file_real)
    # The first 6 lines of the tool's output are skipped before the
    # tab-separated table (presumably a summary header; verify against the tool).
    res_pd_real = pd.read_csv(output_file_real, skiprows=6, sep="\t")

    # ---- ideal dataset ---------------------------------------------------
    print("===============================")
    print("Calculating PC for {} for ideal dataset {}".format(population, dataset_name))
    print("===============================")
    # .copy() so that adding a column below does not write into a view of afnd_df.
    tmp_afnd_df = afnd_df[(afnd_df.Population == population)
                          & afnd_df.Locus.isin(class_one_loci)].copy()
    locus_n = len(tmp_afnd_df["Locus"].unique())
    allele_n = len(tmp_afnd_df["Allele"].unique())
    print("The original dataset {} has {} peptides".format(dataset_name, dataset_peptide_n))
    print("The population has {} alleles across {} loci".format(allele_n, locus_n))
    # Share the peptide budget equally between loci, then by allele frequency.
    tmp_afnd_df["ideal_n_peptides"] = tmp_afnd_df["Allele Frequency"].apply(
        lambda x: int((dataset_peptide_n * x)/locus_n))
    tmp_afnd_df = tmp_afnd_df.sort_values(by=["Allele", "Locus"])
    tmp_afnd_df = tmp_afnd_df[tmp_afnd_df.ideal_n_peptides > 0]
    # Repeat each allele row once per peptide it should receive.
    ideal_dataset_df = tmp_afnd_df.loc[tmp_afnd_df.index.repeat(tmp_afnd_df.ideal_n_peptides)].reset_index(drop=True)
    ideal_dataset_df["peptide"] = ideal_dataset_df.index
    ideal_dataset_df["peptide"] = ideal_dataset_df["peptide"].apply(lambda x: "pep_{}".format(x))
    ideal_dataset_df["allele"] = ideal_dataset_df.Allele.apply(lambda x: "HLA-{}".format(x))
    ideal_dataset_df = ideal_dataset_df[["peptide", "allele"]]
    ideal_dataset_file = os.path.join(root, Path("./{}_{}_ideal.csv".format(dataset_name, population_tmp_name)))
    ideal_dataset_df.to_csv(ideal_dataset_file, header=None, index=False, sep="\t")
    print("Ideal dataset has {} peptides across {} alleles".format(len(ideal_dataset_df.peptide),
                                                                       len(ideal_dataset_df.allele.unique())))
    print(tmp_afnd_df[["Allele", "Locus", "ideal_n_peptides"]].reset_index())

    print("Saved dummy ideal dataset to: {}".format(ideal_dataset_file))
    output_file_ideal = os.path.join(root, "{}_{}_ideal.txt".format(dataset_name, population_tmp_name))
    print("Running IEDB tool for population coverage and saving output to: {}".format(output_file_ideal))
    _run_iedb_tool(exec_file, population, ideal_dataset_file, output_file_ideal)

    # ---- combine, save and plot -------------------------------------------
    print("===============================")
    print("Plotting results")
    print("===============================")
    res_pd_ideal = pd.read_csv(output_file_ideal, skiprows=6, sep="\t")

    res_pd_ideal["type"] = "ideal"
    res_pd_real["type"] = "real"
    res_pd_all = pd.concat([res_pd_ideal, res_pd_real])
    results_output = os.path.join(root, "{}_{}_results.csv".format(dataset_name, population_tmp_name))
    res_pd_all.to_csv(results_output)

    fig = px.scatter(res_pd_all, x="epitope_hits", y="cumulative_coverage", color="type")
    results_fig_output = os.path.join(root, "{}_{}_plot.svg".format(dataset_name, population_tmp_name))
    # Static image export needs the "kaleido" package to be installed.
    fig.write_image(results_fig_output, width=1500, height=500)

    # ---- compare the two coverage curves ----------------------------------
    print("===============================")
    print("Calculating JS divergence")
    print("===============================")
    y_real = list(res_pd_real.cumulative_coverage)
    y_ideal = list(res_pd_ideal.cumulative_coverage)
    # Pad the shorter curve with zeros so both have the same length.
    max_y_len = max(len(y_real), len(y_ideal))
    y_real_fit = y_real + [0] * (max_y_len - len(y_real))
    y_ideal_fit = y_ideal + [0] * (max_y_len - len(y_ideal))
    dist = distance.jensenshannon(y_real_fit, y_ideal_fit)
    print("JS divergence: {}".format(dist))
    return dist
    

### Test for single population

In [12]:
from pathlib import Path

# Smoke test: run the whole pipeline for one population and one dataset.
# NOTE: out_dir is assigned here, but the call below writes its outputs to the
# current directory (".").
out_dir = "./iedb_afnd_pc_results"
exec_file = Path("./population_coverage/calculate_population_coverage.py")
tmp_res = run_population_coverage(
    population="American Samoa",
    afnd_df=afnd_df,
    dataset_name="mhcflurry_peptides_ba",
    exec_file=exec_file,
    output_directory=".",
)

Calculating PC for American Samoa for dataset mhcflurry_peptides_ba
Saving outputs to: .
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
165527   YVYFYDLSY  HLA-C*15:02     C
165528   YWMGGTTYF  HLA-C*15:02     C
165529   YYFSYPLFV  HLA-C*15:02     C
165530   YYGRWVHEF  HLA-C*15:02     C
165531   YYKKTFSAL  HLA-C*15:02     C

[165532 rows x 3 columns]
Looking at dataset mhcflurry_peptides_ba with total 165532 peptides across 131 alleles 3 loci
Running IEDB tool for population coverage and saving output to: ./mhcflurry_peptides_ba_American_Samoa_real.txt
python population_coverage/calculate_population_coverage.py -p "American Samoa" -c I -f /home/anja/Documents/HLAequity/HLAlleleBias/Datasets_population_coverage/datasets/mhcflurry_peptides_ba.csv > ./mhcflurry_peptides_ba_

ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido


### Run for all populations

In [7]:
import numpy as np
# Optional: resume an interrupted run from the middle of the population list.
# Uncomment the lines below, use np.argwhere to look up the index of the
# population to resume from, and slice the array from that index.
# NOTE(review): the cell that runs all populations reassigns all_populations
# from afnd_df, which would discard this slice - confirm the intended workflow.
#all_populations = afnd_df.Population.unique()
#np.argwhere(all_populations=='Russia Bering Island Aleuts')
#all_populations = all_populations[166:]

In [58]:
# Run the real-vs-ideal comparison for every population and dataset, and
# collect one distance value per (population, dataset) pair.
all_populations = afnd_df.Population.unique()
all_datasets = ["mhcflurry_peptides_ba", "netmhc_peptides_ba"]
out_dir = "./iedb_afnd_pc_results"
exec_file = Path("./population_coverage/calculate_population_coverage.py")

results = {"population": [], "dataset": [], "js_div": []}
failed = {"population": [], "dataset": []}
for population in all_populations:
    for dataset in all_datasets:
        try:
            res = run_population_coverage(population, afnd_df, dataset, exec_file, out_dir)
        except Exception as err:
            # Catch Exception rather than using a bare `except:` so that
            # interrupting the kernel (KeyboardInterrupt) still stops the loop,
            # and report which dataset failed and why.
            print("Failed for {} (dataset {}): {!r}".format(population, dataset, err))
            failed["population"].append(population)
            failed["dataset"].append(dataset)
        else:
            results["population"].append(population)
            results["dataset"].append(dataset)
            results["js_div"].append(res)

# Make sure the output directory exists even if no run got far enough to create it.
Path(out_dir).mkdir(parents=True, exist_ok=True)
pd.DataFrame(results).to_csv(out_dir+"/all_results.csv")

Calculating PC for American Samoa for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_American_Samoa_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "American Samoa" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\mhcflurry_peptides.csv > i

Plotting results
Calculating JS divergence
JS divergence: 0.5032643364599204
Calculating PC for American Samoa for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_American_Samoa_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "American Samoa" -c I -f C:\U

Plotting results
Calculating JS divergence
JS divergence: 0.4480874451587635
Calculating PC for Australia Cape York Peninsula Aborigine for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Australia_Cape_York_Peninsula_Aborigine_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Austral

Calculating JS divergence
JS divergence: 0.6051816478218356
Calculating PC for Australia Groote Eylandt Aborigine for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Australia_Groote_Eylandt_Aborigine_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Australia Groote Eylandt 

iedb_afnd_pc_results\hlathena_Australia_Groote_Eylandt_Aborigine_real.txt
Calculating PC for Australia Groote Eylandt Aborigine for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 37 alleles across 3 loci
Ideal dataset has 142480 peptides across 33 alleles
    index   Allele Locus  ideal_n_peptides
0      23  A*01:01     A              1280
1      24  A*02:01     A              5075
2      25  A*11:01     A             11384
3      26  A*24:02     A             13897
4      29  A*31:01     A               332
5      30  A*32:01     A               332
6      31  A*34:01     A             15178
7   15145  B*08:01     B               948
8   15146  B*13:01     B             11051
9   15147  B*14:01     B               332
10  15148  B*15:02     B               332
11  15149  B*15:21     B              2846
12  15150  B*15:25     B              3794
13  15152  B*40:01     B              8205
14  15153  B*40:02     B              8538
15  15154  

iedb_afnd_pc_results\hlathena_Australia_Kimberly_Aborigine_real.txt
Calculating PC for Australia Kimberly Aborigine for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 18 alleles across 3 loci
Ideal dataset has 142292 peptides across 18 alleles
    index   Allele Locus  ideal_n_peptides
0      32  A*02:01     A              5265
1      33  A*03:01     A               664
2      34  A*11:01     A              4601
3      35  A*24:02     A              3936
4      36  A*24:13     A               664
5      37  A*34:01     A             32302
6   15161  B*13:01     B              6261
7   15162  B*15:21     B              1233
8   15163  B*39:01     B               616
9   15164  B*40:01     B              8727
10  15165  B*40:02     B             13091
11  15166  B*56:01     B             16838
12  15167  B*56:02     B               616
13  40319  C*01:02     C             17787
14  40320  C*03:03     C              5929
15  40321  C*04:01     

Ideal dataset has 139508 peptides across 77 alleles
    index   Allele Locus  ideal_n_peptides
0      38  A*01:01     A              8870
1      39  A*02:01     A             12380
2      41  A*02:05     A               379
3      43  A*02:07     A               379
4      44  A*03:01     A              6545
..    ...      ...   ...               ...
72  40338  C*14:02     C               474
73  40339  C*15:02     C               711
74  40340  C*15:05     C               237
75  40341  C*16:01     C               948
76  40342  C*16:02     C               474

[77 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Australia_New_South_Wales_Caucasian_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Australia_New_South_Wales_Caucasian_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.6028115044495533
Calculating PC for Australia Yuendumu Aborigine for dataset mhcflurry
Saving outputs t

iedb_afnd_pc_results\hlathena_Australia_Yuendumu_Aborigine_real.txt
Calculating PC for Australia Yuendumu Aborigine for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 106 alleles across 3 loci
Ideal dataset has 142380 peptides across 29 alleles
    index   Allele Locus  ideal_n_peptides
0      65  A*01:01     A               379
1      66  A*02:01     A              5359
2      71  A*03:01     A               142
3      72  A*11:01     A              3604
4      74  A*24:02     A             14135
5      75  A*24:06     A               616
6      77  A*24:13     A               853
7      86  A*31:01     A              1375
8      87  A*32:01     A               142
9      89  A*34:01     A             20870
10  15230  B*13:01     B             11573
11  15234  B*15:01     B               379
12  15244  B*15:21     B              5881
13  15245  B*15:25     B              2703
14  15259  B*39:01     B               996
15  15263  B*40:01    

Plotting results
Calculating JS divergence
JS divergence: 0.6190839810630205
Calculating PC for Bolivia/Chile Aymara NA-DHS_13 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Bolivia_Chile_Aymara_NA-DHS_13_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_cove

iedb_afnd_pc_results\netmhc_Bolivia_Peru_Quechua_NA-DHS_12_(G)_real.txt
Calculating PC for Bolivia/Peru Quechua NA-DHS_12 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 39 alleles across 3 loci
Ideal dataset has 170068 peptides across 39 alleles
    index   Allele Locus  ideal_n_peptides
0      99  A*02:01     A             37803
1     100  A*02:07     A              1349
2     101  A*02:11     A              1349
3     102  A*02:13     A              1349
4     103  A*02:22     A              1349
5     104  A*02:64     A              1349
6     105  A*23:01     A              1349
7     106  A*24:02     A              8102
8     107  A*29:02     A              1349
9     108  A*33:01     A              1349
10  15300  B*07:02     B              1349
11  15301  B*14:02     B              2699
12  15302  B*15:01     B              2699
13  15303  B*15:04     B              9452
14  15304  B*15:05     B              2699
15  15305  B*35:0

Plotting results
Calculating JS divergence
JS divergence: 0.07779022503414394
Calculating PC for Brazil Barra Mansa Rio State Black for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Brazil_Barra_Mansa_Rio_State_Black_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Brazil Barra Man

iedb_afnd_pc_results\netmhc_Brazil_Barra_Mansa_Rio_State_Caucasian_real.txt
Calculating PC for Brazil Barra Mansa Rio State Caucasian for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 143 alleles across 3 loci
Ideal dataset has 166378 peptides across 143 alleles
     index   Allele Locus  ideal_n_peptides
0      138  A*01:01     A              6651
1      139  A*01:02     A                68
2      140  A*02:01     A             11692
3      141  A*02:02     A               419
4      142  A*02:04     A               209
..     ...      ...   ...               ...
138  40434  C*16:02     C               844
139  40435  C*17:01     C              1230
140  40436  C*17:03     C               306
141  40437  C*18:01     C               232
142  40438  C*18:02     C               459

[143 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Brazil_Barra_Mansa_Rio_State_Caucasian_ideal.csv
Running IEDB tool for population cov

Plotting results
Calculating JS divergence
JS divergence: 0.1369550543978408
Calculating PC for Brazil Mixed for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Brazil_Mixed_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Brazil Mixed" -c I -f C:\Users\a

iedb_afnd_pc_results\hlathena_Brazil_Puyanawa_real.txt
Calculating PC for Brazil Puyanawa for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 78 alleles across 3 loci
Ideal dataset has 142176 peptides across 78 alleles
    index   Allele Locus  ideal_n_peptides
0     210  A*01:01     A              2039
1     211  A*02:01     A             14087
2     212  A*02:05     A               332
3     213  A*02:11     A               142
4     214  A*03:01     A              2513
..    ...      ...   ...               ...
73  40485  C*14:02     C               806
74  40486  C*15:02     C              2371
75  40487  C*16:01     C               616
76  40488  C*17:01     C               332
77  40489  C*18:01     C               948

[78 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Brazil_Puyanawa_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Brazil_Puyanawa_id

Plotting results
Calculating JS divergence
JS divergence: 0.37618020595963975
Calculating PC for Brazil Rio de Janeiro Caucasian for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Brazil_Rio_de_Janeiro_Caucasian_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Brazil Rio de

iedb_afnd_pc_results\mhcflurry_Brazil_Rio_de_Janeiro_Parda_real.txt
Calculating PC for Brazil Rio de Janeiro Parda for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 125 alleles across 3 loci
Ideal dataset has 184063 peptides across 125 alleles
     index   Allele Locus  ideal_n_peptides
0      290  A*01:01     A              4962
1      291  A*02:01     A             11399
2      292  A*02:02     A              1099
3      293  A*02:04     A               368
4      294  A*02:05     A               918
..     ...      ...   ...               ...
120  40574  C*16:01     C              3474
121  40575  C*16:02     C               581
122  40576  C*16:04     C               287
123  40577  C*17:01     C              1443
124  40578  C*17:03     C               581

[125 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Brazil_Rio_de_Janeiro_Parda_ideal.csv
Running IEDB tool for population coverage and saving outp

Plotting results
Calculating JS divergence
JS divergence: 0.4798008437509109
Calculating PC for Brazil Terena for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Brazil_Terena_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Brazil Terena" -c I -f C:\Users\ac121\Documents\HLAequity\H

iedb_afnd_pc_results\mhcflurry_Bulgaria_Romani_real.txt
Calculating PC for Bulgaria Romani for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 30 alleles across 3 loci
Ideal dataset has 184226 peptides across 30 alleles
    index   Allele Locus  ideal_n_peptides
0     334  A*01:01     A             20810
1     335  A*02:01     A             12998
2     336  A*02:11     A              5187
3     337  A*11:01     A             12998
4     338  A*24:02     A              5187
5     339  A*26:01     A              2624
6     340  A*32:01     A              2624
7   15749  B*08:01     B              2812
8   15750  B*15:01     B              2812
9   15751  B*18:01     B              2812
10  15752  B*27:02     B              2812
11  15753  B*27:04     B              2812
12  15754  B*35:03     B              2812
13  15755  B*35:08     B              2812
14  15756  B*40:01     B              2812
15  15757  B*40:06     B             17061
16 

Plotting results
Calculating JS divergence
JS divergence: 0.6299833491298655
Calculating PC for Cameroon Baka Pygmy for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Cameroon_Baka_Pygmy_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Cameroon Baka Pygmy" -c I -f C:\Users\

Plotting results
Calculating JS divergence
JS divergence: 0.6179318807507778
Calculating PC for Cameroon Beti for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Cameroon_Beti_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Cameroon Beti" -c I -f C:\Users\ac121\Documents\HL

iedb_afnd_pc_results\mhcflurry_Cameroon_Sawa_real.txt
Calculating PC for Cameroon Sawa for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 95 alleles across 3 loci
Ideal dataset has 184528 peptides across 36 alleles
    index   Allele Locus  ideal_n_peptides
0     401  A*02:01     A              2374
1     403  A*02:05     A              2374
2     404  A*03:01     A              2374
3     407  A*24:02     A              4812
4     411  A*29:02     A              4812
5     412  A*30:01     A              2374
6     413  A*30:02     A              4812
7     414  A*30:04     A              7186
8     418  A*33:03     A             14436
9     421  A*66:01     A              4812
10    423  A*66:03     A              4812
11    424  A*68:01     A              2374
12    425  A*68:02     A              2374
13    426  A*74:01     A              2374
14  15849  B*08:01     B              2374
15  15854  B*15:03     B              4812
16  158

Plotting results
Calculating JS divergence
JS divergence: 0.5620527563513653
Calculating PC for Canada Chipewyan NA-DHS_2 (G) for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Canada_Chipewyan_NA-DHS_2_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Canada Chipewyan N

Ideal dataset has 142289 peptides across 31 alleles
    index   Allele Locus  ideal_n_peptides
0     428  A*01:01     A              1897
1     429  A*02:01     A             22768
2     430  A*02:06     A              4743
3     431  A*03:01     A              1897
4     432  A*24:02     A             12332
5     433  A*26:01     A               948
6     434  A*31:01     A              2846
7   15889  B*07:02     B              3794
8   15890  B*08:01     B               948
9   15891  B*15:01     B              2846
10  15892  B*27:05     B              6640
11  15893  B*35:01     B              7589
12  15894  B*37:01     B               948
13  15895  B*38:01     B               948
14  15896  B*39:01     B              4743
15  15897  B*40:02     B              6640
16  15898  B*44:02     B              5692
17  15899  B*49:01     B              1897
18  15900  B*51:01     B              4743
19  40663  C*01:02     C              2846
20  40664  C*02:02     C              6640
21

Plotting results
Calculating JS divergence
JS divergence: 0.4456727358849732
Calculating PC for Canada Cree NA-DHS_3 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Canada_Cree_NA-DHS_3_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Canada C

iedb_afnd_pc_results\netmhc_Canada_Ojibwa_NA-DHS_4_(G)_real.txt
Calculating PC for Canada Ojibwa NA-DHS_4 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 34 alleles across 3 loci
Ideal dataset has 170088 peptides across 34 alleles
    index   Allele Locus  ideal_n_peptides
0     444  A*01:01     A              5670
1     445  A*02:01     A             17010
2     446  A*02:06     A              7558
3     447  A*03:01     A              1888
4     448  A*24:02     A              9452
5     449  A*26:01     A              1888
6     450  A*31:01     A              9452
7     451  A*68:01     A              3782
8   15916  B*08:01     B              1888
9   15917  B*15:01     B              1888
10  15918  B*18:01     B              1888
11  15919  B*27:05     B              3782
12  15920  B*35:01     B             15122
13  15921  B*39:01     B              5670
14  15922  B*40:01     B              5670
15  15923  B*40:02     B         

Plotting results
Calculating JS divergence
JS divergence: 0.5206410520683047
Calculating PC for Chile Easter Island for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Chile_Easter_Island_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Chile Easter Island" -c I -f C:\Users\ac121\Doc

Plotting results
Calculating JS divergence
JS divergence: 0.48436746437938716
Calculating PC for Chile Huilliche NA-DHS_14 (G) for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Chile_Huilliche_NA-DHS_14_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Chile Huilliche NA-DHS_14 

iedb_afnd_pc_results\mhcflurry_China_Beijing_real.txt
Calculating PC for China Beijing for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 73 alleles across 3 loci
Ideal dataset has 190574 peptides across 61 alleles
    index   Allele Locus  ideal_n_peptides
0     469  A*01:01     A              2312
1     470  A*02:01     A             11686
2     471  A*02:03     A               499
3     473  A*02:06     A              2999
4     474  A*02:07     A              4187
..    ...      ...   ...               ...
56  40734  C*12:02     C              3812
57  40735  C*12:03     C              1437
58  40736  C*14:02     C              1437
59  40737  C*15:02     C              2874
60  40739  C*16:01     C               499

[61 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_China_Beijing_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Beijing_ideal

Plotting results
Calculating JS divergence
JS divergence: 0.30823796341311815
Calculating PC for China Canton Han for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Canton_Han_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Canton Han" -c I -f C:\Users\ac121\Documents\H

iedb_afnd_pc_results\netmhc_China_Guangdong_Province_Meizhou_Han_real.txt
Calculating PC for China Guangdong Province Meizhou Han for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 88 alleles across 3 loci
Ideal dataset has 169222 peptides across 88 alleles
    index   Allele Locus  ideal_n_peptides
0     506  A*01:01     A               850
1     507  A*01:03     A               283
2     508  A*02:01     A               850
3     509  A*02:03     A               567
4     510  A*02:12     A               283
..    ...      ...   ...               ...
83  40781  C*08:01     C              4592
84  40782  C*08:03     C              3175
85  40783  C*12:02     C               567
86  40784  C*15:02     C               283
87  40785  C*15:07     C               283

[88 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_China_Guangdong_Province_Meizhou_Han_ideal.csv
Running IEDB tool for population coverage and saving outp

Plotting results
Calculating JS divergence
JS divergence: 0.37007854116995664
Calculating PC for China Guangzhou for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Guangzhou_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Guangzhou" -c I -f C:\Users\ac121\Documents\HLAe

Plotting results
Calculating JS divergence
JS divergence: 0.5663748020376312
Calculating PC for China Guizhou Province Bouyei for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Guizhou_Province_Bouyei_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Guizhou Prov

Plotting results
Calculating JS divergence
JS divergence: 0.5560172058654886
Calculating PC for China Guizhou Province Bouyei for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Guizhou_Province_Bouyei_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p

Plotting results
Calculating JS divergence
JS divergence: 0.5203598000692536
Calculating PC for China Guizhou Province Miao pop 2 for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Guizhou_Province_Miao_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Guizhou Provi

Plotting results
Calculating JS divergence
JS divergence: 0.6046047475324366
Calculating PC for China Guizhou Province Shui for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Guizhou_Province_Shui_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Guizhou Province

Plotting results
Calculating JS divergence
JS divergence: 0.5596035346198492
Calculating PC for China Guizhou Province Shui for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Guizhou_Province_Shui_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Ch

Plotting results
Calculating JS divergence
JS divergence: 0.2821458796050343
Calculating PC for China Han HIV negative for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Han_HIV_negative_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Han HIV negative" -c I -f C:\Users\

Plotting results
Calculating JS divergence
JS divergence: 0.5270763215849172
Calculating PC for China Henan HIV negative for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Henan_HIV_negative_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Henan HIV negative" -c

Plotting results
Calculating JS divergence
JS divergence: 0.3659312369131319
Calculating PC for China Henan HIV negative for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Henan_HIV_negative_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China He

Plotting results
Calculating JS divergence
JS divergence: 0.3944862743068137
Calculating PC for China Hubei Han for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Hubei_Han_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Hubei Han" -c I -f C

iedb_afnd_pc_results\hlathena_China_Inner_Mongolia_Region_real.txt
Calculating PC for China Inner Mongolia Region for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 144 alleles across 3 loci
Ideal dataset has 139794 peptides across 99 alleles
    index   Allele Locus  ideal_n_peptides
0     704  A*01:01     A              2561
1     705  A*02:01     A              6024
2     707  A*02:03     A               237
3     708  A*02:05     A               237
4     709  A*02:06     A              1185
..    ...      ...   ...               ...
94  40960  C*14:02     C               948
95  40961  C*14:03     C               474
96  40962  C*15:02     C              2561
97  40963  C*15:04     C               474
98  40964  C*15:05     C               237

[99 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_China_Inner_Mongolia_Region_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_p

Plotting results
Calculating JS divergence
JS divergence: 0.6037541930348097
Calculating PC for China Marrow Donor Registry for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Marrow_Donor_Registry_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Marrow Donor Reg

iedb_afnd_pc_results\mhcflurry_China_North_Han_real.txt
Calculating PC for China North Han for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 1014 alleles across 3 loci
Ideal dataset has 187871 peptides across 91 alleles
    index   Allele Locus  ideal_n_peptides
0     790  A*01:01     A              4437
1     802  A*02:01     A             11874
2     804  A*02:03     A              1499
3     806  A*02:05     A               874
4     807  A*02:06     A              4437
..    ...      ...   ...               ...
86  41144  C*15:02     C              3874
87  41146  C*15:04     C               312
88  41147  C*15:05     C               624
89  41150  C*15:08     C               312
90  41162  C*18:01     C               312

[91 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_China_North_Han_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_North

Plotting results
Calculating JS divergence
JS divergence: 0.12799478728787628
Calculating PC for China Qinghai Province Hui for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Qinghai_Province_Hui_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Qinghai Province Hui" -c I

Plotting results
Calculating JS divergence
JS divergence: 0.24653209398643564
Calculating PC for China Shanxi HIV negative for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Shanxi_HIV_negative_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Shanxi HIV negative" -c I -f

Calculating JS divergence
JS divergence: 0.5437189465935627
Calculating PC for China Sichuan HIV negative for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Sichuan_HIV_negative_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Sichuan HIV negative" -c I -f C:\Us

iedb_afnd_pc_results\mhcflurry_China_South_Han_real.txt
Calculating PC for China South Han for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 88 alleles across 3 loci
Ideal dataset has 187170 peptides across 88 alleles
    index   Allele Locus  ideal_n_peptides
0    1172  A*01:01     A               312
1    1173  A*02:01     A              3312
2    1174  A*02:03     A              6749
3    1175  A*02:06     A              2187
4    1176  A*02:07     A              5874
..    ...      ...   ...               ...
83  41241  C*12:03     C               999
84  41242  C*14:02     C              2249
85  41243  C*14:03     C               124
86  41244  C*15:02     C              2312
87  41245  C*15:05     C               749

[88 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_China_South_Han_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_South_H

Plotting results
Calculating JS divergence
JS divergence: 0.5265820453845454
Calculating PC for China Southwest Dai for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Southwest_Dai_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Southwest Dai" -c I -f C:\Users\ac121\Doc

iedb_afnd_pc_results\netmhc_China_Tibet_Region_Tibetan_real.txt
Calculating PC for China Tibet Region Tibetan for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 1040 alleles across 3 loci
Ideal dataset has 169175 peptides across 88 alleles
    index   Allele Locus  ideal_n_peptides
0    1207  A*01:01     A              1247
1    1218  A*02:01     A             12361
2    1221  A*02:04     A               170
3    1223  A*02:06     A              5726
4    1224  A*02:07     A               907
..    ...      ...   ...               ...
83  41371  C*12:03     C              1417
84  41384  C*14:02     C              4309
85  41389  C*15:02     C              5386
86  41392  C*15:05     C               170
87  41395  C*15:08     C               170

[88 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_China_Tibet_Region_Tibetan_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Uyghur_HIV_negative_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Uyghur HIV negative" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_China_Uyghur_HIV_negative_real.txt
iedb_afnd_pc_results\

Plotting results
Calculating JS divergence
JS divergence: 0.5530252544616806
Calculating PC for China Yunnan Bulang for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Yunnan_Bulang_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Yunnan Bulang" -c I -f C:\Users\

iedb_afnd_pc_results\hlathena_China_Yunnan_Bulang_real.txt
Calculating PC for China Yunnan Bulang for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 44 alleles across 3 loci
Ideal dataset has 142227 peptides across 44 alleles
    index   Allele Locus  ideal_n_peptides
0    1532  A*02:01     A               616
1    1533  A*02:03     A              3889
2    1534  A*02:07     A               189
3    1535  A*11:01     A             25756
4    1536  A*24:02     A             11241
5    1537  A*24:07     A              4885
6    1538  A*30:01     A               189
7    1539  A*31:01     A               189
8    1540  A*33:03     A               426
9   17874  B*07:05     B              1423
10  17875  B*13:01     B               426
11  17876  B*13:02     B               189
12  17877  B*15:01     B              1043
13  17878  B*15:02     B             16981
14  17879  B*15:07     B              1849
15  17880  B*15:21     B               18

Plotting results
Calculating JS divergence
JS divergence: 0.581810781097555
Calculating PC for China Yunnan Hani for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Yunnan_Hani_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Yunnan Hani" -c I

Plotting results
Calculating JS divergence
JS divergence: 0.4854773528618165
Calculating PC for China Yunnan Province Han for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_China_Yunnan_Province_Han_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Yunnan Province Han" -c I -f 

Plotting results
Calculating JS divergence
JS divergence: 0.561864486620042
Calculating PC for China Yunnan Province Lisu for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_China_Yunnan_Province_Lisu_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Yunnan Province Lis

Calculating JS divergence
JS divergence: 0.5394382803860398
Calculating PC for China Yunnan Province Lisu for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Yunnan_Province_Lisu_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Yunnan Province

Plotting results
Calculating JS divergence
JS divergence: 0.3863778278420952
Calculating PC for China Zhejiang Han for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_China_Zhejiang_Han_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "China Zhejiang Han" 

Plotting results
Calculating JS divergence
JS divergence: 0.6587368729428451
Calculating PC for Colombia Arhuaco NA-DHS_16 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_Arhuaco_NA-DHS_16_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py 

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_Bogota_Cord_Blood_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Colombia Bogota Cord Blood" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Colombia_Bogota_Cord_Bl

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_Embera_NA-DHS_19_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Colombia Embera NA-DHS_19 (G)" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Colombia_Embera_N

Plotting results
Calculating JS divergence
JS divergence: 0.5836280986455366
Calculating PC for Colombia Inga NA-DHS_11 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_Inga_NA-DHS_11_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Co

Plotting results
Calculating JS divergence
JS divergence: 0.6853785801029327
Calculating PC for Colombia Kogi NA-DHS_17 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_Kogi_NA-DHS_17_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Co

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_North_Chimila_Amerindians_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Colombia North Chimila Amerindians" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Colombi

iedb_afnd_pc_results\hlathena_Colombia_North_Wiwa_El_Encanto_real.txt
Calculating PC for Colombia North Wiwa El Encanto for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 67 alleles across 3 loci
Ideal dataset has 142221 peptides across 67 alleles
    index   Allele Locus  ideal_n_peptides
0    1725  A*01:01     A               910
1    1726  A*02:01     A              5018
2    1727  A*02:14     A               455
3    1728  A*03:01     A              1366
4    1729  A*11:01     A               910
..    ...      ...   ...               ...
62  41607  C*07:05     C               455
63  41608  C*08:02     C              1826
64  41609  C*12:03     C              1366
65  41610  C*15:02     C               455
66  41611  C*16:01     C              2281

[67 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Colombia_North_Wiwa_El_Encanto_ideal.csv
Running IEDB tool for population coverage and saving output to: ied

iedb_afnd_pc_results\hlathena_Colombia_Waunana_NA-DHS_20_(G)_real.txt
Calculating PC for Colombia Waunana NA-DHS_20 (G) for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 24 alleles across 3 loci
Ideal dataset has 142287 peptides across 24 alleles
    index   Allele Locus  ideal_n_peptides
0    1748  A*02:01     A              1185
1    1749  A*02:11     A             13044
2    1750  A*02:13     A              2371
3    1751  A*24:02     A             28460
4    1752  A*24:03     A              1185
5    1753  A*30:02     A              1185
6   18242  B*15:04     B              7115
7   18243  B*15:39     B              1185
8   18244  B*18:01     B              2371
9   18245  B*35:04     B              1185
10  18246  B*35:05     B              1185
11  18247  B*35:10     B              3557
12  18248  B*39:05     B              7115
13  18249  B*40:02     B             11858
14  18250  B*40:04     B              8300
15  18251  B*51:01 

Plotting results
Calculating JS divergence
JS divergence: 0.5105780370052934
Calculating PC for Colombia Wayuu NA-DHS_15 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Colombia_Wayuu_NA-DHS_15_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "

Ideal dataset has 170112 peptides across 23 alleles
    index   Allele Locus  ideal_n_peptides
0    1766  A*02:06     A              4723
1    1767  A*02:22     A              9452
2    1768  A*24:02     A             23627
3    1769  A*24:03     A              2364
4    1770  A*31:01     A              4723
5    1771  A*68:01     A             11811
6   18266  B*07:02     B              1774
7   18267  B*18:01     B              1774
8   18268  B*35:01     B              3543
9   18269  B*35:12     B              1774
10  18270  B*35:43     B             10631
11  18271  B*35:49     B              8862
12  18272  B*39:05     B              1774
13  18273  B*39:19     B              1774
14  18274  B*40:02     B             14175
15  18275  B*51:10     B             10631
16  41630  C*01:02     C             11811
17  41631  C*03:04     C             11811
18  41632  C*03:05     C              4723
19  41633  C*04:01     C              9452
20  41634  C*07:02     C              2364
21

iedb_afnd_pc_results\netmhc_Colombia_Brazil_Ticuna_Arara_NA-DHS_21_(G)_real.txt
Calculating PC for Colombia/Brazil Ticuna Arara NA-DHS_21 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 21 alleles across 3 loci
Ideal dataset has 170080 peptides across 21 alleles
    index   Allele Locus  ideal_n_peptides
0    1772  A*02:11     A              1888
1    1773  A*02:13     A              3782
2    1774  A*24:02     A             28351
3    1775  A*31:01     A             22680
4   18276  B*15:03     B              2024
5   18277  B*15:04     B              2024
6   18278  B*35:04     B              6072
7   18279  B*35:20     B              2024
8   18280  B*39:02     B              2024
9   18281  B*39:03     B             16199
10  18282  B*39:05     B              4048
11  18283  B*39:09     B              2024
12  18284  B*40:02     B             14175
13  18285  B*52:01     B              6072
14  41637  C*02:10     C              2024
1

iedb_afnd_pc_results\netmhc_Colombia_Brazil_Ticuna_Tarapaca_NA-DHS_22_(G)_real.txt
Calculating PC for Colombia/Brazil Ticuna Tarapaca NA-DHS_22 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 20 alleles across 3 loci
Ideal dataset has 170076 peptides across 20 alleles
    index   Allele Locus  ideal_n_peptides
0    1776  A*02:01     A              4473
1    1777  A*02:11     A              5970
2    1778  A*24:02     A             29842
3    1779  A*24:03     A              2982
4    1780  A*31:01     A             13427
5   18286  B*15:04     B              4473
6   18287  B*35:04     B              8953
7   18288  B*35:06     B              1491
8   18289  B*35:20     B              4473
9   18290  B*39:03     B              2982
10  18291  B*39:05     B              2982
11  18292  B*40:02     B             25368
12  18293  B*40:04     B              1491
13  18294  B*52:01     B              4473
14  41644  C*01:02     C              

iedb_afnd_pc_results\netmhc_Costa_Rica_African_-Caribbean_(G)_real.txt
Calculating PC for Costa Rica African -Caribbean (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 96 alleles across 3 loci
Ideal dataset has 169498 peptides across 96 alleles
    index   Allele Locus  ideal_n_peptides
0    1781  A*01:01     A              2494
1    1782  A*01:02     A               567
2    1783  A*02:01     A              4706
3    1784  A*02:02     A              3628
4    1785  A*02:05     A               567
..    ...      ...   ...               ...
91  41669  C*15:09     C               283
92  41670  C*16:01     C              3912
93  41671  C*16:02     C               283
94  41672  C*17:01     C              4989
95  41673  C*18:01     C              2211

[96 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Costa_Rica_African_-Caribbean_(G)_ideal.csv
Running IEDB tool for population coverage and saving output to: ie

Plotting results
Calculating JS divergence
JS divergence: 0.5519654844307749
Calculating PC for Costa Rica Amerindians (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Costa_Rica_Amerindians_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Cost

Calculating JS divergence
JS divergence: 0.6856616322096291
Calculating PC for Costa Rica Cabecar NA-DHS_9 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Costa_Rica_Cabecar_NA-DHS_9_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Costa Rica 

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Costa_Rica_Central_Valley_Mestizo_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Costa Rica Central Valley Mestizo (G)" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_C

iedb_afnd_pc_results\hlathena_Costa_Rica_Guanacaste_Mestizo_(G)_real.txt
Calculating PC for Costa Rica Guanacaste Mestizo (G) for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 108 alleles across 3 loci
Ideal dataset has 142379 peptides across 108 alleles
     index   Allele Locus  ideal_n_peptides
0     1878  A*01:01     A              2181
1     1879  A*02:01     A              7968
2     1880  A*02:02     A               664
3     1881  A*02:05     A              1280
4     1882  A*02:06     A              1517
..     ...      ...   ...               ...
103  41742  C*15:09     C               426
104  41743  C*16:01     C              2371
105  41744  C*16:02     C               237
106  41745  C*17:01     C              1517
107  41746  C*18:01     C               664

[108 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Costa_Rica_Guanacaste_Mestizo_(G)_ideal.csv
Running IEDB tool for population coverage a

Ideal dataset has 142303 peptides across 16 alleles
    index    Allele Locus  ideal_n_peptides
0    1910   A*02:01     A              1318
1    1911   A*02:22     A             21079
2    1912   A*24:02     A             17128
3    1913   A*30:01     A              1318
4    1914   A*31:01     A              2637
5    1915   A*68:01     A              3951
6   18490   B*15:01     B              2637
7   18491   B*35:01     B             19765
8   18492  B*35:102     B              1318
9   18493   B*35:43     B             13176
10  18494   B*35:49     B              1318
11  18495   B*40:02     B              7907
12  18496   B*51:01     B              1318
13  41747   C*01:02     C             17128
14  41748   C*03:05     C              7907
15  41749   C*04:01     C             22398
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Costa_Rica_Guaymi_NA-DHS_10_(G)_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Cost

Plotting results
Calculating JS divergence
JS divergence: 0.5629563512438951
Calculating PC for England Blood Donors of Mixed Ethnicity for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_England_Blood_Donors_of_Mixed_Ethnicity_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p

iedb_afnd_pc_results\mhcflurry_England_North_West_real.txt
Calculating PC for England North West for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 90 alleles across 3 loci
Ideal dataset has 187427 peptides across 90 alleles
    index   Allele Locus  ideal_n_peptides
0    2010  A*01:01     A             12998
1    2011  A*01:06     A               124
2    2012  A*02:01     A             18061
3    2013  A*02:05     A               624
4    2014  A*02:20     A               124
..    ...      ...   ...               ...
85  41849  C*15:02     C              1437
86  41850  C*15:05     C               187
87  41851  C*16:01     C              2812
88  41852  C*16:02     C               187
89  41853  C*17:01     C               124

[90 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_England_North_West_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Engl

Plotting results
Calculating JS divergence
JS divergence: 0.2742195690333527
Calculating PC for Finland for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Finland_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Finland" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datase

Plotting results
Calculating JS divergence
JS divergence: 0.6385800420833955
Calculating PC for France Southeast for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_France_Southeast_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "France Southeast" -c I -f C:\Users\ac121\Doc

iedb_afnd_pc_results\mhcflurry_Gaza_real.txt
Calculating PC for Gaza for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 79 alleles across 3 loci
Ideal dataset has 187406 peptides across 79 alleles
    index   Allele Locus  ideal_n_peptides
0    2075  A*01:01     A             11161
1    2076  A*02:01     A              7436
2    2077  A*02:02     A               743
3    2078  A*02:05     A              3718
4    2079  A*02:08     A               743
..    ...      ...   ...               ...
74  41910  C*16:01     C              2231
75  41911  C*16:02     C              1487
76  41912  C*16:04     C              2249
77  41913  C*17:01     C              5949
78  41914  C*17:03     C              1487

[79 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Gaza_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Gaza_ideal.txt
Plotting results
Calculating JS

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Georgia_Tibilisi_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Georgia Tibilisi" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_Georgia_Tibilisi_real.txt
iedb_afnd_pc_results\netmhc_Georgia_Tibilisi_rea

Plotting results
Calculating JS divergence
JS divergence: 0.27416123894676087
Calculating PC for Georgia Tibilisi Kurd for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Georgia_Tibilisi_Kurd_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Georgia Tibilisi Kurd" -c I -f C:\Users\ac

Plotting results
Calculating JS divergence
JS divergence: 0.5049509887209002
Calculating PC for Germany DKMS - Austria minority for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_Austria_minority_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Germany DKMS -

iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_Bosnia_and_Herzegovina_minority_real.txt
Calculating PC for Germany DKMS - Bosnia and Herzegovina minority for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 112 alleles across 3 loci
Ideal dataset has 187407 peptides across 112 alleles
     index   Allele Locus  ideal_n_peptides
0     2178  A*01:01     A              9455
1     2179  A*02:01     A             19060
2     2180  A*02:02     A                93
3     2181  A*02:05     A               549
4     2182  A*02:11     A                62
..     ...      ...   ...               ...
107  42012  C*16:01     C               456
108  42013  C*16:02     C               456
109  42014  C*16:04     C               306
110  42015  C*17:01     C               712
111  42016  C*18:01     C                62

[112 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_Bosnia_and_Herzegovina_minority_ideal.csv


Ideal dataset has 187019 peptides across 171 alleles
     index   Allele Locus  ideal_n_peptides
0     2210  A*01:01     A              2374
1     2211  A*02:01     A              7661
2     2212  A*02:03     A              2093
3     2213  A*02:05     A               268
4     2214  A*02:06     A              3393
..     ...      ...   ...               ...
166  42045  C*15:04     C                24
167  42046  C*15:05     C               537
168  42047  C*16:02     C               112
169  42048  C*16:04     C                74
170  42049  C*17:01     C               168

[171 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_China_minority_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_China_minority_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.17735797949635254
Calculating PC for Germany DKMS - China minority for dataset netmhc
Saving output

Plotting results
Calculating JS divergence
JS divergence: 0.16637787599437248
Calculating PC for Germany DKMS - Croatia minority for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Germany_DKMS_-_Croatia_minority_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Germany DKMS - Croatia

iedb_afnd_pc_results\netmhc_Germany_DKMS_-_France_minority_real.txt
Calculating PC for Germany DKMS - France minority for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 157 alleles across 3 loci
Ideal dataset has 169881 peptides across 157 alleles
     index   Allele Locus  ideal_n_peptides
0     2299  A*01:01     A              7802
1     2300  A*01:03     A                39
2     2301  A*02:01     A             14583
3     2302  A*02:02     A                45
4     2303  A*02:03     A                22
..     ...      ...   ...               ...
152  42117  C*16:01     C              2268
153  42118  C*16:02     C               374
154  42119  C*16:04     C               102
155  42120  C*17:01     C               584
156  42121  C*18:01     C                22

[157 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Germany_DKMS_-_France_minority_ideal.csv
Running IEDB tool for population coverage and saving output 

Plotting results
Calculating JS divergence
JS divergence: 0.29134322239939997
Calculating PC for Germany DKMS - German donors for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Germany_DKMS_-_German_donors_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p 

iedb_afnd_pc_results\hlathena_Germany_DKMS_-_Greece_minority_real.txt
Calculating PC for Germany DKMS - Greece minority for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 153 alleles across 3 loci
Ideal dataset has 142133 peptides across 153 alleles
     index   Allele Locus  ideal_n_peptides
0     3176  A*01:01     A              4923
1     3177  A*01:02     A                14
2     3178  A*01:03     A                37
3     3179  A*02:01     A             12470
4     3180  A*02:02     A                61
..     ...      ...   ...               ...
148  43049  C*15:09     C                23
149  43050  C*16:01     C               275
150  43051  C*16:02     C              1090
151  43052  C*16:04     C               450
152  43053  C*17:01     C               702

[153 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Germany_DKMS_-_Greece_minority_ideal.csv
Running IEDB tool for population coverage and saving

Plotting results
Calculating JS divergence
JS divergence: 0.5269783553018812
Calculating PC for Germany DKMS - Netherlands minority for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_Netherlands_minority_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "German

iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_Portugal_minority_real.txt
Calculating PC for Germany DKMS - Portugal minority for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 147 alleles across 3 loci
Ideal dataset has 186977 peptides across 147 alleles
     index   Allele Locus  ideal_n_peptides
0     3304  A*01:01     A              7236
1     3305  A*01:02     A                81
2     3306  A*01:03     A                24
3     3307  A*02:01     A             16023
4     3308  A*02:02     A               318
..     ...      ...   ...               ...
142  43160  C*16:01     C              2524
143  43161  C*16:02     C               956
144  43162  C*16:04     C                24
145  43163  C*17:01     C               693
146  43164  C*18:01     C                81

[147 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Germany_DKMS_-_Portugal_minority_ideal.csv
Running IEDB tool for population coverage 

Plotting results
Calculating JS divergence
JS divergence: 0.15449463901002855
Calculating PC for Germany DKMS - Romania minority for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Germany_DKMS_-_Romania_minority_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Germany DKMS - Romania

iedb_afnd_pc_results\netmhc_Germany_DKMS_-_Spain_minority_real.txt
Calculating PC for Germany DKMS - Spain minority for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 160 alleles across 3 loci
Ideal dataset has 169720 peptides across 160 alleles
     index   Allele Locus  ideal_n_peptides
0     3385  A*01:01     A              6741
1     3386  A*01:02     A               153
2     3387  A*01:03     A                28
3     3388  A*02:01     A             13421
4     3389  A*02:02     A                79
..     ...      ...   ...               ...
155  43230  C*16:01     C              3725
156  43231  C*16:02     C               578
157  43232  C*16:04     C                51
158  43233  C*17:01     C               385
159  43234  C*18:01     C                51

[160 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Germany_DKMS_-_Spain_minority_ideal.csv
Running IEDB tool for population coverage and saving output to:

Plotting results
Calculating JS divergence
JS divergence: 0.24606959244814153
Calculating PC for Germany DKMS - Turkey minority for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Germany_DKMS_-_Turkey_minority_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py

iedb_afnd_pc_results\hlathena_Germany_DKMS_-_United_Kingdom_minority_real.txt
Calculating PC for Germany DKMS - United Kingdom minority for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 132 alleles across 3 loci
Ideal dataset has 141819 peptides across 132 alleles
     index   Allele Locus  ideal_n_peptides
0     3493  A*01:01     A              8552
1     3494  A*01:02     A                47
2     3495  A*02:01     A             12991
3     3496  A*02:02     A                47
4     3497  A*02:05     A               407
..     ...      ...   ...               ...
127  43307  C*15:06     C                23
128  43308  C*16:01     C              1498
129  43309  C*16:02     C                90
130  43310  C*17:01     C               521
131  43311  C*18:01     C                23

[132 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Germany_DKMS_-_United_Kingdom_minority_ideal.csv
Running IEDB tool for popula

Plotting results
Calculating JS divergence
JS divergence: 0.5889683597580377
Calculating PC for Germany pop 8 for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Germany_pop_8_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Germany pop 8" -c I -f C:\Users\ac121\Documents\HL

iedb_afnd_pc_results\mhcflurry_Ghana_Ga-Adangbe_real.txt
Calculating PC for Ghana Ga-Adangbe for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 73 alleles across 3 loci
Ideal dataset has 187454 peptides across 73 alleles
    index   Allele Locus  ideal_n_peptides
0    3735  A*01:01     A              1431
1    3736  A*02:01     A              6680
2    3737  A*02:02     A              4055
3    3738  A*02:05     A               956
4    3739  A*02:06     A               237
..    ...      ...   ...               ...
68  43498  C*15:04     C               237
69  43499  C*15:05     C               718
70  43500  C*16:01     C             10736
71  43501  C*17:01     C              8349
72  43502  C*18:01     C              2862

[73 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Ghana_Ga-Adangbe_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Ghana_Ga-A

Plotting results
Calculating JS divergence
JS divergence: 0.21213566545285611
Calculating PC for Greece pop 6 for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Greece_pop_6_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Greece pop 6" -c I -f C:\Users\ac121\Documents\HLAequity\HLA

iedb_afnd_pc_results\netmhc_Greece_pop_8_real.txt
Calculating PC for Greece pop 8 for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 79 alleles across 3 loci
Ideal dataset has 169736 peptides across 79 alleles
    index   Allele Locus  ideal_n_peptides
0    3788  A*01:01     A              8199
1    3789  A*02:01     A             17418
2    3790  A*02:05     A               686
3    3791  A*03:01     A              4780
4    3792  A*11:01     A              4099
..    ...      ...   ...               ...
74  43548  C*15:02     C              3759
75  43549  C*16:02     C              2046
76  43550  C*16:04     C               340
77  43551  C*17:01     C               340
78  43552  C*17:03     C               340

[79 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Greece_pop_8_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Greece_pop_8_ideal.txt
Plotting resul

iedb_afnd_pc_results\netmhc_Guatemala_Kaqchikel_NA-DHS_8_(G)_real.txt
Calculating PC for Guatemala Kaqchikel NA-DHS_8 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 48 alleles across 3 loci
Ideal dataset has 170049 peptides across 48 alleles
    index   Allele Locus  ideal_n_peptides
0    3806  A*02:01     A             14924
1    3807  A*03:01     A              1491
2    3808  A*11:01     A              1491
3    3809  A*24:02     A             13427
4    3810  A*25:01     A              1491
5    3811  A*26:01     A              2982
6    3812  A*29:02     A              1491
7    3813  A*30:01     A              1491
8    3814  A*30:02     A              2982
9    3815  A*31:01     A              1491
10   3816  A*66:01     A              1491
11   3817  A*68:01     A              2982
12   3818  A*68:03     A              7462
13   3819  A*68:05     A              1491
14  21286  B*18:01     B              2982
15  21287  B*27:05   

Ideal dataset has 187465 peptides across 162 alleles
     index   Allele Locus  ideal_n_peptides
0     3820  A*01:01     A               524
1     3821  A*01:03     A                18
2     3822  A*02:01     A             11817
3     3823  A*02:03     A              4749
4     3824  A*02:05     A                68
..     ...      ...   ...               ...
157  43599  C*15:05     C               637
158  43600  C*16:01     C                43
159  43601  C*16:02     C                56
160  43602  C*16:04     C                 6
161  43603  C*17:01     C                12

[162 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Hong_Kong_Chinese_BMDR_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Hong_Kong_Chinese_BMDR_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.32531479180706124
Calculating PC for Hong Kong Chinese BMDR for dataset netmhc
Saving outputs to: iedb_afnd_pc_re

Plotting results
Calculating JS divergence
JS divergence: 0.37505685175663717
Calculating PC for Hong Kong Chinese HKBMDR HLA 11 loci for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Hong_Kong_Chinese_HKBMDR_HLA_11_loci_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Hong Kong Ch

iedb_afnd_pc_results\netmhc_India_Andhra_Pradesh_Telugu_Speaking_real.txt
Calculating PC for India Andhra Pradesh Telugu Speaking for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 88 alleles across 3 loci
Ideal dataset has 166770 peptides across 88 alleles
    index   Allele Locus  ideal_n_peptides
0    3902  A*01:01     A              8386
1    3903  A*02:01     A              1831
2    3904  A*02:03     A              1372
3    3905  A*02:06     A               759
4    3906  A*02:09     A               306
..    ...      ...   ...               ...
83  43674  C*15:02     C              6248
84  43675  C*15:04     C               306
85  43676  C*15:05     C               306
86  43677  C*15:07     C               306
87  43678  C*16:02     C              2132

[88 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_India_Andhra_Pradesh_Telugu_Speaking_ideal.csv
Running IEDB tool for population coverage and saving outp

Plotting results
Calculating JS divergence
JS divergence: 0.3620461621384537
Calculating PC for India Karnataka Kannada Speaking for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_India_Karnataka_Kannada_Speaking_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage

iedb_afnd_pc_results\hlathena_India_Kerala_Malayalam_speaking_real.txt
Calculating PC for India Kerala Malayalam speaking for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 118 alleles across 3 loci
Ideal dataset has 137286 peptides across 118 alleles
     index   Allele Locus  ideal_n_peptides
0     3950  A*01:01     A              4065
1     3951  A*02:01     A              3462
2     3952  A*02:03     A               284
3     3953  A*02:05     A               142
4     3954  A*02:06     A               398
..     ...      ...   ...               ...
113  43730  C*15:05     C               474
114  43731  C*15:07     C                47
115  43732  C*15:64     C                47
116  43733  C*16:02     C              1755
117  43734  C*17:01     C               142

[118 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_India_Kerala_Malayalam_speaking_ideal.csv
Running IEDB tool for population coverage and sav

Plotting results
Calculating JS divergence
JS divergence: 0.517099023321074
Calculating PC for India Khandesh Region Pawra for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_India_Khandesh_Region_Pawra_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Ind

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_India_Mumbai_Maratha_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "India Mumbai Maratha" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_India_Mumbai_Maratha_real.txt
iedb_afnd_pc_results\netmhc_India_Mu

Plotting results
Calculating JS divergence
JS divergence: 0.5383082103323523
Calculating PC for India New Delhi for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_India_New_Delhi_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "India New Delhi" -c I -f C:\Users\ac121\Docume

iedb_afnd_pc_results\mhcflurry_India_Tamil_Nadu_real.txt
Calculating PC for India Tamil Nadu for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 169 alleles across 3 loci
Ideal dataset has 187347 peptides across 169 alleles
     index    Allele Locus  ideal_n_peptides
0     4079   A*01:01     A              9955
1     4080  A*01:136     A                31
2     4081   A*02:01     A              3368
3     4082   A*02:03     A               731
4     4083   A*02:05     A               199
..     ...       ...   ...               ...
164  43862   C*15:07     C               199
165  43863   C*16:02     C              1606
166  43864   C*16:04     C                49
167  43865   C*16:12     C                12
168  43866   C*17:01     C               112

[169 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_India_Tamil_Nadu_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_r

Plotting results
Calculating JS divergence
JS divergence: 0.35979927577626475
Calculating PC for India Tamil Nadu Nadar for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_India_Tamil_Nadu_Nadar_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "India Tamil Nadu Nadar" -c I -f C:\Users

Ideal dataset has 183052 peptides across 72 alleles
    index   Allele Locus  ideal_n_peptides
0    4134  A*01:01     A               624
1    4135  A*01:06     A              4999
2    4142  A*03:01     A               624
3    4143  A*03:02     A              1249
4    4144  A*11:01     A              5624
..    ...      ...   ...               ...
67  43904  C*14:02     C              1249
68  43907  C*15:02     C              1874
69  43909  C*15:08     C              1874
70  43910  C*16:01     C              1874
71  43911  C*18:01     C               624

[72 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_India_West_Coast_Parsi_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_India_West_Coast_Parsi_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.5522528872960905
Calculating PC for India West Coast Parsi for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
         

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Iran_Baloch_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Iran Baloch" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_Iran_Baloch_real.txt
iedb_afnd_pc_results\netmhc_Iran_Baloch_real.txt
Calculating PC

Ideal dataset has 170345 peptides across 85 alleles
    index   Allele Locus  ideal_n_peptides
0    4211  A*01:01     A              1757
1    4212  A*01:02     A              2665
2    4213  A*02:01     A              6180
3    4214  A*02:34     A              2211
4    4215  A*02:48     A               453
..    ...      ...   ...               ...
80  43948  C*12:03     C              5318
81  43949  C*14:02     C              1774
82  43950  C*15:02     C              3543
83  43951  C*16:02     C              1326
84  43952  C*18:01     C               442

[85 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Iran_Gorgan_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Iran_Gorgan_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.3276856699887318
Calculating PC for Iran Gorgan for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0     

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Ireland_Northern_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Ireland Northern" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Ireland_Northern_real.txt
iedb_afnd_pc_resu

Ideal dataset has 142159 peptides across 82 alleles
    index   Allele Locus  ideal_n_peptides
0    4323  A*01:01     A             11858
1    4324  A*02:01     A             11668
2    4325  A*02:05     A               189
3    4326  A*03:01     A              5502
4    4327  A*11:01     A              2940
..    ...      ...   ...               ...
77  44036  C*15:02     C               379
78  44037  C*16:01     C              1897
79  44038  C*16:02     C               189
80  44039  C*17:01     C               284
81  44040  C*17:03     C               379

[82 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Ireland_South_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Ireland_South_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.6293028295727031
Calculating PC for Israel Arab Druze for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele

           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Israel_Ashkenazi_and_Non_Ashkenazi_Jews_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Israel Ashkenazi and Non Ashkenazi Jews" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\mhcflurry_peptides.csv > iedb_afnd_pc_results\mhcflurry_Israel_Ashkenazi

iedb_afnd_pc_results\mhcflurry_Italy_pop_5_real.txt
Calculating PC for Italy pop 5 for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 190 alleles across 3 loci
Ideal dataset has 187586 peptides across 141 alleles
     index   Allele Locus  ideal_n_peptides
0     4412  A*01:01     A              6374
1     4413  A*01:02     A               187
2     4414  A*02:01     A             15061
3     4415  A*02:02     A               249
4     4416  A*02:04     A                62
..     ...      ...   ...               ...
136  44136  C*16:01     C              2312
137  44137  C*16:02     C              1249
138  44138  C*16:04     C               187
139  44140  C*17:01     C               499
140  44141  C*17:03     C               187

[141 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Italy_pop_5_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Italy_pop_

Plotting results
Calculating JS divergence
JS divergence: 0.35370914788425273
Calculating PC for Japan Central for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Japan_Central_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Japan Central" -c I -f C:\Users\ac121\Documents\HLAequity\

iedb_afnd_pc_results\netmhc_Japan_pop_16_real.txt
Calculating PC for Japan pop 16 for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 148 alleles across 3 loci
Ideal dataset has 170188 peptides across 148 alleles
     index   Allele Locus  ideal_n_peptides
0     4480  A*01:01     A               226
1     4481  A*02:01     A              6588
2     4482  A*02:03     A                34
3     4483  A*02:05     A                 5
4     4484  A*02:06     A              5148
..     ...      ...   ...               ...
143  44184  C*15:02     C              1746
144  44185  C*15:05     C                11
145  44186  C*15:10     C                 5
146  44187  C*16:04     C                 5
147  44188  C*17:01     C                 5

[148 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Japan_pop_16_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Japan_pop_16_ideal.txt

Calculating JS divergence
JS divergence: 0.4346807781987884
Calculating PC for Japan pop 3 for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Japan_pop_3_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Japan pop 3" -c I -f C:\Users\ac121\Documents\HLAeq

Plotting results
Calculating JS divergence
JS divergence: 0.4287001460073635
Calculating PC for Japan pop 5 for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Japan_pop_5_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Japan pop 5" -c I -f C:\Users\ac12

Plotting results
Calculating JS divergence
JS divergence: 0.3164138755372648
Calculating PC for Kenya for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Kenya_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Kenya" -c I -f C:\Users\ac121\Documents\HLAequ

iedb_afnd_pc_results\hlathena_Kenya_Luo_real.txt
Calculating PC for Kenya Luo for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 143 alleles across 3 loci
Ideal dataset has 142480 peptides across 97 alleles
    index   Allele Locus  ideal_n_peptides
0    4596  A*01:01     A              3510
1    4597  A*01:02     A               189
2    4599  A*02:01     A              5454
3    4600  A*02:02     A              1423
4    4602  A*02:05     A              1233
..    ...      ...   ...               ...
92  44268  C*15:05     C               284
93  44269  C*16:01     C              2134
94  44270  C*16:02     C               284
95  44272  C*17:01     C              4126
96  44273  C*18:01     C              2324

[97 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Kenya_Luo_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Kenya_Luo_ideal.txt
Plotting result

Calculating JS divergence
JS divergence: 0.4638852539201878
Calculating PC for Kenya, Nyanza Province, Luo tribe for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Kenya,_Nyanza_Province,_Luo_tribe_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Kenya, Nyanza Province, Luo

Plotting results
Calculating JS divergence
JS divergence: 0.35061366265779353
Calculating PC for Kosovo for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Kosovo_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Kosovo" -c I -f C:\Users\ac121\Documents\HL

iedb_afnd_pc_results\hlathena_Malaysia_Peninsular_Chinese_real.txt
Calculating PC for Malaysia Peninsular Chinese for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 118 alleles across 3 loci
Ideal dataset has 142350 peptides across 118 alleles
     index   Allele Locus  ideal_n_peptides
0     4743  A*01:01     A               246
1     4744  A*02:01     A              6726
2     4745  A*02:02     A              1100
3     4746  A*02:03     A              3054
4     4747  A*02:06     A              1954
..     ...      ...   ...               ...
113  44375  C*14:02     C              1223
114  44376  C*15:02     C              1100
115  44377  C*15:04     C               123
116  44378  C*16:01     C               123
117  44379  C*16:02     C               246

[118 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Malaysia_Peninsular_Chinese_ideal.csv
Running IEDB tool for population coverage and saving output t

Plotting results
Calculating JS divergence
JS divergence: 0.4861955661663419
Calculating PC for Malaysia Peninsular Malay for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Malaysia_Peninsular_Malay_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Malaysia Peninsular Malay"

iedb_afnd_pc_results\mhcflurry_Mali_Bandiagara_real.txt
Calculating PC for Mali Bandiagara for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 142 alleles across 3 loci
Ideal dataset has 186816 peptides across 69 alleles
    index   Allele Locus  ideal_n_peptides
0    4891  A*01:01     A               437
1    4892  A*01:02     A               687
2    4894  A*02:01     A              5187
3    4895  A*02:02     A              4749
4    4897  A*02:05     A              1374
..    ...      ...   ...               ...
64  44495  C*14:02     C              1187
65  44500  C*16:01     C             17686
66  44502  C*16:04     C               249
67  44503  C*17:01     C              8936
68  44504  C*18:01     C               999

[69 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Mali_Bandiagara_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Mali_Bandiag

Plotting results
Calculating JS divergence
JS divergence: 0.506172200220958
Calculating PC for Mexico Chiapas Lacandon Mayans for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Mexico_Chiapas_Lacandon_Mayans_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Chiapas Lacandon Ma

Plotting results
Calculating JS divergence
JS divergence: 0.4722475900075061
Calculating PC for Mexico Chihuahua Tarahumara for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Mexico_Chihuahua_Tarahumara_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Chihuahua Tarahumara" -c

iedb_afnd_pc_results\mhcflurry_Mexico_Hidalgo_Mezquital_Valley__Otomi_real.txt
Calculating PC for Mexico Hidalgo Mezquital Valley/ Otomi for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 52 alleles across 3 loci
Ideal dataset has 185676 peptides across 52 alleles
    index   Allele Locus  ideal_n_peptides
0    4966  A*01:01     A              3474
1    4967  A*02:01     A             25172
2    4968  A*03:01     A               431
3    4969  A*11:01     A               868
4    4970  A*23:01     A               431
5    4971  A*24:02     A             11717
6    4972  A*29:01     A               868
7    4973  A*31:01     A              6074
8    4974  A*33:01     A               431
9    4975  A*66:01     A               868
10   4976  A*68:01     A              7811
11   4977  A*68:03     A              3905
12  23222  B*07:02     B               431
13  23223  B*08:01     B               868
14  23224  B*08:12     B               431


iedb_afnd_pc_results\hlathena_Mexico_Hidalgo_Mezquital_Valley__Otomi_real.txt
Calculating PC for Mexico Hidalgo Mezquital Valley/ Otomi for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 52 alleles across 3 loci
Ideal dataset has 140927 peptides across 52 alleles
    index   Allele Locus  ideal_n_peptides
0    4966  A*01:01     A              2637
1    4967  A*02:01     A             19106
2    4968  A*03:01     A               327
3    4969  A*11:01     A               659
4    4970  A*23:01     A               327
5    4971  A*24:02     A              8893
6    4972  A*29:01     A               659
7    4973  A*31:01     A              4610
8    4974  A*33:01     A               327
9    4975  A*66:01     A               659
10   4976  A*68:01     A              5929
11   4977  A*68:03     A              2964
12  23222  B*07:02     B               327
13  23223  B*08:01     B               659
14  23224  B*08:12     B               327
15 

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Mexico_Mexico_City_Mestizo_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Mexico City Mestizo pop 2" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Mexico_Mexi

iedb_afnd_pc_results\hlathena_Mexico_Mexico_City_Mestizo_population_real.txt
Calculating PC for Mexico Mexico City Mestizo population for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 118 alleles across 3 loci
Ideal dataset has 139509 peptides across 118 alleles
     index   Allele Locus  ideal_n_peptides
0     5012  A*01:01     A              4145
1     5013  A*02:01     A              9619
2     5014  A*02:02     A               166
3     5015  A*02:05     A               166
4     5016  A*02:06     A              2817
..     ...      ...   ...               ...
113  44595  C*15:09     C               332
114  44596  C*16:01     C              1992
115  44597  C*16:02     C               166
116  44598  C*17:01     C               498
117  44599  C*18:01     C               664

[118 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Mexico_Mexico_City_Mestizo_population_ideal.csv
Running IEDB tool for populatio

iedb_afnd_pc_results\hlathena_Mexico_Mixe_NA-DHS_6_(G)_real.txt
Calculating PC for Mexico Mixe NA-DHS_6 (G) for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 22 alleles across 3 loci
Ideal dataset has 142287 peptides across 22 alleles
    index   Allele Locus  ideal_n_peptides
0    5042  A*01:01     A              1185
1    5043  A*02:01     A             15415
2    5044  A*02:06     A             17787
3    5045  A*24:02     A              3557
4    5046  A*31:01     A              9486
5   23372  B*35:01     B              4743
6   23373  B*35:12     B              7115
7   23374  B*35:14     B              3557
8   23375  B*35:17     B              3557
9   23376  B*39:02     B             16601
10  23377  B*39:05     B              1185
11  23378  B*40:02     B              7115
12  23379  B*40:11     B              1185
13  23380  B*52:01     B              1185
14  23381  B*57:01     B              1185
15  44600  C*03:03     C       

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Mexico_Mixtec_NA-DHS_5_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Mixtec NA-DHS_5 (G)" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Mexico_Mixtec_NA-DHS_5_

Plotting results
Calculating JS divergence
JS divergence: 0.5934120564176368
Calculating PC for Mexico Oaxaca Mixe for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Mexico_Oaxaca_Mixe_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Oaxaca Mixe" 

Ideal dataset has 169698 peptides across 35 alleles
    index   Allele Locus  ideal_n_peptides
0    5062  A*02:01     A             16103
1    5063  A*02:05     A               567
2    5064  A*02:06     A             12247
3    5066  A*11:01     A              1644
4    5067  A*24:02     A             13892
5    5069  A*31:01     A              3912
6    5071  A*33:01     A               567
7    5072  A*68:01     A              7768
8   23409  B*14:01     B               567
9   23410  B*14:02     B              3345
10  23414  B*27:05     B               567
11  23415  B*35:01     B             12247
12  23417  B*35:12     B              8902
13  23418  B*35:17     B              2778
14  23420  B*35:23     B              2211
15  23421  B*35:24     B              1134
16  23422  B*38:01     B               567
17  23423  B*39:01     B              3345
18  23424  B*39:02     B              3345
19  23425  B*39:05     B              5556
20  23426  B*39:06     B              4989
21

Plotting results
Calculating JS divergence
JS divergence: 0.43910983982725776
Calculating PC for Mexico Oaxaca Zapotec for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Mexico_Oaxaca_Zapotec_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Oaxaca Zapotec" -c I -f C:\Users\ac

           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Mexico_Zapotec_NA-DHS_7_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Mexico Zapotec NA-DHS_7 (G)" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\mhcflurry_peptides.csv > iedb_afnd_pc_results\mhcflurry_Mexico_Zapotec_NA-DHS_7_(G)_real.txt
ied

Plotting results
Calculating JS divergence
JS divergence: 0.6828341589541141
Calculating PC for Morocco Atlantic Coast Chaouya for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Morocco_Atlantic_Coast_Chaouya_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Morocco Atlantic

iedb_afnd_pc_results\mhcflurry_Morocco_Nador_Metalsa_pop_2_real.txt
Calculating PC for Morocco Nador Metalsa pop 2 for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 632 alleles across 3 loci
Ideal dataset has 183442 peptides across 74 alleles
    index   Allele Locus  ideal_n_peptides
0    5123  A*01:01     A              8561
1    5129  A*02:01     A             11124
2    5173  A*03:01     A              2124
3    5174  A*03:02     A              2562
4    5180  A*11:01     A              2999
..    ...      ...   ...               ...
69  44764  C*15:05     C               999
70  44771  C*16:01     C              1999
71  44772  C*16:02     C               999
72  44774  C*17:01     C               499
73  44776  C*17:03     C              3937

[74 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Morocco_Nador_Metalsa_pop_2_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_af

Plotting results
Calculating JS divergence
JS divergence: 0.15762342689660305
Calculating PC for Morocco Settat Chaouya for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Morocco_Settat_Chaouya_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Morocco Settat Chaouya" -c I -f C:\Users

iedb_afnd_pc_results\netmhc_Netherlands_Leiden_real.txt
Calculating PC for Netherlands Leiden for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 1902 alleles across 3 loci
Ideal dataset has 169095 peptides across 104 alleles
     index   Allele Locus  ideal_n_peptides
0     6063  A*01:01     A              9922
1     6095  A*02:01     A             16557
2     6097  A*02:03     A                56
3     6099  A*02:05     A               283
4     6100  A*02:06     A               170
..     ...      ...   ...               ...
99   45547  C*16:01     C              1530
100  45548  C*16:02     C               113
101  45549  C*16:04     C                56
102  45557  C*17:01     C               226
103  45559  C*17:03     C               170

[104 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Netherlands_Leiden_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Net

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Netherlands_UMCU_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Netherlands UMCU" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_Netherlands_UMCU_real.txt
iedb_afnd_pc_results\netmhc_Netherlands_UMCU_rea

Plotting results
Calculating JS divergence
JS divergence: 0.6289817656418624
Calculating PC for New Caledonia for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_New_Caledonia_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "New Caledonia" -c I -f C:\Users\ac121\Documents\HL

iedb_afnd_pc_results\hlathena_New_Caledonia_real.txt
Calculating PC for New Caledonia for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 51 alleles across 3 loci
Ideal dataset has 142381 peptides across 39 alleles
    index   Allele Locus  ideal_n_peptides
0    6688  A*02:01     A              1138
1    6690  A*03:01     A               569
2    6691  A*11:01     A              6213
3    6692  A*24:02     A             28792
4    6694  A*25:01     A               569
5    6695  A*26:01     A               569
6    6696  A*31:01     A              1138
7    6697  A*32:01     A               569
8    6698  A*34:01     A              7352
9    6699  A*68:02     A               569
10  26165  B*07:02     B               901
11  26166  B*13:01     B              3652
12  26167  B*14:01     B               474
13  26168  B*15:01     B               901
14  26170  B*15:06     B              1849
15  26171  B*15:21     B              2751
16  26174 

iedb_afnd_pc_results\hlathena_New_Zealand_Maori_with_Admixed_History_real.txt
Calculating PC for New Zealand Maori with Admixed History for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 65 alleles across 3 loci
Ideal dataset has 142801 peptides across 65 alleles
    index    Allele Locus  ideal_n_peptides
0    6700   A*01:01     A              2940
1    6701   A*02:01     A              3842
2    6702   A*02:05     A               901
3    6703   A*02:06     A             10625
4    6704  A*02:119     A               237
..    ...       ...   ...               ...
60  45606   C*08:02     C              1375
61  45607   C*12:02     C               237
62  45608   C*12:03     C              1138
63  45609   C*15:02     C              2466
64  45610   C*15:05     C               237

[65 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_New_Zealand_Maori_with_Admixed_History_ideal.csv
Running IEDB tool for populatio

Plotting results
Calculating JS divergence
JS divergence: 0.6135191469821837
Calculating PC for New Zealand Maori with Full Ancestry for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_New_Zealand_Maori_with_Full_Ancestry_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_

iedb_afnd_pc_results\netmhc_New_Zealand_Polynesians_with_Admixed_History_real.txt
Calculating PC for New Zealand Polynesians with Admixed History for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 36 alleles across 3 loci
Ideal dataset has 170482 peptides across 36 alleles
    index   Allele Locus  ideal_n_peptides
0    6724  A*01:01     A              3175
1    6725  A*02:01     A              6293
2    6726  A*02:06     A              6293
3    6727  A*11:01     A              8391
4    6728  A*24:02     A             18881
5    6729  A*26:01     A              2097
6    6730  A*34:01     A             11567
7   26238  B*07:02     B              1077
8   26239  B*08:01     B              1077
9   26240  B*13:01     B              3175
10  26241  B*15:01     B              1077
11  26242  B*15:06     B              2097
12  26243  B*18:01     B              1077
13  26244  B*27:05     B              1077
14  26245  B*39:01     B              31

Plotting results
Calculating JS divergence
JS divergence: 0.4410520811662677
Calculating PC for New Zealand Polynesians with Full Ancestry for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_New_Zealand_Polynesians_with_Full_Ancestry_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "N

iedb_afnd_pc_results\mhcflurry_Nicaragua_Managua_real.txt
Calculating PC for Nicaragua Managua for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 142 alleles across 3 loci
Ideal dataset has 187376 peptides across 142 alleles
     index   Allele Locus  ideal_n_peptides
0     6738  A*01:01     A              2337
1     6739  A*01:02     A                93
2     6740  A*02:01     A             11036
3     6741  A*02:02     A               187
4     6742  A*02:05     A               656
..     ...      ...   ...               ...
137  45674  C*15:05     C               112
138  45675  C*15:09     C               343
139  45676  C*16:01     C              2974
140  45677  C*17:01     C              1031
141  45678  C*18:01     C               799

[142 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Nicaragua_Managua_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mh

Plotting results
Calculating JS divergence
JS divergence: 0.0887400246171453
Calculating PC for Nicaragua Mestizo (G) for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Nicaragua_Mestizo_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Nicaragua Mestizo (G)" -c I -f C:\Users\ac1

iedb_afnd_pc_results\netmhc_Panama_real.txt
Calculating PC for Panama for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 188 alleles across 3 loci
Ideal dataset has 169339 peptides across 188 alleles
     index   Allele Locus  ideal_n_peptides
0     6809  A*01:01     A              2245
1     6810  A*01:02     A               255
2     6811  A*02:01     A              8465
3     6812  A*02:02     A              1411
4     6813  A*02:03     A                62
..     ...      ...   ...               ...
183  45747  C*16:01     C              3016
184  45748  C*16:02     C               124
185  45749  C*16:04     C                62
186  45750  C*17:01     C              1570
187  45751  C*18:01     C               249

[188 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Panama_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Panama_ideal.txt
Plotting results
Calcul

Ideal dataset has 168622 peptides across 24 alleles
    index   Allele Locus  ideal_n_peptides
0    6865  A*11:01     A             14742
1    6866  A*24:02     A             17917
2    6867  A*24:07     A              2154
3    6869  A*26:01     A              4139
4    6870  A*31:01     A               567
5    6872  A*34:01     A             17124
6   26492  B*13:01     B              7484
7   26494  B*15:01     B               737
8   26505  B*39:03     B              7484
9   26507  B*40:01     B              5216
10  26508  B*40:02     B              2268
11  26509  B*40:10     B              8958
12  26510  B*44:02     B               737
13  26511  B*48:01     B               737
14  26515  B*56:01     B             17917
15  26516  B*56:02     B              3742
16  45752  C*01:02     C              4025
17  45754  C*03:03     C              9129
18  45755  C*03:04     C              1020
19  45756  C*04:01     C              2041
20  45757  C*04:03     C              5046
21

iedb_afnd_pc_results\netmhc_Papua_New_Guinea_Eastern_Highlands_Goroka_Asaro_real.txt
Calculating PC for Papua New Guinea Eastern Highlands Goroka Asaro for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 51 alleles across 3 loci
Ideal dataset has 170210 peptides across 21 alleles
    index   Allele Locus  ideal_n_peptides
0    6875  A*02:06     A              1474
1    6877  A*11:01     A              1474
2    6878  A*24:02     A             42186
3    6884  A*34:01     A             11623
4   26518  B*13:01     B              3232
5   26522  B*15:06     B             12984
6   26523  B*15:21     B              4025
7   26524  B*15:25     B               793
8   26527  B*27:04     B              4025
9   26533  B*40:01     B             10546
10  26534  B*40:02     B              3232
11  26541  B*56:01     B             12984
12  26542  B*56:02     B              4876
13  45765  C*01:02     C             17124
14  45767  C*03:03     C          

iedb_afnd_pc_results\netmhc_Papua_New_Guinea_Karimui_Plateau_Pawaia_real.txt
Calculating PC for Papua New Guinea Karimui Plateau Pawaia for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 51 alleles across 3 loci
Ideal dataset has 170211 peptides across 21 alleles
    index   Allele Locus  ideal_n_peptides
0    6889  A*11:01     A             10149
1    6890  A*24:02     A             42186
2    6894  A*31:01     A              1701
3    6896  A*34:01     A              2721
4   26544  B*13:01     B              3912
5   26548  B*15:06     B              5670
6   26549  B*15:21     B              1417
7   26553  B*27:04     B             12758
8   26556  B*39:01     B              1077
9   26559  B*40:01     B              4989
10  26560  B*40:02     B             15933
11  26567  B*56:01     B              8845
12  26568  B*56:02     B              2154
13  45778  C*01:02     C             11340
14  45780  C*03:03     C             15933
15  457

iedb_afnd_pc_results\netmhc_Papua_New_Guinea_Madang_real.txt
Calculating PC for Papua New Guinea Madang for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 51 alleles across 3 loci
Ideal dataset has 170265 peptides across 26 alleles
    index   Allele Locus  ideal_n_peptides
0    6901  A*11:01     A             36062
1    6902  A*24:02     A              8675
2    6905  A*26:01     A              1417
3    6906  A*31:01     A              2891
4    6908  A*34:01     A              7711
5   26570  B*13:01     B             10830
6   26574  B*15:06     B             10830
7   26577  B*15:36     B              4139
8   26578  B*18:01     B               510
9   26579  B*27:04     B              3118
10  26582  B*39:01     B              3628
11  26586  B*40:02     B              1020
12  26587  B*40:10     B              1020
13  26589  B*48:01     B              1020
14  26592  B*55:02     B               510
15  26593  B*56:01     B             19

iedb_afnd_pc_results\netmhc_Papua_New_Guinea_West_Schrader_Ranges_Haruai_real.txt
Calculating PC for Papua New Guinea West Schrader Ranges Haruai for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 51 alleles across 3 loci
Ideal dataset has 165617 peptides across 19 alleles
    index   Allele Locus  ideal_n_peptides
0    6913  A*11:01     A             31186
1    6914  A*24:02     A              8051
2    6918  A*31:01     A               963
3    6920  A*34:01     A             16557
4   26596  B*13:01     B              2041
5   26600  B*15:06     B             15479
6   26602  B*15:25     B              4649
7   26603  B*15:36     B              2608
8   26605  B*27:04     B              5159
9   26611  B*40:01     B              2608
10  26612  B*40:02     B              7201
11  26619  B*56:01     B             11850
12  26620  B*56:02     B              5159
13  45804  C*01:02     C             10716
14  45806  C*03:03     C               9

iedb_afnd_pc_results\netmhc_Papua_New_Guinea_Wosera_Abelam_real.txt
Calculating PC for Papua New Guinea Wosera Abelam for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 51 alleles across 3 loci
Ideal dataset has 173839 peptides across 22 alleles
    index   Allele Locus  ideal_n_peptides
0    6925  A*11:01     A             21830
1    6926  A*24:02     A             29088
2    6930  A*31:01     A               737
3    6932  A*34:01     A              5103
4   26622  B*13:01     B             16046
5   26626  B*15:06     B              5330
6   26628  B*15:25     B               850
7   26631  B*27:04     B              4536
8   26634  B*39:01     B              3969
9   26637  B*40:01     B               850
10  26638  B*40:02     B             20979
11  26639  B*40:10     B               396
12  26645  B*56:01     B              6577
13  26646  B*56:02     B               850
14  45817  C*01:02     C              6067
15  45819  C*03:03     C 

iedb_afnd_pc_results\netmhc_Paraguay_Argentina_Ache_NA-DHS_24_(G)_real.txt
Calculating PC for Paraguay/Argentina Ache NA-DHS_24 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 12 alleles across 3 loci
Ideal dataset has 170106 peptides across 12 alleles
    index   Allele Locus  ideal_n_peptides
0    6934  A*02:01     A              2183
1    6935  A*02:11     A             15264
2    6936  A*24:02     A              4360
3    6937  A*68:01     A             34894
4   26647  B*39:05     B              2579
5   26648  B*40:02     B              5154
6   26649  B*40:03     B              2579
7   26650  B*40:04     B             36085
8   26651  B*48:02     B             10308
9   45830  C*03:04     C             42526
10  45831  C*04:01     C             10631
11  45832  C*07:02     C              3543
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Paraguay_Argentina_Ache_NA-DHS_24_(G)_ideal.csv
Running IEDB tool for population c

iedb_afnd_pc_results\netmhc_Paraguay_Argentina_Guarani_NA-DHS_23_(G)_real.txt
Calculating PC for Paraguay/Argentina Guarani NA-DHS_23 (G) for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 22 alleles across 3 loci
Ideal dataset has 167274 peptides across 22 alleles
    index   Allele Locus  ideal_n_peptides
0    6938  A*02:01     A             19845
1    6939  A*02:11     A              5670
2    6940  A*02:12     A              2835
3    6941  A*24:02     A              8505
4    6942  A*24:03     A              8505
5    6943  A*31:15     A              2835
6    6944  A*68:01     A              2835
7    6945  A*68:02     A              5670
8   26652  B*14:02     B              2835
9   26653  B*15:01     B              5670
10  26654  B*15:04     B             19845
11  26655  B*35:04     B              2835
12  26656  B*39:05     B              2835
13  26657  B*40:02     B              2835
14  26658  B*40:04     B             14175
15  2

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Philippines_Ivatan_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Philippines Ivatan" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_Philippines_Ivatan_real.txt
iedb_afnd_pc_results\netmhc_Philippines_Iv

Plotting results
Calculating JS divergence
JS divergence: 0.09721605689050741
Calculating PC for Poland for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Poland_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Poland" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets

iedb_afnd_pc_results\netmhc_Poland_BMR_real.txt
Calculating PC for Poland BMR for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 293 alleles across 3 loci
Ideal dataset has 168784 peptides across 179 alleles
     index   Allele Locus  ideal_n_peptides
0     7099  A*01:01     A              7779
1     7100  A*01:02     A                 5
2     7107  A*02:01     A             16148
3     7108  A*02:02     A                34
4     7110  A*02:05     A               368
..     ...      ...   ...               ...
174  45998  C*16:04     C                56
175  46000  C*17:01     C               232
176  46001  C*17:03     C               691
177  46002  C*17:38     C                17
178  46003  C*18:02     C                17

[179 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Poland_BMR_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Poland_BMR_ideal.txt
Plottin

Plotting results
Calculating JS divergence
JS divergence: 0.28529920369299894
Calculating PC for Poland DKMS for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Poland_DKMS_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Poland DKMS" -c I -f C:\Users\ac1

iedb_afnd_pc_results\hlathena_Portugal_Azores_Terceira_Island_real.txt
Calculating PC for Portugal Azores Terceira Island for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 96 alleles across 3 loci
Ideal dataset has 142298 peptides across 96 alleles
    index   Allele Locus  ideal_n_peptides
0    7247  A*01:01     A              5198
1    7248  A*02:01     A             11028
2    7249  A*02:02     A              1247
3    7250  A*02:05     A              1247
4    7251  A*02:06     A               208
..    ...      ...   ...               ...
91  46072  C*15:05     C               208
92  46073  C*16:01     C              1038
93  46074  C*16:02     C               417
94  46075  C*17:01     C               626
95  46076  C*17:03     C               208

[96 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Portugal_Azores_Terceira_Island_ideal.csv
Running IEDB tool for population coverage and saving output to: 

Plotting results
Calculating JS divergence
JS divergence: 0.5656967346327831
Calculating PC for Russia Bering Island Aleuts for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Russia_Bering_Island_Aleuts_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Russia Bering Island A

iedb_afnd_pc_results\mhcflurry_Russia_Karelia_real.txt
Calculating PC for Russia Karelia for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 117 alleles across 3 loci
Ideal dataset has 187519 peptides across 117 alleles
     index   Allele Locus  ideal_n_peptides
0     7314  A*01:01     A              6336
1     7315  A*02:01     A             18517
2     7316  A*02:05     A               106
3     7317  A*02:06     A               318
4     7318  A*02:07     A                31
..     ...      ...   ...               ...
112  46143  C*15:11     C                31
113  46144  C*16:01     C               381
114  46145  C*16:02     C               174
115  46146  C*16:04     C                56
116  46147  C*17:01     C              1137

[117 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Russia_Karelia_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_R

iedb_afnd_pc_results\netmhc_Russia_Nizhny_Novgorod,_Russians_real.txt
Failed for Russia Nizhny Novgorod, Russians
Calculating PC for Russia Nizhny Novgorod, Russians for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Russia_Nizhny_Novgorod,_Russians_real.txt
python IEDB_Population_Coverage-3.0.2\population_c

Plotting results
Calculating JS divergence
JS divergence: 0.4792172284548332
Calculating PC for Russia Tundra Nentsi NA-DHS_1 (G) for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Russia_Tundra_Nentsi_NA-DHS_1_(G)_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_covera

Plotting results
Calculating JS divergence
JS divergence: 0.2433693940361371
Calculating PC for Russia Tuva pop 2 for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Russia_Tuva_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Russia Tuva pop 2" -c 

iedb_afnd_pc_results\hlathena_Saudi_Arabia_Guraiat_and_Hail_real.txt
Calculating PC for Saudi Arabia Guraiat and Hail for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 115 alleles across 3 loci
Ideal dataset has 139778 peptides across 115 alleles
     index   Allele Locus  ideal_n_peptides
0     7434  A*01:01     A              5454
1     7435  A*01:03     A               332
2     7436  A*02:01     A              9012
3     7437  A*02:02     A              1138
4     7438  A*02:05     A              2893
..     ...      ...   ...               ...
110  46249  C*16:04     C               569
111  46250  C*17:01     C               474
112  46251  C*17:02     C               237
113  46252  C*17:03     C               332
114  46253  C*18:01     C               237

[115 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Saudi_Arabia_Guraiat_and_Hail_ideal.csv
Running IEDB tool for population coverage and saving ou

Plotting results
Calculating JS divergence
JS divergence: 0.5012420141908726
Calculating PC for Singapore Javaneses for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Singapore_Javaneses_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Singapore Javaneses" -c I -f C:\Users\

iedb_afnd_pc_results\mhcflurry_Singapore_Riau_Malay_real.txt
Calculating PC for Singapore Riau Malay for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 82 alleles across 3 loci
Ideal dataset has 186433 peptides across 82 alleles
    index   Allele Locus  ideal_n_peptides
0    7580  A*01:01     A              1999
1    7581  A*02:01     A              4062
2    7582  A*02:03     A              3249
3    7583  A*02:06     A              1249
4    7584  A*02:07     A              1249
..    ...      ...   ...               ...
77  46355  C*12:02     C              2312
78  46356  C*12:03     C               312
79  46357  C*14:02     C              2937
80  46358  C*15:02     C              1437
81  46359  C*15:05     C               312

[82 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Singapore_Riau_Malay_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurr

Plotting results
Calculating JS divergence
JS divergence: 0.366125465128332
Calculating PC for Singapore SGVP Chinese CHS for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Singapore_SGVP_Chinese_CHS_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Singapore SGVP Chinese CHS" -c I -

iedb_afnd_pc_results\netmhc_Singapore_SGVP_Malay_MAS_real.txt
Calculating PC for Singapore SGVP Malay MAS for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 114 alleles across 3 loci
Ideal dataset has 170416 peptides across 73 alleles
    index   Allele Locus  ideal_n_peptides
0    7634  A*01:01     A              2381
1    7635  A*02:01     A              6747
2    7637  A*02:03     A              2041
3    7638  A*02:05     A               340
4    7639  A*02:06     A               680
..    ...      ...   ...               ...
68  46401  C*12:02     C              1247
69  46402  C*12:03     C               963
70  46403  C*14:02     C              1587
71  46405  C*15:02     C               963
72  46407  C*15:05     C               340

[73 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_Singapore_SGVP_Malay_MAS_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_

Plotting results
Calculating JS divergence
JS divergence: 0.35270792625792674
Calculating PC for Singapore SGVP. Indian INS for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Singapore_SGVP._Indian_INS_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Sin

iedb_afnd_pc_results\hlathena_Singapore_Thai_real.txt
Calculating PC for Singapore Thai for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 72 alleles across 3 loci
Ideal dataset has 141746 peptides across 72 alleles
    index   Allele Locus  ideal_n_peptides
0    7694  A*01:01     A               237
1    7695  A*02:01     A              1470
2    7696  A*02:03     A              6308
3    7697  A*02:06     A               948
4    7698  A*02:07     A              5786
..    ...      ...   ...               ...
67  46449  C*12:03     C               758
68  46450  C*14:02     C              1280
69  46451  C*15:02     C               758
70  46452  C*15:05     C               237
71  46453  C*15:07     C              1280

[72 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Singapore_Thai_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Singapore_Thai_ideal.

Plotting results
Calculating JS divergence
JS divergence: 0.5776061748200735
Calculating PC for South Africa Natal Tamil for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_South_Africa_Natal_Tamil_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "South Africa Natal Tamil" -c

Plotting results
Calculating JS divergence
JS divergence: 0.4473655649705684
Calculating PC for South Africa Natal Tamil for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_South_Africa_Natal_Tamil_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "South Af

iedb_afnd_pc_results\netmhc_South_Africa_Worcester_real.txt
Calculating PC for South Africa Worcester for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 111 alleles across 3 loci
Ideal dataset has 169389 peptides across 111 alleles
     index   Allele Locus  ideal_n_peptides
0     7767  A*01:01     A              3742
1     7768  A*01:23     A               907
2     7769  A*02:01     A              4252
3     7770  A*02:02     A               510
4     7771  A*02:05     A              2835
..     ...      ...   ...               ...
106  46531  C*15:02     C               340
107  46532  C*15:05     C               510
108  46533  C*16:01     C              2324
109  46534  C*17:01     C              3005
110  46535  C*18:01     C              1587

[111 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_South_Africa_Worcester_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results

Plotting results
Calculating JS divergence
JS divergence: 0.300110098043438
Calculating PC for South African Black for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_South_African_Black_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "South African Black

iedb_afnd_pc_results\hlathena_South_African_Indian_population_real.txt
Calculating PC for South African Indian population for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 71 alleles across 3 loci
Ideal dataset has 139901 peptides across 71 alleles
    index   Allele Locus  ideal_n_peptides
0    7833  A*01:01     A              6166
1    7834  A*02:01     A              1897
2    7835  A*02:05     A               474
3    7836  A*02:06     A              1423
4    7837  A*02:11     A              1897
..    ...      ...   ...               ...
66  46577  C*14:02     C              2371
67  46578  C*15:02     C              5217
68  46579  C*15:07     C               474
69  46580  C*16:01     C               474
70  46581  C*16:02     C               474

[71 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_South_African_Indian_population_ideal.csv
Running IEDB tool for population coverage and saving output to: 

Plotting results
Calculating JS divergence
JS divergence: 0.41934023491881794
Calculating PC for South Korea pop 3 for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_South_Korea_pop_3_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "South Korea pop 3" -c I -f C:\Users\ac121

iedb_afnd_pc_results\mhcflurry_Spain_(Catalunya,_Navarra,_Extremadura,_Aaragón,_Cantabria,_real.txt
Failed for Spain (Catalunya, Navarra, Extremadura, Aaragón, Cantabria,
Calculating PC for Spain (Catalunya, Navarra, Extremadura, Aaragón, Cantabria, for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Spain_(Catalunya,_Navarra,_Extremadur

iedb_afnd_pc_results\mhcflurry_Sri_Lanka_Colombo_real.txt
Calculating PC for Sri Lanka Colombo for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 113 alleles across 3 loci
Ideal dataset has 187731 peptides across 113 alleles
     index   Allele Locus  ideal_n_peptides
0     8151  A*01:01     A              7061
1     8152  A*02:01     A              2124
2     8153  A*02:03     A               437
3     8154  A*02:05     A               249
4     8155  A*02:06     A              1312
..     ...      ...   ...               ...
108  46786  C*15:05     C               437
109  46787  C*15:07     C               124
110  46788  C*15:15     C                62
111  46789  C*16:02     C              1187
112  46790  C*17:01     C                62

[113 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Sri_Lanka_Colombo_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mh

Plotting results
Calculating JS divergence
JS divergence: 0.6278303378880973
Calculating PC for Taiwan Ami for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Taiwan_Ami_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Ami" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBi

Plotting results
Calculating JS divergence
JS divergence: 0.5685149238239695
Calculating PC for Taiwan Atayal for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Taiwan_Atayal_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Atayal" -c I -f C:\Users\ac121\Documents\HLAequity\H

Plotting results
Calculating JS divergence
JS divergence: 0.5819056441705305
Calculating PC for Taiwan Bunun for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Taiwan_Bunun_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Bunun" -c I -f C:\Users\ac121\Documents\HLAequity\HLAl

Plotting results
Calculating JS divergence
JS divergence: 0.453329698828323
Calculating PC for Taiwan Hakka for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Taiwan_Hakka_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Hakka" -c I -f C:\Users\ac121\Documents\HLAequity\HLAll

Plotting results
Calculating JS divergence
JS divergence: 0.5844033548269316
Calculating PC for Taiwan Han Chinese for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Taiwan_Han_Chinese_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Han Chinese" -c I -f C:\Users\ac1

iedb_afnd_pc_results\mhcflurry_Taiwan_Minnan_pop_1_real.txt
Calculating PC for Taiwan Minnan pop 1 for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 374 alleles across 3 loci
Ideal dataset has 187637 peptides across 63 alleles
    index   Allele Locus  ideal_n_peptides
0    8722  A*01:01     A               312
1    8726  A*02:01     A              5499
2    8728  A*02:03     A              3999
3    8731  A*02:06     A              1812
4    8732  A*02:07     A              7061
..    ...      ...   ...               ...
58  47071  C*12:02     C              1812
59  47072  C*12:03     C               624
60  47076  C*14:02     C              1812
61  47079  C*15:02     C              3062
62  47086  C*16:02     C               312

[63 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Taiwan_Minnan_pop_1_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_

Plotting results
Calculating JS divergence
JS divergence: 0.634369927988826
Calculating PC for Taiwan Paiwan for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Taiwan_Paiwan_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Paiwan" -c I -f C:\Users\ac121\Documents\HLAequity\HL

Plotting results
Calculating JS divergence
JS divergence: 0.46687012691190233
Calculating PC for Taiwan Pazeh for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Taiwan_Pazeh_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Pazeh" -c I -f C:\Users\ac121\Documents\HLAequity\HLA

iedb_afnd_pc_results\mhcflurry_Taiwan_Puyuma_real.txt
Calculating PC for Taiwan Puyuma for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 369 alleles across 3 loci
Ideal dataset has 184343 peptides across 27 alleles
    index   Allele Locus  ideal_n_peptides
0    9116  A*02:01     A              1249
1    9118  A*02:03     A              3749
2    9121  A*02:06     A              7499
3    9152  A*11:01     A              1249
4    9153  A*11:02     A              4374
5    9163  A*24:02     A             39996
6    9183  A*26:01     A              1874
7    9217  A*34:01     A              2499
8   30100  B*13:01     B             10624
9   30110  B*15:02     B             11249
10  30130  B*15:25     B              4374
11  30154  B*27:04     B              5624
12  30183  B*38:02     B               624
13  30185  B*39:01     B              3124
14  30198  B*40:01     B             10624
15  30199  B*40:02     B              3749
16  30

iedb_afnd_pc_results\mhcflurry_Taiwan_Rukai_real.txt
Calculating PC for Taiwan Rukai for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 367 alleles across 3 loci
Ideal dataset has 187472 peptides across 22 alleles
    index   Allele Locus  ideal_n_peptides
0    9246  A*02:01     A              1874
1    9251  A*02:06     A              1874
2    9282  A*11:01     A              2499
3    9293  A*24:02     A             47496
4    9313  A*26:01     A              8749
5   30284  B*13:01     B             17498
6   30293  B*15:01     B              3749
7   30314  B*15:25     B              4374
8   30367  B*39:01     B              8124
9   30380  B*40:01     B             11874
10  30381  B*40:02     B              8124
11  30419  B*48:01     B              4374
12  30443  B*55:02     B              3749
13  30447  B*56:01     B               624
14  47255  C*01:02     C              4374
15  47262  C*03:03     C              8124
16  4726

iedb_afnd_pc_results\mhcflurry_Taiwan_Saisiat_real.txt
Calculating PC for Taiwan Saisiat for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 369 alleles across 3 loci
Ideal dataset has 187656 peptides across 25 alleles
    index   Allele Locus  ideal_n_peptides
0    9376  A*02:01     A              6749
1    9378  A*02:03     A               624
2    9381  A*02:06     A              1249
3    9412  A*11:01     A              7374
4    9413  A*11:02     A              7936
5    9423  A*24:02     A             35559
6    9443  A*26:01     A              3062
7   30466  B*13:01     B              1812
8   30548  B*38:02     B               624
9   30550  B*39:01     B             34309
10  30563  B*40:01     B             19623
11  30564  B*40:02     B               624
12  30602  B*48:01     B              2437
13  30609  B*51:01     B              1249
14  30626  B*55:02     B              1249
15  30631  B*56:03     B               624
16  

iedb_afnd_pc_results\mhcflurry_Taiwan_Siraya_real.txt
Calculating PC for Taiwan Siraya for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 373 alleles across 3 loci
Ideal dataset has 187705 peptides across 50 alleles
    index   Allele Locus  ideal_n_peptides
0    9506  A*02:01     A              2437
1    9508  A*02:03     A              3687
2    9511  A*02:06     A              1249
3    9512  A*02:07     A              4312
4    9542  A*11:01     A             10999
5    9543  A*11:02     A              4312
6    9553  A*24:02     A             29435
7    9573  A*26:01     A              1249
8    9590  A*30:01     A               624
9    9596  A*31:01     A              1249
10   9604  A*33:03     A              3062
11  30644  B*08:01     B               624
12  30650  B*13:01     B              7936
13  30651  B*13:02     B               624
14  30659  B*15:01     B              1812
15  30660  B*15:02     B               624
16  30

Ideal dataset has 142466 peptides across 50 alleles
    index   Allele Locus  ideal_n_peptides
0    9506  A*02:01     A              1849
1    9508  A*02:03     A              2798
2    9511  A*02:06     A               948
3    9512  A*02:07     A              3272
4    9542  A*11:01     A              8348
5    9543  A*11:02     A              3272
6    9553  A*24:02     A             22341
7    9573  A*26:01     A               948
8    9590  A*30:01     A               474
9    9596  A*31:01     A               948
10   9604  A*33:03     A              2324
11  30644  B*08:01     B               474
12  30650  B*13:01     B              6024
13  30651  B*13:02     B               474
14  30659  B*15:01     B              1375
15  30660  B*15:02     B               474
16  30669  B*15:11     B               474
17  30680  B*15:25     B              2324
18  30681  B*15:27     B               948
19  30704  B*27:04     B              3272
20  30712  B*35:01     B              1375
21

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Taiwan_Tao_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Tao" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Taiwan_Tao_real.txt
iedb_afnd_pc_results\hlathena_Taiwa

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Taiwan_Taroko_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Taroko" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_Taiwan_Taroko_real.txt
iedb_afnd_pc_results\hlath

Calculating JS divergence
JS divergence: 0.6110070399753106
Calculating PC for Taiwan Thao for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Taiwan_Thao_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Thao" -c I -f C:\Users\ac121\Documents\HLAeq

Plotting results
Calculating JS divergence
JS divergence: 0.6517621544933192
Calculating PC for Taiwan Tsou for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Taiwan_Tsou_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Taiwan Tsou" -c I -f C:\Users\ac12

iedb_afnd_pc_results\hlathena_Taiwan_pop_2_real.txt
Calculating PC for Taiwan pop 2 for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 86 alleles across 3 loci
Ideal dataset has 140078 peptides across 86 alleles
    index   Allele Locus  ideal_n_peptides
0   10150  A*01:01     A               284
1   10151  A*02:01     A              6118
2   10152  A*02:03     A              2656
3   10153  A*02:05     A                94
4   10154  A*02:06     A              1233
..    ...      ...   ...               ...
81  47658  C*14:02     C              1660
82  47659  C*14:03     C                47
83  47660  C*15:02     C              1565
84  47661  C*15:05     C                47
85  47662  C*17:01     C                47

[86 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Taiwan_pop_2_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Taiwan_pop_2_ideal.txt
Plot

Plotting results
Calculating JS divergence
JS divergence: 0.42908355350834726
Calculating PC for USA African American for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_African_American_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA African American" -c I -f C:\Us

iedb_afnd_pc_results\mhcflurry_USA_African_American_Bethesda_real.txt
Calculating PC for USA African American Bethesda for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 927 alleles across 3 loci
Ideal dataset has 181627 peptides across 75 alleles
    index   Allele Locus  ideal_n_peptides
0   10253  A*01:01     A              4624
1   10262  A*02:01     A              5249
2   10263  A*02:02     A              2499
3   10266  A*02:05     A               937
4   10329  A*03:01     A              7436
..    ...      ...   ...               ...
70  47884  C*14:02     C               999
71  47888  C*15:02     C               687
72  47891  C*15:05     C               687
73  47899  C*16:01     C              3999
74  47903  C*17:01     C              3624

[75 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_African_American_Bethesda_ideal.csv
Running IEDB tool for population coverage and saving output to: i

Plotting results
Calculating JS divergence
JS divergence: 0.13483544234665795
Calculating PC for USA African American pop 3 for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_African_American_pop_3_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA African American pop 3" -c I

iedb_afnd_pc_results\netmhc_USA_African_American_pop_4_real.txt
Calculating PC for USA African American pop 4 for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 374 alleles across 3 loci
Ideal dataset has 169981 peptides across 192 alleles
     index   Allele Locus  ideal_n_peptides
0    10571  A*01:01     A              2687
1    10572  A*01:02     A               368
2    10573  A*01:03     A                11
3    10575  A*02:01     A              7065
4    10576  A*02:02     A              2381
..     ...      ...   ...               ...
187  48025  C*16:08     C                11
188  48026  C*17:01     C              4173
189  48027  C*17:04     C                11
190  48028  C*18:01     C              1808
191  48029  C*18:03     C                11

[192 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_USA_African_American_pop_4_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afn

           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_Alaska_Yupik_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Alaska Yupik" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\netmhc_peptides.csv > iedb_afnd_pc_results\netmhc_USA_Alaska_Yupik_real.txt
iedb_afnd_pc_results\netmhc_USA_Alaska_Yupik_rea

Plotting results
Calculating JS divergence
JS divergence: 0.7089740113948844
Calculating PC for USA Arizona Gila River Amerindian for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_Arizona_Gila_River_Amerindian_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Arizon

iedb_afnd_pc_results\hlathena_USA_Arizona_Gila_River_Amerindian_real.txt
Calculating PC for USA Arizona Gila River Amerindian for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 35 alleles across 3 loci
Ideal dataset has 142136 peptides across 35 alleles
    index   Allele Locus  ideal_n_peptides
0   10785  A*01:01     A               189
1   10786  A*02:01     A             22483
2   10787  A*02:06     A              3272
3   10788  A*03:01     A                94
4   10789  A*24:02     A             16174
5   10790  A*31:01     A              5075
6   10791  A*68:01     A                94
7   32747  B*07:02     B               237
8   32748  B*08:01     B                94
9   32749  B*14:01     B                94
10  32750  B*15:01     B               426
11  32751  B*27:05     B              2940
12  32752  B*35:01     B              8585
13  32753  B*39:01     B              2419
14  32754  B*39:03     B                94
15  32755  B*

iedb_afnd_pc_results\hlathena_USA_Arizona_Gila_River_Pima_real.txt
Calculating PC for USA Arizona Gila River Pima for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 142 alleles across 3 loci
Ideal dataset has 141973 peptides across 142 alleles
     index   Allele Locus  ideal_n_peptides
0    10792  A*01:01     A                42
1    10793  A*02:01     A             20121
2    10794  A*02:05     A                66
3    10795  A*02:06     A              2722
4    10796  A*02:13     A                23
..     ...      ...   ...               ...
137  48134  C*15:02     C                99
138  48135  C*15:05     C                 9
139  48136  C*16:01     C                90
140  48137  C*16:02     C                 9
141  48138  C*17:01     C                61

[142 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_USA_Arizona_Gila_River_Pima_ideal.csv
Running IEDB tool for population coverage and saving output t

Plotting results
Calculating JS divergence
JS divergence: 0.491588171381999
Calculating PC for USA Asian pop 2 for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_Asian_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Asian pop 2" -c I -f C:\Users\ac121\Documen

iedb_afnd_pc_results\mhcflurry_USA_Caucasian_Bethesda_real.txt
Calculating PC for USA Caucasian Bethesda for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 927 alleles across 3 loci
Ideal dataset has 184808 peptides across 77 alleles
    index   Allele Locus  ideal_n_peptides
0   10982  A*01:01     A             10311
1   10991  A*02:01     A             16311
2   10995  A*02:05     A               437
3   10996  A*02:06     A               437
4   11007  A*02:17     A               249
..    ...      ...   ...               ...
72  48376  C*15:02     C              1624
73  48379  C*15:05     C               249
74  48387  C*16:01     C              1874
75  48388  C*16:02     C               499
76  48391  C*17:01     C               499

[77 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_Caucasian_Bethesda_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\m

Plotting results
Calculating JS divergence
JS divergence: 0.15938625743215773
Calculating PC for USA Caucasian pop 2 for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_Caucasian_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Caucasian pop 2" -c I -f C:\Users\ac121\Do

iedb_afnd_pc_results\netmhc_USA_Caucasian_pop_4_real.txt
Calculating PC for USA Caucasian pop 4 for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 128 alleles across 3 loci
Ideal dataset has 169989 peptides across 114 alleles
     index   Allele Locus  ideal_n_peptides
0    11306  A*01:01     A              9537
1    11307  A*01:02     A                51
2    11308  A*02:01     A             16693
3    11309  A*02:02     A                51
4    11310  A*02:03     A                51
..     ...      ...   ...               ...
109  48449  C*16:01     C              2489
110  48450  C*16:02     C               266
111  48451  C*16:04     C                51
112  48452  C*17:01     C               476
113  48453  C*18:01     C                51

[114 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_USA_Caucasian_pop_4_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_U

Plotting results
Calculating JS divergence
JS divergence: 0.26513985355086617
Calculating PC for USA Eastern European for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_Eastern_European_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Eastern Eur

Plotting results
Calculating JS divergence
JS divergence: 0.4792053563938871
Calculating PC for USA Hawaii Okinawa for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_Hawaii_Okinawa_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Hawaii Okinawa" 

Plotting results
Calculating JS divergence
JS divergence: 0.19445541790028603
Calculating PC for USA Hispanic for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_Hispanic_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Hispanic" -c I -f C:\Users\

iedb_afnd_pc_results\hlathena_USA_Hispanic_pop_2_real.txt
Calculating PC for USA Hispanic pop 2 for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 374 alleles across 3 loci
Ideal dataset has 142400 peptides across 227 alleles
     index   Allele Locus  ideal_n_peptides
0    11434  A*01:01     A              3178
1    11435  A*01:02     A               142
2    11438  A*02:01     A              9202
3    11439  A*02:02     A               322
4    11440  A*02:03     A                14
..     ...      ...   ...               ...
222  48605  C*16:01     C              2395
223  48606  C*16:02     C               260
224  48607  C*16:04     C                37
225  48609  C*17:01     C              1067
226  48611  C*18:01     C               166

[227 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_USA_Hispanic_pop_2_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hla

Plotting results
Calculating JS divergence
JS divergence: 0.5475640370411016
Calculating PC for USA Mexican American Mestizo for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_Mexican_American_Mestizo_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA Mexican American

iedb_afnd_pc_results\mhcflurry_USA_NMDP_African_real.txt
Calculating PC for USA NMDP African for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 357 alleles across 3 loci
Ideal dataset has 187217 peptides across 209 alleles
     index   Allele Locus  ideal_n_peptides
0    11606  A*01:01     A              3162
1    11607  A*01:02     A               193
2    11608  A*01:03     A               156
3    11610  A*01:09     A                 6
4    11612  A*02:01     A              7161
..     ...      ...   ...               ...
204  48728  C*16:01     C              5712
205  48729  C*16:02     C               199
206  48730  C*16:04     C                93
207  48731  C*17:01     C              4468
208  48732  C*18:01     C              2031

[209 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_African_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcfl

Plotting results
Calculating JS divergence
JS divergence: 0.14394679004441327
Calculating PC for USA NMDP African American pop 2 for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_NMDP_African_American_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP African Ameri

iedb_afnd_pc_results\netmhc_USA_NMDP_Alaska_Native_or_Aleut_real.txt
Calculating PC for USA NMDP Alaska Native or Aleut for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 113 alleles across 3 loci
Ideal dataset has 169651 peptides across 113 alleles
     index   Allele Locus  ideal_n_peptides
0    11913  A*01:01     A              5953
1    11914  A*02:01     A             10886
2    11915  A*02:02     A               113
3    11916  A*02:05     A               283
4    11917  A*02:06     A              5840
..     ...      ...   ...               ...
108  48883  C*15:04     C               113
109  48884  C*15:05     C                56
110  48885  C*16:01     C              1020
111  48886  C*16:04     C                56
112  48887  C*17:01     C               113

[113 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_USA_NMDP_Alaska_Native_or_Aleut_ideal.csv
Running IEDB tool for population coverage and saving outp

Plotting results
Calculating JS divergence
JS divergence: 0.24002569525644685
Calculating PC for USA NMDP American Indian South or Central America for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_NMDP_American_Indian_South_or_Central_America_real.txt
python IEDB_Population_Coverage-3.0.2\population_cov

iedb_afnd_pc_results\hlathena_USA_NMDP_Caribean_Black_real.txt
Calculating PC for USA NMDP Caribean Black for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 205 alleles across 3 loci
Ideal dataset has 141484 peptides across 177 alleles
     index   Allele Locus  ideal_n_peptides
0    11980  A*01:01     A              2129
1    11981  A*01:02     A               156
2    11982  A*01:03     A                 4
3    11983  A*02:01     A              5250
4    11984  A*02:02     A              1968
..     ...      ...   ...               ...
172  48944  C*15:05     C               801
173  48946  C*16:01     C              4582
174  48947  C*16:02     C                33
175  48949  C*17:01     C              3220
176  48950  C*18:01     C              1645

[177 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_USA_NMDP_Caribean_Black_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd

Plotting results
Calculating JS divergence
JS divergence: 0.42832778952854633
Calculating PC for USA NMDP Caribean Indian for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_Caribean_Indian_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP Caribean Indian" -

iedb_afnd_pc_results\mhcflurry_USA_NMDP_Chinese_real.txt
Calculating PC for USA NMDP Chinese for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 417 alleles across 3 loci
Ideal dataset has 187274 peptides across 170 alleles
     index   Allele Locus  ideal_n_peptides
0    12227  A*01:01     A               906
1    12228  A*01:03     A                 6
2    12229  A*02:01     A              5912
3    12230  A*02:02     A                 6
4    12231  A*02:03     A              4837
..     ...      ...   ...               ...
165  49078  C*15:05     C               462
166  49084  C*16:01     C                12
167  49085  C*16:02     C                43
168  49086  C*16:04     C                12
169  49087  C*17:01     C                31

[170 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_Chinese_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcfl

Plotting results
Calculating JS divergence
JS divergence: 0.17521623048388804
Calculating PC for USA NMDP European Caucasian for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_NMDP_European_Caucasian_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP European Caucasian" -

iedb_afnd_pc_results\netmhc_USA_NMDP_Filipino_real.txt
Calculating PC for USA NMDP Filipino for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 337 alleles across 3 loci
Ideal dataset has 169917 peptides across 189 alleles
     index   Allele Locus  ideal_n_peptides
0    12677  A*01:01     A               708
1    12679  A*01:03     A                11
2    12680  A*02:01     A              3804
3    12681  A*02:02     A                11
4    12682  A*02:03     A               844
..     ...      ...   ...               ...
184  49328  C*16:01     C               153
185  49329  C*16:02     C                11
186  49330  C*16:04     C                17
187  49331  C*17:01     C                79
188  49332  C*18:01     C                 5

[189 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_USA_NMDP_Filipino_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_NMD

Plotting results
Calculating JS divergence
JS divergence: 0.34060314827835303
Calculating PC for USA NMDP Hawaiian or other Pacific Islander for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_NMDP_Hawaiian_or_other_Pacific_Islander_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calcul

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_NMDP_Hispanic_South_or_Central_American_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP Hispanic South or Central American" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_result

iedb_afnd_pc_results\hlathena_USA_NMDP_Japanese_real.txt
Calculating PC for USA NMDP Japanese for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 165 alleles across 3 loci
Ideal dataset has 136487 peptides across 134 alleles
     index   Allele Locus  ideal_n_peptides
0    13070  A*01:01     A               474
1    13071  A*02:01     A              7020
2    13073  A*02:03     A                66
3    13074  A*02:05     A                28
4    13075  A*02:06     A              3548
..     ...      ...   ...               ...
129  49474  C*15:04     C                 4
130  49475  C*15:05     C                23
131  49476  C*16:01     C                75
132  49477  C*16:02     C                 4
133  49478  C*17:01     C                33

[134 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_USA_NMDP_Japanese_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathe

Plotting results
Calculating JS divergence
JS divergence: 0.511745127027505
Calculating PC for USA NMDP Mexican or Chicano for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_Mexican_or_Chicano_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP Mexican or Chi

iedb_afnd_pc_results\mhcflurry_USA_NMDP_Middle_Eastern_or_North_Coast_of_Africa_real.txt
Calculating PC for USA NMDP Middle Eastern or North Coast of Africa for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 511 alleles across 3 loci
Ideal dataset has 187145 peptides across 190 alleles
     index   Allele Locus  ideal_n_peptides
0    13436  A*01:01     A              8436
1    13437  A*01:02     A                18
2    13438  A*01:03     A                68
3    13439  A*01:06     A                 6
4    13444  A*02:01     A             12330
..     ...      ...   ...               ...
185  49713  C*16:01     C              1056
186  49714  C*16:02     C               981
187  49715  C*16:04     C               724
188  49716  C*17:01     C              2112
189  49717  C*18:01     C                37

[190 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_Middle_Eastern_or_North_Coast_of_Africa_idea

Ideal dataset has 187299 peptides across 201 alleles
     index   Allele Locus  ideal_n_peptides
0    13599  A*01:01     A              7511
1    13600  A*01:02     A                12
2    13604  A*02:01     A             17348
3    13605  A*02:02     A                87
4    13607  A*02:04     A                 6
..     ...      ...   ...               ...
196  49773  C*16:01     C              1756
197  49774  C*16:02     C                62
198  49775  C*16:04     C                12
199  49776  C*17:01     C               512
200  49777  C*18:01     C                43

[201 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_North_American_Amerindian_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_NMDP_North_American_Amerindian_ideal.txt
Plotting results
Calculating JS divergence
JS divergence: 0.14274266003024802
Calculating PC for USA NMDP North American Amerindian for dataset netmh

Plotting results
Calculating JS divergence
JS divergence: 0.247316488864903
Calculating PC for USA NMDP South Asian Indian for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_NMDP_South_Asian_Indian_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP South Asian Indian" -c 

iedb_afnd_pc_results\netmhc_USA_NMDP_Southeast_Asian_real.txt
Calculating PC for USA NMDP Southeast Asian for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 309 alleles across 3 loci
Ideal dataset has 169985 peptides across 209 alleles
     index   Allele Locus  ideal_n_peptides
0    13843  A*01:01     A              6503
1    13845  A*01:03     A                 5
2    13849  A*02:01     A              3277
3    13850  A*02:02     A                17
4    13851  A*02:03     A              1820
..     ...      ...   ...               ...
204  49906  C*16:01     C                73
205  49907  C*16:02     C              1088
206  49908  C*16:04     C                39
207  49909  C*17:01     C               243
208  49910  C*18:01     C                28

[209 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_USA_NMDP_Southeast_Asian_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_r

Plotting results
Calculating JS divergence
JS divergence: 0.4271830996207394
Calculating PC for USA NMDP Vietnamese for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_NMDP_Vietnamese_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA NMDP Vietnames

Plotting results
Calculating JS divergence
JS divergence: 0.5493722859221042
Calculating PC for USA New Mexico Canoncito Navajo for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_New_Mexico_Canoncito_Navajo_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.p

             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_USA_North_American_Native_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA North American Native" -c I -f C:\Users\ac121\Documents\HLAequity\HLAlleleBias\Datasets_coverage\datasets\hlathena_peptides.csv > iedb_afnd_pc_results\hlathena_USA_North_American_Native

iedb_afnd_pc_results\hlathena_USA_Philadelphia_Caucasian_real.txt
Calculating PC for USA Philadelphia Caucasian for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 964 alleles across 3 loci
Ideal dataset has 137896 peptides across 88 alleles
    index   Allele Locus  ideal_n_peptides
0   14061  A*01:01     A              5644
1   14071  A*02:01     A             12285
2   14075  A*02:05     A               901
3   14141  A*03:01     A              2561
4   14142  A*03:02     A               189
..    ...      ...   ...               ...
83  50119  C*15:02     C              1233
84  50122  C*15:05     C               237
85  50130  C*16:01     C              1470
86  50134  C*17:01     C              1233
87  50136  C*17:03     C               474

[88 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_USA_Philadelphia_Caucasian_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_r

Plotting results
Calculating JS divergence
JS divergence: 0.5775928290862451
Calculating PC for USA San Diego for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_San_Diego_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA San Diego" -c I -f C:\Users\ac121\Documents\HL

iedb_afnd_pc_results\mhcflurry_USA_San_Francisco_Caucasian_real.txt
Calculating PC for USA San Francisco Caucasian for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 89 alleles across 3 loci
Ideal dataset has 187174 peptides across 89 alleles
    index   Allele Locus  ideal_n_peptides
0   14615  A*01:01     A              9999
1   14616  A*02:01     A             15436
2   14617  A*02:05     A               562
3   14618  A*02:17     A               124
4   14619  A*03:01     A              9249
..    ...      ...   ...               ...
84  50279  C*15:05     C               124
85  50280  C*15:11     C               124
86  50281  C*16:01     C              2624
87  50282  C*16:02     C               124
88  50283  C*17:01     C               437

[89 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_USA_San_Francisco_Caucasian_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afn

Plotting results
Calculating JS divergence
JS divergence: 0.3971141433637555
Calculating PC for USA South Dakota Lakota Sioux for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_USA_South_Dakota_Lakota_Sioux_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA South Dakota Lakota Sio

Plotting results
Calculating JS divergence
JS divergence: 0.6892083402337795
Calculating PC for USA South Texas Hispanic for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_USA_South_Texas_Hispanic_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "USA South Texas Hispanic" -c

iedb_afnd_pc_results\mhcflurry_Uganda_Kampala_real.txt
Calculating PC for Uganda Kampala for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 142 alleles across 3 loci
Ideal dataset has 186045 peptides across 105 alleles
     index   Allele Locus  ideal_n_peptides
0    14861  A*01:01     A              5187
1    14862  A*01:02     A               187
2    14864  A*02:01     A             11499
3    14865  A*02:02     A              2124
4    14867  A*02:05     A              1374
..     ...      ...   ...               ...
100  50417  C*15:05     C               187
101  50418  C*16:01     C              3062
102  50419  C*16:02     C               187
103  50421  C*17:01     C              1749
104  50422  C*18:01     C               562

[105 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Uganda_Kampala_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_U

Plotting results
Calculating JS divergence
JS divergence: 0.21011381502281898
Calculating PC for Uganda Kampala pop 2 for dataset netmhc
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AAAAFEAAL  HLA-B*48:01     B
1        AAAAFEAAL  HLA-C*14:02     C
2        AAADFAHAE  HLA-B*44:03     B
3        AAAKTPVIV  HLA-A*02:01     A
4        AAAKTPVIV  HLA-A*68:02     A
...            ...          ...   ...
170465  YDGRYWTMWK  HLA-C*08:03     C
170466   YEPSQSAQL  HLA-B*55:01     B
170467    YKKEQTLK  HLA-B*55:01     B
170468  YLRMRRTAAL  HLA-B*15:10     B
170469    YVALDFEQ  HLA-C*08:03     C

[170107 rows x 3 columns]
Looking at dataset netmhc with total 170107 peptides across 109 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\netmhc_Uganda_Kampala_pop_2_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Uganda Kampala pop 2" -c I -f C:\Users\ac121

iedb_afnd_pc_results\netmhc_United_Arab_Emirates_Abu_Dhabi_real.txt
Calculating PC for United Arab Emirates Abu Dhabi for ideal dataset netmhc
The original dataset netmhc has 170107 peptides
The population has 85 alleles across 3 loci
Ideal dataset has 170014 peptides across 85 alleles
    index   Allele Locus  ideal_n_peptides
0   14942  A*01:01     A              3816
1   14943  A*01:03     A               544
2   14944  A*02:01     A             11448
3   14945  A*02:05     A              2183
4   14946  A*02:06     A               544
..    ...      ...   ...               ...
80  50462  C*16:01     C               544
81  50463  C*16:02     C              3271
82  50464  C*17:01     C              1088
83  50465  C*17:03     C               544
84  50466  C*18:01     C               544

[85 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\netmhc_United_Arab_Emirates_Abu_Dhabi_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_p

Plotting results
Calculating JS divergence
JS divergence: 0.7283005695218865
Calculating PC for Venezuela Sierra de Perija Yucpa for dataset hlathena
Saving outputs to: iedb_afnd_pc_results
             peptide       allele locus
0         AIDNSRNILY  HLA-A*01:01     A
1       AIGTYLDQYIVY  HLA-A*01:01     A
2          ALDDFTICY  HLA-A*01:01     A
3         ALDDFTICYF  HLA-A*01:01     A
4         ALDENGNLWY  HLA-A*01:01     A
...              ...          ...   ...
142295     YTSRIVVRL  HLA-C*17:01     C
142296     YVAPKAVWL  HLA-C*17:01     C
142297     YVAPRRILL  HLA-C*17:01     C
142298     YVVPYTIHL  HLA-C*17:01     C
142299     YVYKNVSQL  HLA-C*17:01     C

[142300 rows x 3 columns]
Looking at dataset hlathena with total 142300 peptides across 92 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\hlathena_Venezuela_Sierra_de_Perija_Yucpa_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage

iedb_afnd_pc_results\hlathena_Vietnam_Hanoi_Kinh_pop_2_real.txt
Calculating PC for Vietnam Hanoi Kinh pop 2 for ideal dataset hlathena
The original dataset hlathena has 142300 peptides
The population has 76 alleles across 3 loci
Ideal dataset has 142502 peptides across 76 alleles
    index   Allele Locus  ideal_n_peptides
0   14975  A*01:01     A              1802
1   14976  A*02:01     A               996
2   14977  A*02:03     A              3747
3   14978  A*02:06     A              2229
4   14979  A*02:07     A              4031
..    ...      ...   ...               ...
71  50485  C*12:02     C               996
72  50486  C*12:03     C               426
73  50487  C*14:02     C              1233
74  50488  C*15:02     C              1375
75  50489  C*15:05     C              2371

[76 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\hlathena_Vietnam_Hanoi_Kinh_pop_2_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\

Plotting results
Calculating JS divergence
JS divergence: 0.5127701814412851
Calculating PC for Zambia Lusaka for dataset mhcflurry
Saving outputs to: iedb_afnd_pc_results
           peptide       allele locus
0        AADFPGIAR  HLA-A*01:01     A
1       AADKAAAAAY  HLA-A*01:01     A
2        AADKAAAAY  HLA-A*01:01     A
3        AADSFATSY  HLA-A*01:01     A
4       AAELDRTEEY  HLA-A*01:01     A
...            ...          ...   ...
187590   YVYFYDLSY  HLA-C*15:02     C
187591   YWMGGTTYF  HLA-C*15:02     C
187592   YYFSYPLFV  HLA-C*15:02     C
187593   YYGRWVHEF  HLA-C*15:02     C
187594   YYKKTFSAL  HLA-C*15:02     C

[187485 rows x 3 columns]
Looking at dataset mhcflurry with total 187485 peptides across 132 alleles 3 loci
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcflurry_Zambia_Lusaka_real.txt
python IEDB_Population_Coverage-3.0.2\population_coverage\calculate_population_coverage.py -p "Zambia Lusaka" -c I -f C:\Users\ac121\Documents\HL

iedb_afnd_pc_results\mhcflurry_Zimbabwe_Harare_Shona_real.txt
Calculating PC for Zimbabwe Harare Shona for ideal dataset mhcflurry
The original dataset mhcflurry has 187485 peptides
The population has 91 alleles across 3 loci
Ideal dataset has 184423 peptides across 91 alleles
    index   Allele Locus  ideal_n_peptides
0   15069  A*01:01     A               249
1   15070  A*02:01     A              5687
2   15071  A*02:02     A              2249
3   15072  A*02:05     A              1937
4   15073  A*02:17     A               124
..    ...      ...   ...               ...
86  50555  C*14:03     C               249
87  50556  C*15:05     C               124
88  50557  C*16:01     C              4999
89  50558  C*17:01     C              5124
90  50559  C*18:01     C              3062

[91 rows x 4 columns]
Saved dummy ideal dataset to: iedb_afnd_pc_results\mhcflurry_Zimbabwe_Harare_Shona_ideal.csv
Running IEDB tool for population coverage and saving output to: iedb_afnd_pc_results\mhcfl