In [24]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import requests

pd.set_option('display.max_colwidth', None)

## Helper Functions

In [25]:
def plot_histogram_per_predclass(df, df_name):
    missing_percentage_per_class = {}
    total_rows_per_class = df.groupby('PredClass').size()
    
    for pred_class, group in df.groupby('PredClass'):
        missing_rows = group['Pred'].isnull().sum()
        missing_percentage = (missing_rows / total_rows_per_class[pred_class]) * 100
        missing_percentage_per_class[pred_class] = missing_percentage

    print(f"\n{df_name} - Percentage of missing 'Pred' values per 'PredClass':")
    for pred_class, missing_percentage in missing_percentage_per_class.items():
        print(f"{pred_class}: {missing_percentage:.2f}%")

    print(f"\n{df_name} - Number of rows per 'PredClass' where 'Pred' value is present:")
    for pred_class, group in df.groupby('PredClass'):
        rows_with_pred = group['Pred'].notnull().sum()
        print(f"{pred_class}: {rows_with_pred}")

        sorted_pred = group['Pred'].dropna().sort_values()   
        print(f"Top 5 highest 'Pred' values for {pred_class}: {sorted_pred[-5:].values}")
        print(f"Bottom 5 lowest 'Pred' values for {pred_class}: {sorted_pred[:5].values}")

    df_dropna = df.dropna(subset=['Pred'])
    plt.figure(figsize=(12, 6))
    for pred_class, group in df_dropna.groupby('PredClass'):
        plt.hist(group['Pred'], bins=20, alpha=0.5, label=str(pred_class))

    plt.xlabel('Pred Value')
    plt.ylabel('Frequency')
    plt.title(f'Histogram of Pred Values per PredClass for {df_name}')
    plt.legend(title='Pred Class', loc='upper right')
    plt.show()


## Data Imports 

In [26]:
directory = '/Users/kristinagrigaityte/PycharmProjects/pulls/Files/BSModel/Predictions'
csv_files = [file for file in os.listdir(directory) if file.endswith('.csv')]

for file in csv_files:
    df_name = os.path.splitext(file)[0] 
    globals()[df_name] = pd.read_csv(os.path.join(directory, file))

In [27]:
categories = ['Selection', 'Balancing Selection', 'Negative Freq-Dep. Selection', 'Overdominance']
prefixes = ['recent', 'old', 'medium']
indexes = ['1', '2', '3', '3']
chromosomes = ['ch1', 'ch2', 'ch3', 'ch4', 'ch5', 'ch6', 'ch7', 'ch8', 'ch9', 'ch10',
               'ch11', 'ch12', 'ch13', 'ch14', 'ch15', 'ch16', 'ch17', 'ch18', 'ch19', 'ch20', 'ch21', 'ch22']

data_list = []

for chrom in chromosomes:
    count_dict = {'chromosome': chrom}
    
    for prefix in prefixes:
        for i, category in zip(indexes, categories):
            category_name_parts = category.split()
            formatted_category_name = category_name_parts[0].lower() + ''.join(word.capitalize() for word in category_name_parts[1:])
            df_name = f'prediction_{prefix}_{i}_{chrom}'
            df = globals()[df_name]
            
            selected_rows = df[df['PredClass'] == category]
            
            count = selected_rows.shape[0]
            count_dict[f'{prefix}_{formatted_category_name}'] = count
            
            if category != 'Overdominance':
                if count > 0:
                    avg_score = selected_rows['Pred'].mean()
                else:
                    avg_score = 0.0
                count_dict[f'{prefix}_{formatted_category_name}_avg'] = f"{avg_score:.3f}"
    
    data_list.append(count_dict)

counts_df = pd.DataFrame(data_list)
pd.set_option('display.max_columns', None)
counts_df

Unnamed: 0,chromosome,recent_selection,recent_selection_avg,recent_balancingSelection,recent_balancingSelection_avg,recent_negativeFreq-dep.Selection,recent_negativeFreq-dep.Selection_avg,recent_overdominance,old_selection,old_selection_avg,old_balancingSelection,old_balancingSelection_avg,old_negativeFreq-dep.Selection,old_negativeFreq-dep.Selection_avg,old_overdominance,medium_selection,medium_selection_avg,medium_balancingSelection,medium_balancingSelection_avg,medium_negativeFreq-dep.Selection,medium_negativeFreq-dep.Selection_avg,medium_overdominance
0,ch1,150,0.998,23,0.606,147,0.565,8583,150,0.997,0,0.0,150,0.655,8580,150,1.0,0,0.0,150,0.709,8580
1,ch2,1,0.985,0,0.0,1,0.526,9060,1,0.986,0,0.0,1,0.549,9060,1,0.998,0,0.0,1,0.575,9060
2,ch3,763,0.998,106,0.619,738,0.552,6633,763,0.998,10,0.602,763,0.628,6608,763,1.0,17,0.6,763,0.678,6608
3,ch4,454,0.999,58,0.594,453,0.573,5730,454,0.999,0,0.0,454,0.676,5729,454,1.0,0,0.0,454,0.736,5729
4,ch5,133,0.999,10,0.579,133,0.573,6339,133,0.999,0,0.0,133,0.672,6339,133,1.0,0,0.0,133,0.732,6339
5,ch6,1393,0.998,252,0.601,1375,0.562,5693,1393,0.999,1,0.52,1393,0.652,5675,1393,1.0,1,0.509,1393,0.704,5675
6,ch7,353,0.999,28,0.599,350,0.58,5583,353,0.999,0,0.0,353,0.687,5580,353,1.0,0,0.0,353,0.752,5580
7,ch8,145,0.994,20,0.567,144,0.546,5590,145,0.992,0,0.0,145,0.614,5589,145,0.999,0,0.0,145,0.662,5589
8,ch9,390,0.999,54,0.629,385,0.579,4579,390,0.999,1,0.509,390,0.684,4574,390,1.0,0,0.0,390,0.744,4574
9,ch10,429,0.999,74,0.615,425,0.584,5455,429,0.999,0,0.0,429,0.7,5451,429,1.0,0,0.0,429,0.759,5451


In [28]:
def plot_line_with_values(dataframe, prefix, legend_labels, colors, filename):
    relevant_columns = [col for col in dataframe.columns if col.startswith(prefix) and "_avg" in col]
    plot_data = dataframe[['chromosome'] + relevant_columns]
    
    plot_data.set_index('chromosome', inplace=True)
    plot_data = plot_data.apply(pd.to_numeric, errors='coerce')
    plot_data.dropna(subset=relevant_columns, how='all', inplace=True)
    
    ax = plot_data.plot(kind='line', figsize=(14, 7), color=colors, marker='o')
    plt.title(f'Line Plot for {prefix.capitalize()} Selection')
    plt.xlabel('Chromosome')
    plt.ylabel('Prediction Score')
    
    new_labels = [legend_labels.get(col, col) for col in relevant_columns]
    plt.legend(new_labels, title='Selection Type', bbox_to_anchor=(1.05, 1), loc='upper left')
    
    ax.set_xticks(range(len(plot_data.index)))
    ax.set_xticklabels(plot_data.index, rotation=45, ha='right')
    
    for line in ax.get_lines():
        for x, y in zip(line.get_xdata(), line.get_ydata()):
            if y == 0:
                ax.annotate(f'{y:.2f}', xy=(x, y), xytext=(5, 5), textcoords='offset points')

    plt.tight_layout()
    
    plt.tight_layout()
    plt.savefig(filename)
    plt.close()

legend_labels_avg = {
    'recent_selection_avg': 'Selection Prediction Average',
    'recent_balancingSelection_avg': 'Balancing Selection Prediction Average',
    'recent_negativeFreq-dep.Selection_avg': 'Negative Freq-dep. Selection Prediction Average',
    
    'old_selection_avg': 'Selection Prediction Average',
    'old_balancingSelection_avg': 'Balancing Selection Prediction Average',
    'old_negativeFreq-dep.Selection_avg': 'Negative Freq-dep. Selection Prediction Average',
    
    'medium_selection_avg': 'Selection Prediction Average',
    'medium_balancingSelection_avg': 'Balancing Selection Prediction Average',
    'medium_negativeFreq-dep.Selection_avg': 'Negative Freq-dep. Selection Prediction Average'
}

secure_blue = '#3357A4'
peaceful_mint = '#A0E0D8'
innovation_purple = '#879BEF'

colors_dict = {
    'old': [secure_blue, peaceful_mint, innovation_purple],
    'recent': [secure_blue, peaceful_mint, innovation_purple],
    'medium': [secure_blue, peaceful_mint, innovation_purple]
}

for prefix in ['old', 'recent', 'medium']:
    filename = f'{prefix}_selection_plot.png'
    plot_line_with_values(counts_df, prefix, legend_labels_avg, colors_dict[prefix], filename)
    plt.show()

In [29]:
def plot_stacked_bars(dataframe, prefix, legend_labels, colors, filename):
    relevant_columns = [col for col in dataframe.columns if col.startswith(prefix) and "avg" not in col]
    plot_data = dataframe[['chromosome'] + relevant_columns]

    plot_data.set_index('chromosome', inplace=True)

    ax = plot_data.plot(kind='bar', stacked=True, figsize=(14, 7), color=colors)
    plt.title(f'Stacked Bar Plot for {prefix.capitalize()} Selection')
    plt.xlabel('Chromosome')
    plt.ylabel('SNP Count')
    
    new_labels = [legend_labels.get(col, col) for col in relevant_columns]
    
    plt.legend(new_labels, title='Selection Type', bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()
    
    plt.savefig(filename)
    plt.close()

legend_labels = {
    'recent_selection': 'Selection',
    'recent_balancingSelection': 'Balancing Selection',
    'recent_negativeFreq-dep.Selection': 'Negative Freq-dep. Selection',
    'recent_overdominance': 'Overdominance',
    
    'old_selection': 'Selection',
    'old_balancingSelection': 'Balancing Selection',
    'old_negativeFreq-dep.Selection': 'Negative Freq-dep. Selection',
    'old_overdominance': 'Overdominance',
    
    'medium_selection': 'Selection',
    'medium_balancingSelection': 'Balancing Selection',
    'medium_negativeFreq-dep.Selection': 'Negative Freq-dep. Selection',
    'medium_overdominance': 'Overdominance',
}

secure_blue = '#879BEF'
peaceful_mint = '#FFBD00'
innovation_purple = '#72bcd4'
navy = '#3357A4'

colors_dict = {
    'old': [secure_blue, peaceful_mint, innovation_purple, navy],
    'recent': [secure_blue, peaceful_mint, innovation_purple, navy],
    'medium': [secure_blue, peaceful_mint, innovation_purple, navy]
}

for prefix in ['old', 'recent', 'medium']:
    filename = f"{prefix}_stacked_bar_plot.png"
    plot_stacked_bars(counts_df, prefix, legend_labels, colors_dict[prefix], filename)

In [77]:
available_dfs = [var_name for var_name, var in globals().items() if isinstance(var, pd.DataFrame)]

print("Available Dataframes:")
# for df_name in available_dfs:
#     print(df_name)

Available Dataframes:


## Archaic Matches

In [31]:
archaic = pd.read_csv('/Users/kristinagrigaityte/PycharmProjects/pulls/Files/BSModel/Predictions/archaic.csv')
archaic.tail()

Unnamed: 0,chr,seg,from,to,AltaiNean,AltaiDeni
1594,22,103,49054049,49103380,0.5859,0.5636
1595,22,104,49103660,49153192,0.9831,0.9636
1596,22,105,49307031,49353474,0.1228,0.2586
1597,22,106,49404049,49450895,0.5225,0.5405
1598,22,107,49460591,49502569,0.7818,0.5862


In [54]:
def add_chr_column(df, chr_number):
    df = df.copy()
    df.loc[:, 'chr'] = chr_number
    return df

def get_df_name(prefix, chr_number):
    return f'{prefix}_ch{chr_number}'

prefixes = ['prediction_recent_1', 'prediction_recent_2', 'prediction_recent_3']
pred_classes = ["Selection", "Balancing Selection", "Overdominance", "Negative Freq-Dep. Selection"]
all_dfs = []

for chr_number in range(1, 23):   
    for prefix in prefixes:
        df_name = get_df_name(prefix, chr_number)
        try:
            df = globals()[df_name]
            for pred_class in pred_classes:
                filtered_df = df[df['PredClass'] == pred_class]
                if not filtered_df.empty:
                    filtered_df = add_chr_column(filtered_df, chr_number)
                    all_dfs.append(filtered_df)
        except KeyError:
            continue

all_data = pd.concat(all_dfs, ignore_index=True)
all_data.tail()

Unnamed: 0,SNP,Positions,Pred,PredClass,chr
123025,rs131718,49404789,0.586998,Negative Freq-Dep. Selection,22
123026,rs131715,49413787,0.585034,Negative Freq-Dep. Selection,22
123027,rs9616915,49464446,0.550545,Negative Freq-Dep. Selection,22
123028,rs739365,49487182,0.597457,Negative Freq-Dep. Selection,22
123029,rs6009951,49498216,0.574389,Negative Freq-Dep. Selection,22


In [55]:
selection_snps = all_data[all_data['PredClass'] == 'Selection']['SNP'].unique()
filtered_df = all_data[all_data['PredClass'].isin(['Balancing Selection', 'Overdominance', 'Negative Freq-Dep. Selection'])]
all_data = filtered_df[filtered_df['SNP'].isin(selection_snps)]
all_data.head()

Unnamed: 0,SNP,Positions,Pred,PredClass,chr
150,rs10927011,241645735,0.550709,Balancing Selection,1
151,rs692981,242232230,0.517401,Balancing Selection,1
152,rs1000543,242254223,0.581432,Balancing Selection,1
153,rs10927137,242355096,0.580353,Balancing Selection,1
154,rs913994,242381803,0.574044,Balancing Selection,1


### Genes

In [56]:
%%time

def find_snps_in_range(row, all_data):
    snps_in_range = all_data[
        (all_data['chr'] == row['chr']) & 
        (all_data['Positions'] >= row['from']) & 
        (all_data['Positions'] <= row['to'])
    ]
    grouped_snps = snps_in_range.groupby('PredClass').agg({'SNP': list}).reset_index()
    return grouped_snps

new_rows = []

for _, row in archaic.iterrows():
    snps_grouped = find_snps_in_range(row, all_data)
    for _, snp_group in snps_grouped.iterrows():
        new_rows.append([
            row['chr'], row['seg'], row['from'], row['to'], row['AltaiNean'], row['AltaiDeni'],
            snp_group['SNP'], snp_group['PredClass']
        ])

flattened_df = pd.DataFrame(new_rows, columns=['chr', 'seg', 'from', 'to', 'AltaiNean', 'AltaiDeni', 'SNPs', 'PredClass'])

CPU times: user 892 ms, sys: 3.86 ms, total: 896 ms
Wall time: 903 ms


In [58]:
flattened_df.tail()

Unnamed: 0,chr,seg,from,to,AltaiNean,AltaiDeni,SNPs,PredClass
171,22.0,102.0,49003874.0,49053529.0,0.1111,0.4744,"[rs742186, rs2235356]",Negative Freq-Dep. Selection
172,22.0,105.0,49307031.0,49353474.0,0.1228,0.2586,[rs140519],Balancing Selection
173,22.0,105.0,49307031.0,49353474.0,0.1228,0.2586,"[rs140519, rs131778]",Negative Freq-Dep. Selection
174,22.0,106.0,49404049.0,49450895.0,0.5225,0.5405,"[rs131718, rs131715]",Negative Freq-Dep. Selection
175,22.0,107.0,49460591.0,49502569.0,0.7818,0.5862,"[rs9616915, rs739365, rs6009951]",Negative Freq-Dep. Selection


In [65]:
filtered_df = flattened_df[(flattened_df['AltaiNean'] > 0.85)]
filtered_df.tail()

Unnamed: 0,chr,seg,from,to,AltaiNean,AltaiDeni,SNPs,PredClass
132,22.0,56.0,35654133.0,35703551.0,0.8596,0.8305,"[rs11089816, rs5750348]",Negative Freq-Dep. Selection
135,22.0,59.0,36103709.0,36133397.0,1.0,0.8636,"[rs4820286, rs5750428]",Negative Freq-Dep. Selection
146,22.0,81.0,44107968.0,44149935.0,0.9167,1.0,"[rs11704481, rs2064068, rs1044742, rs5764698]",Balancing Selection
147,22.0,81.0,44107968.0,44149935.0,0.9167,1.0,"[rs11704481, rs2064068, rs1044742, rs5764698]",Negative Freq-Dep. Selection
160,22.0,93.0,47432419.0,47445957.0,0.9107,0.963,"[rs5771906, rs6010568]",Negative Freq-Dep. Selection


In [72]:
%%time

def get_genes_and_consequence_for_snp(snp_id, chr_column):
    server = "https://grch37.rest.ensembl.org"
    ext = f"/variation/human/{snp_id}?content-type=application/json"
    r = requests.get(server + ext, headers={"Content-Type": "application/json"})
    
    if not r.ok:
        return [], None
    
    data = r.json()
    genes = []
    consequence = data.get("most_severe_consequence", "unknown")
    
    if 'mappings' in data:
        for mapping in data['mappings']:
            location = f"{chr_column}:{mapping['start']}-{mapping['end']}"
            ext = f"/overlap/region/human/{location}?feature=gene;content-type=application/json"
            r = requests.get(server + ext, headers={"Content-Type": "application/json"})
            
            if not r.ok:
                continue
            
            gene_data = r.json()
            
            for gene in gene_data:
                genes.append(gene['id'])   
    
    return list(set(genes)), consequence

def add_gene_info(row):
    all_genes = []
    consequences = []
    chr_column = str(int(row['chr']))  
    
    for snp in row['SNPs']:
        gene_ids, consequence = get_genes_and_consequence_for_snp(snp, chr_column)
        if gene_ids:
            all_genes.extend(gene_ids)
        consequences.append(consequence)
    
    return list(set(all_genes)), consequences

filtered_df = filtered_df.copy()
filtered_df[['Genes', 'Consequences']] = filtered_df.apply(
    lambda row: pd.Series(add_gene_info(row)), axis=1)

In [73]:
filtered_df.head()

Unnamed: 0,chr,seg,from,to,AltaiNean,AltaiDeni,SNPs,PredClass,Genes,Consequences,Proteins
9,3.0,98.0,189261797.0,189309846.0,0.9394,0.8154,"[rs2037184, rs6444269]",Negative Freq-Dep. Selection,[],"[intergenic_variant, intergenic_variant]",[]
10,3.0,100.0,191112052.0,191161035.0,1.0,0.8298,"[rs12696598, rs724438, rs4687112]",Negative Freq-Dep. Selection,[],"[intergenic_variant, regulatory_region_variant, regulatory_region_variant]",[]
12,3.0,103.0,191661171.0,191710630.0,0.9479,0.95,"[rs3943979, rs9857115, rs2042865]",Negative Freq-Dep. Selection,[],"[intergenic_variant, intergenic_variant, regulatory_region_variant]",[]
13,3.0,104.0,191711064.0,191755302.0,0.9818,1.0,"[rs2193880, rs3773989]",Negative Freq-Dep. Selection,[ENSG00000196083],"[intron_variant, intron_variant]","[[IL1RAP, interleukin 1 receptor accessory protein [Source:HGNC Symbol;Acc:5995]]]"
15,4.0,141.0,186117497.0,186164767.0,0.9268,0.775,[rs4355430],Negative Freq-Dep. Selection,[],[intergenic_variant],[]


### Proteins

In [74]:
%%time

def get_protein_info_for_gene(gene_id):
    server = "https://grch37.rest.ensembl.org"
    ext = f"/lookup/id/{gene_id}?content-type=application/json"
    r = requests.get(server + ext, headers={"Content-Type": "application/json"})
    
    if not r.ok:
        return None
    
    data = r.json()
    protein_name = data.get('display_name', 'Unknown')
    description = data.get('description', 'No description available')
    
    return [protein_name, description]

def add_protein_info(genes):
    protein_info = []
    for gene in genes:
        info = get_protein_info_for_gene(gene)
        if info:
            protein_info.append(info)
    return protein_info

filtered_df['Proteins'] = filtered_df['Genes'].apply(add_protein_info)

CPU times: user 1.21 s, sys: 56.6 ms, total: 1.26 s
Wall time: 6.34 s


In [75]:
filtered_df.to_csv("genes.csv", index=False)
filtered_df

Unnamed: 0,chr,seg,from,to,AltaiNean,AltaiDeni,SNPs,PredClass,Genes,Consequences,Proteins
9,3.0,98.0,189261797.0,189309846.0,0.9394,0.8154,"[rs2037184, rs6444269]",Negative Freq-Dep. Selection,[],"[intergenic_variant, intergenic_variant]",[]
10,3.0,100.0,191112052.0,191161035.0,1.0,0.8298,"[rs12696598, rs724438, rs4687112]",Negative Freq-Dep. Selection,[],"[intergenic_variant, regulatory_region_variant, regulatory_region_variant]",[]
12,3.0,103.0,191661171.0,191710630.0,0.9479,0.95,"[rs3943979, rs9857115, rs2042865]",Negative Freq-Dep. Selection,[],"[intergenic_variant, intergenic_variant, regulatory_region_variant]",[]
13,3.0,104.0,191711064.0,191755302.0,0.9818,1.0,"[rs2193880, rs3773989]",Negative Freq-Dep. Selection,[ENSG00000196083],"[intron_variant, intron_variant]","[[IL1RAP, interleukin 1 receptor accessory protein [Source:HGNC Symbol;Acc:5995]]]"
15,4.0,141.0,186117497.0,186164767.0,0.9268,0.775,[rs4355430],Negative Freq-Dep. Selection,[],[intergenic_variant],[]
17,6.0,104.0,143867336.0,143897977.0,0.9661,0.1379,[rs2128977],Negative Freq-Dep. Selection,[],[regulatory_region_variant],[]
18,6.0,110.0,156898474.0,156946230.0,0.8947,0.9459,[rs288976],Balancing Selection,[],[intergenic_variant],[]
19,6.0,110.0,156898474.0,156946230.0,0.8947,0.9459,[rs288976],Negative Freq-Dep. Selection,[],[intergenic_variant],[]
20,6.0,112.0,159198260.0,159247967.0,0.9672,0.2143,"[rs9457507, rs9457511, rs12200537]",Negative Freq-Dep. Selection,[ENSG00000203711],"[intron_variant, intron_variant, intron_variant]","[[C6orf99, chromosome 6 open reading frame 99 [Source:HGNC Symbol;Acc:21179]]]"
23,6.0,115.0,161248552.0,161291617.0,0.9286,0.9032,[rs1962358],Negative Freq-Dep. Selection,[],[intergenic_variant],[]


In [79]:
def collect_protein_names(df):
    protein_data = []

    for index, row in df.iterrows():
        pred_class = row['PredClass']
        proteins = row['Proteins']
        
        for protein in proteins:
            protein_name = protein[0]
            protein_data.append((pred_class, protein_name))

    return protein_data

protein_data = collect_protein_names(filtered_df)
protein_df = pd.DataFrame(protein_data, columns=['PredClass', 'Protein'])
result_df = protein_df.groupby(['PredClass', 'Protein']).size()
pd.set_option('display.max_rows', None)
result_df

PredClass                     Protein      
Balancing Selection           AK1              1
                              FAM118A          1
                              RP11-203J24.9    1
                              SMC1B            1
                              TPP2             1
Negative Freq-Dep. Selection  ADAM12           1
                              AK1              1
                              AL035610.1       1
                              AL035610.2       1
                              C6orf99          1
                              CTD-2571L23.6    1
                              ELFN2            1
                              FAM118A          1
                              FAM19A5          1
                              IL1RAP           1
                              PACRG            1
                              RP1-63G5.5       1
                              RP11-203J24.9    1
                              RP11-648K4.2     1
                         

In [80]:
proteins = pd.read_csv('/Users/kristinagrigaityte/PycharmProjects/pulls/Files/BSModel/Predictions/proteins.csv')
proteins

Unnamed: 0,Protein,Function
0,AK1,Involved in the regulation of adenylate kinase activity and energy metabolism.
1,FAM118A,Potential role in cellular processes and disease mechanisms.
2,SMC1B,"Part of the cohesin complex, important for chromosome segregation during meiosis."
3,SLC39A11,Zinc transporter involved in maintaining zinc homeostasis.
4,RP11-648K4.2,"Non-coding RNA, function not well understood."
5,RP11-203J24.9,"Non-coding RNA, potential regulatory roles."
6,RP1-63G5.5,"Non-coding RNA, function not well understood."
7,PACRG,"Associated with Parkinson’s disease, involved in microtubule stability."
8,IL1RAP,"Interleukin-1 receptor accessory protein, involved in inflammatory response."
9,FAM19A5,Potentially involved in neural development and function.
