In [6]:
import pandas as pd

#----------------- UV Score -----------------#
def map_uv(timepoint):
    '''Translate one UV-treatment timepoint label into its numeric score.

    'mock' scores -4, '01h' scores 1 and '24h' scores 2; every other
    label falls through to a score of 3.
    '''
    known_scores = (('mock', -4), ('01h', 1), ('24h', 2))
    for label, points in known_scores:
        if timepoint == label:
            return points
    # Any label not listed above receives the fallback score.
    return 3

def uv_score(uv):
    '''Sum the scores of the distinct timepoints in a colon-separated string.

    The string is split on colons, repeated labels are counted only once,
    and each remaining label is scored with map_uv.
    '''
    distinct_timepoints = set(uv.split(':'))
    return sum(map_uv(label) for label in distinct_timepoints)

#----------------- Compute Score -----------------#
def compute_score(row):
    '''
    Compute the weighted selection score for one protein.

    The score is a weighted sum of the row's normalized columns:

        Total Article Count (normalized)          weight -1   (fewer is better)
        Term Article Count (normalized)           weight +1
        Number of Ref-ed Antibodies (normalized)  weight +0.5
        Interactions (normalized)                 weight +1
        GO Score (normalized)                     weight +1
        UV Score (normalized)                     weight +1

    Parameters:
        row: any mapping-like object indexable by the column names above
             (for example a DataFrame row passed by DataFrame.apply(axis=1)).

    Returns:
        The weighted sum of the six normalized values.

    Raises:
        KeyError: if one of the normalized columns is missing from row.
    '''

    # Design notes carried over from the original author (still open):
    #   - Late timepoints should count more and mock less. ERCC6 is rated high
    #     by the score although it is present at 3 timepoints and mock is one
    #     of them, so the UV score needs fixing.
    #   - Any term found in an abstract should increase the score.

    # The parentheses are required: without them each line starting with '+'
    # is parsed as a separate unary-plus statement and is silently discarded,
    # leaving only the first term in the score.
    score = (
        row['Total Article Count (normalized)'] * -1
        + row['Term Article Count (normalized)']
        + row['Number of Ref-ed Antibodies (normalized)'] * 0.5
        + row['Interactions (normalized)']
        + row['GO Score (normalized)']
        + row['UV Score (normalized)']
    )
    return score

def final_score(final_output_csv):
    '''
    Build the protein selection table and write it to final_output_csv.

    Reads 'final-scores-3.csv', 'combined-scores.csv' and
    'proteins_with_uv.csv' from the working directory, adds the total
    article count and the UV score to the first table, scales each numeric
    score column by its standard deviation, computes the overall score per
    row and saves the selected columns as CSV.

    NOTE(review): the columns taken from the other two files are attached
    by index label, so the three files are presumably expected to list the
    same proteins in the same order -- confirm.
    '''
    scores = pd.read_csv('final-scores-3.csv')
    totals = pd.read_csv('combined-scores.csv')
    uv_table = pd.read_csv('proteins_with_uv.csv')

    # Attach the extra inputs first, then free the 'Article Count' name by
    # relabelling the per-term count already present in the base table.
    scores['Total Article Count'] = totals['Article Count']
    scores['UV Score'] = uv_table['UV_treatment'].apply(uv_score)
    scores = scores.rename(columns={'Article Count': 'Term Article Count'})

    numeric_columns = [
        'Number of Ref-ed Antibodies',
        'Total Article Count',
        'Term Article Count',
        'Interactions',
        'GO Score',
        'UV Score',
    ]
    for name in numeric_columns:
        values = pd.to_numeric(scores[name])
        scores[name] = values
        # Scale by the standard deviation so the columns are comparable.
        scores[f'{name} (normalized)'] = values / values.std()

    # Compute weighted score for protein selection and save results to CSV
    scores['Overall Score'] = scores.apply(compute_score, axis=1)
    output_columns = [
        'Uniprot',
        'Protein Symbol',
        'Antibody Link',
        'Number of Antibodies',
        'Number of Providers',
        'Number of Ref-ed Antibodies',
        'Total Article Count',
        'Term Article Count',
        'Interactions',
        'GO Score',
        'UV Score',
        'Overall Score',
        'GO Terms',
    ]
    scores = scores[output_columns]
    scores.to_csv(final_output_csv)
    print('Protein selection data saved to', final_output_csv)

# Run the scoring pipeline and write the resulting table to 'final-scores-4.csv'.
final_score('final-scores-4.csv')


Protein selection data saved to final-scores-4.csv
