In [None]:
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.linear_model import ElasticNet

In [None]:
# Tag that selects which family of input files under ../data is loaded.
dataset = 'cg'

# Gene table: tab-separated, no header row; columns are named 'I' and 'G'.
# The 'G' column is reused below as the column labels of the cell feature matrix.
gene_index = pd.read_csv(
    f'../data/gene2ind_{dataset}.txt',
    sep='\t',
    header=None,
    names=['I', 'G'],
)
gene_list = gene_index['G']

# Cell table: maps each value of column 'C' to the matching value of column 'I'.
cell_index = pd.read_csv(
    f'../data/cell2ind_{dataset}.txt',
    sep='\t',
    header=None,
    names=['I', 'C'],
)
cell_map = dict(zip(cell_index['C'], cell_index['I']))

# Per-cell feature rows (default comma separator), one column per entry of gene_list.
cell_features = pd.read_csv(
    f'../data/cell2mutation_{dataset}.txt',
    header=None,
    names=gene_list,
)

# Drug table: maps each value of column 'D' to the matching value of column 'I'.
drug_index = pd.read_csv(
    f'../data/drug2ind_{dataset}.txt',
    sep='\t',
    header=None,
    names=['I', 'D'],
)
drug_map = dict(zip(drug_index['D'], drug_index['I']))

# Per-drug feature rows (default comma separator), columns left with integer labels.
drug_features = pd.read_csv(f'../data/drug2fingerprint_{dataset}.txt', header=None)

In [None]:
def _build_feature_matrix(pair_df, cell_matrix, drug_matrix):
    """Return one row per (cell, drug) pair: cell features followed by drug features."""
    # Plain dict lookups (rather than Series.map) so that an unknown cell or
    # drug name still raises KeyError instead of silently turning into NaN.
    cell_rows = np.array([int(cell_map[name]) for name in pair_df['cell']], dtype=np.intp)
    drug_rows = np.array([int(drug_map[name]) for name in pair_df['drug']], dtype=np.intp)
    # Integer-array indexing selects all needed rows at once; rows are laid out
    # in the positional order of pair_df, independent of its index labels.
    return np.hstack([cell_matrix[cell_rows], drug_matrix[drug_rows]])


def prepare_data(train_df, test_df):
    """Build the design matrices for one train/test split.

    Each output row is the concatenation of the feature row of the pair's cell
    (from the module-level ``cell_features``) and the feature row of the pair's
    drug (from the module-level ``drug_features``). Row positions are looked up
    through the module-level ``cell_map`` and ``drug_map``.

    Parameters
    ----------
    train_df, test_df : pandas.DataFrame
        Frames with at least the columns 'cell' and 'drug'; ``train_df`` must
        also have an 'auc' column. Any index is accepted: rows are placed by
        position, not by index label.

    Returns
    -------
    train_X : numpy.ndarray of float, shape (len(train_df), n_cell_feat + n_drug_feat)
    train_Y : numpy.ndarray holding the 'auc' column of ``train_df``
    test_X : numpy.ndarray of float, shape (len(test_df), n_cell_feat + n_drug_feat)

    Raises
    ------
    KeyError
        If a cell or drug name is missing from ``cell_map`` / ``drug_map``.
    """
    # Convert the feature tables once per call instead of once per row.
    cell_matrix = np.asarray(cell_features, dtype=float)
    drug_matrix = np.asarray(drug_features, dtype=float)

    train_Y = np.array(train_df['auc'])
    train_X = _build_feature_matrix(train_df, cell_matrix, drug_matrix)
    test_X = _build_feature_matrix(test_df, cell_matrix, drug_matrix)

    return train_X, train_Y, test_X

In [None]:
def run_elastic_net(dataset, ont, n_folds=5):
    """Fit and evaluate an ElasticNet on each numbered train/test split.

    For every fold ``i`` in ``1..n_folds`` this reads
    ``../data/<i>_drugcell_train_<dataset>.txt`` and
    ``../data/<i>_drugcell_test_<dataset>.txt`` (tab-separated, no header,
    columns cell/drug/auc), fits ``ElasticNet(alpha=0.1, tol=1e-5)`` on the
    training matrix, predicts on the test matrix, prints the fold number with
    its Spearman correlation, and writes the predictions to
    ``../result/<i>_predict_elastic_net_<ont>.txt``.

    Parameters
    ----------
    dataset : str
        Tag embedded in the names of the split files that are read.
    ont : str
        Tag embedded in the names of the prediction files that are written.
    n_folds : int, optional
        Number of numbered splits to process. Defaults to 5, the value that
        was previously hard-coded in both the loop bound and the divisor.

    Returns
    -------
    float
        Mean over folds of the Spearman correlation between the predictions
        and the 'auc' column of the test split.

    Raises
    ------
    ValueError
        If ``n_folds`` is smaller than 1.
    """
    if n_folds < 1:
        raise ValueError("n_folds must be at least 1")

    columns = ['cell', 'drug', 'auc']
    fold_corrs = []

    for i in range(1, n_folds + 1):
        train_df = pd.read_csv("../data/" + str(i) + "_drugcell_train_" + dataset + ".txt", sep='\t', header=None, names=columns)
        test_df = pd.read_csv("../data/" + str(i) + "_drugcell_test_" + dataset + ".txt", sep='\t', header=None, names=columns)
        train_X, train_Y, test_X = prepare_data(train_df, test_df)

        regr = ElasticNet(alpha=0.1, tol=1e-5)
        regr.fit(train_X, train_Y)
        predicted_Y = regr.predict(test_X)

        # Element 0 of the spearmanr result is the correlation coefficient.
        sm_corr = stats.spearmanr(predicted_Y, test_df['auc'])[0]
        fold_corrs.append(sm_corr)

        print(str(i), sm_corr)
        np.savetxt("../result/" + str(i) + "_predict_elastic_net_" + ont + ".txt", predicted_Y, fmt = '%.4e')

    # Loop bound and divisor share n_folds, so they cannot drift apart.
    return sum(fold_corrs) / n_folds

In [None]:
# Tag passed to run_elastic_net, which embeds it in the prediction file names.
ont = "cg"

# Run every fold for the selected dataset and show the averaged correlation.
avg_corr = run_elastic_net(dataset=dataset, ont=ont)
print(avg_corr)