In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNetCV

In [None]:
# Drug-response tables: tab-separated, no header row, columns named cell / drug / auc.
# NOTE(review): the training table is loaded from a file whose name ends in
# "_test.txt" — confirm this is the intended training split.
train_df = pd.read_csv(
    '../data/1_drugcell_test.txt',
    sep='\t',
    header=None,
    names=['cell', 'drug', 'auc'],
)
test_df = pd.read_csv(
    '../data/2_drugcell_test.txt',
    sep='\t',
    header=None,
    names=['cell', 'drug', 'auc'],
)

# Gene index table (columns I, G); the G column labels the cell feature columns below.
gene_index = pd.read_csv(
    '../data/gene2ind.txt',
    sep='\t',
    header=None,
    names=['I', 'G'],
)
gene_list = gene_index['G']

# Cell lookup: value in column C -> value in column I.
cell_index = pd.read_csv(
    '../data/cell2ind.txt',
    sep='\t',
    header=None,
    names=['I', 'C'],
)
cell_map = {name: idx for name, idx in zip(cell_index['C'], cell_index['I'])}

# Cell feature table, parsed with the default (comma) delimiter and labelled with gene_list.
cell_features = pd.read_csv(
    '../data/cell2mutation.txt',
    header=None,
    names=gene_list,
)

# Drug lookup: value in column D -> value in column I.
drug_index = pd.read_csv(
    '../data/drug2ind.txt',
    sep='\t',
    header=None,
    names=['I', 'D'],
)
drug_map = {name: idx for name, idx in zip(drug_index['D'], drug_index['I'])}

# Drug feature table, parsed with the default (comma) delimiter and default integer column labels.
drug_features = pd.read_csv(
    '../data/drug2fingerprint.txt',
    header=None,
)

In [None]:
def build_feature_matrix(pairs_df, cell_features, cell_map, drug_features, drug_map):
    """Assemble the model input matrix for a table of (cell, drug) pairs.

    Each output row is the feature row of the pair's cell followed by the
    feature row of its drug, so the leading ``cell_features.shape[1]`` columns
    are cell features and the remaining columns are drug features.

    Rows are produced in the positional order of ``pairs_df``; the DataFrame's
    index labels are not used, so a non-default index cannot misplace rows.

    Parameters
    ----------
    pairs_df : pandas.DataFrame
        Table with 'cell' and 'drug' columns.
    cell_features, drug_features : pandas.DataFrame
        Feature tables addressed by integer row position.
    cell_map, drug_map : dict
        Map a cell / drug name to its row position in the matching feature table.

    Returns
    -------
    numpy.ndarray
        float64 array of shape
        ``(len(pairs_df), cell_features.shape[1] + drug_features.shape[1])``.

    Raises
    ------
    KeyError
        If a cell or drug name is missing from its lookup table.
    IndexError
        If a mapped row position lies outside its feature table.
    """
    # Plain dict lookups (rather than Series.map) so an unknown name fails loudly.
    cell_rows = np.asarray([int(cell_map[name]) for name in pairs_df['cell']], dtype=np.intp)
    drug_rows = np.asarray([int(drug_map[name]) for name in pairs_df['drug']], dtype=np.intp)

    cell_values = cell_features.to_numpy()
    drug_values = drug_features.to_numpy()
    n_cell = cell_values.shape[1]

    # Allocate the result once and fill the two column groups separately, so
    # only one gathered temporary is alive at a time.
    matrix = np.empty((len(pairs_df), n_cell + drug_values.shape[1]), dtype=np.float64)
    matrix[:, :n_cell] = cell_values[cell_rows]
    matrix[:, n_cell:] = drug_values[drug_rows]
    return matrix


# Regression target: the 'auc' column of the training table.
train_Y = np.array(train_df['auc'])

train_X = build_feature_matrix(train_df, cell_features, cell_map, drug_features, drug_map)
test_X = build_feature_matrix(test_df, cell_features, cell_map, drug_features, drug_map)

In [None]:
# Elastic-net regression with 5-fold cross-validation: fit on the training
# matrix, then predict the response for every test row.
regr = ElasticNetCV(
    fit_intercept=True,
    cv=5,
    max_iter=3000,
    tol=1e-3,
    n_jobs=-2,
)
regr.fit(train_X, train_Y)
predicted_Y = regr.predict(test_X)

In [None]:
# Write the test-set predictions to disk in scientific notation with four decimals.
np.savetxt(fname='../result/elastic_net.predict', X=predicted_Y, fmt='%.4e')

In [None]:
# Print the values predicted for the test set.
print(predicted_Y)

In [None]:
# Per-sample count of cell features equal to 1. Only the first len(gene_list)
# columns of train_X hold the cell mutation features; the columns after them
# are drug fingerprint values and are excluded so they do not inflate the count.
mutation_count = np.count_nonzero(train_X[:, :len(gene_list)] == 1, axis=1)
mutation_count