## Import necessary libraries

In [1]:
from collections import defaultdict
import os
import pickle
import sys
import timeit

import numpy as np

from rdkit import Chem
from rdkit.Chem import rdDepictor, Descriptors
from rdkit.Chem import MACCSkeys

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.ensemble import RandomForestClassifier
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, auc, roc_curve

from sklearn.exceptions import UndefinedMetricWarning

def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

#### Check if CUDA is available

In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Helper functions

In [3]:
# dictionary of atoms where a new element gets a new index
def create_atoms(mol):
    atoms = [atom_dict[a.GetSymbol()] for a in mol.GetAtoms()]
    return np.array(atoms)

# format from_atomIDx : [to_atomIDx, bondDict]
def create_ijbonddict(mol):
    i_jbond_dict = defaultdict(lambda: [])
    for b in mol.GetBonds():
        i, j = b.GetBeginAtomIdx(), b.GetEndAtomIdx()
        bond = bond_dict[str(b.GetBondType())]
        i_jbond_dict[i].append((j, bond))
        i_jbond_dict[j].append((i, bond))
    return i_jbond_dict


def create_fingerprints(atoms, i_jbond_dict, radius):
    """Extract the r-radius subgraphs (i.e., fingerprints)
    from a molecular graph using WeisfeilerLehman-like algorithm."""

    if (len(atoms) == 1) or (radius == 0):
        fingerprints = [fingerprint_dict[a] for a in atoms]

    else:
        vertices = atoms
        for _ in range(radius):
            fingerprints = []
            for i, j_bond in i_jbond_dict.items():
                neighbors = [(vertices[j], bond) for j, bond in j_bond]
                fingerprint = (vertices[i], tuple(sorted(neighbors)))
                fingerprints.append(fingerprint_dict[fingerprint])
            vertices = fingerprints

    return np.array(fingerprints)


def create_adjacency(mol):
    adjacency  = Chem.GetAdjacencyMatrix(mol)
    n          = adjacency.shape[0]

    adjacency  = adjacency + np.eye(n)
    degree     = sum(adjacency)
    d_half     = np.sqrt(np.diag(degree))
    d_half_inv = np.linalg.inv(d_half)
    adjacency  = np.matmul(d_half_inv,np.matmul(adjacency,d_half_inv))
    return np.array(adjacency)


def dump_dictionary(dictionary, file_name):
    with open(file_name, 'wb') as f:
        pickle.dump(dict(dictionary), f)


def load_tensor(file_name, dtype):
    return [dtype(d).to(device) for d in np.load(file_name + '.npy', allow_pickle=True)]


def load_numpy(file_name):
    return np.load(file_name + '.npy', allow_pickle=True)


def load_pickle(file_name):
    with open(file_name, 'rb') as f:
        return pickle.load(f)


def shuffle_dataset(dataset, seed):
    np.random.seed(seed)
    np.random.shuffle(dataset)
    return dataset


def split_dataset(dataset, ratio):
    n = int(ratio * len(dataset))
    dataset_1, dataset_2 = dataset[:n], dataset[n:]
    return dataset_1, dataset_2

## Data processing

In [4]:
radius = 2

with open('kegg_classes.txt', 'r') as f:
    data_list = f.read().strip().split('\n')

"""Exclude the data contains "." in the smiles, which correspond to non-bonds"""
data_list = list(filter(lambda x: '.' not in x.strip().split()[0], data_list))
N = len(data_list)

print('Total number of molecules : %d' %(N))

atom_dict = defaultdict(lambda: len(atom_dict))
bond_dict = defaultdict(lambda: len(bond_dict))
fingerprint_dict = defaultdict(lambda: len(fingerprint_dict))

Molecules, Adjacencies, Properties, MACCS_list = [], [], [], []

max_MolMR, min_MolMR     = -1000, 1000
max_MolLogP, min_MolLogP = -1000, 1000
max_MolWt, min_MolWt     = -1000, 1000
max_NumRotatableBonds, min_NumRotatableBonds = -1000, 1000
max_NumAliphaticRings, min_NumAliphaticRings = -1000, 1000
max_NumAromaticRings, min_NumAromaticRings   = -1000, 1000
max_NumSaturatedRings, min_NumSaturatedRings = -1000, 1000

for no, data in enumerate(data_list):

    print('/'.join(map(str, [no+1, N])))

    smiles, property_indices = data.strip().split('\t')
    property_s = property_indices.strip().split(',')

    property = np.zeros((1,11))
    for prop in property_s:
        property[0,int(prop)] = 1

    Properties.append(property)

    mol = Chem.MolFromSmiles(smiles)
    atoms = create_atoms(mol)
    i_jbond_dict = create_ijbonddict(mol)

    fingerprints = create_fingerprints(atoms, i_jbond_dict, radius)
    Molecules.append(fingerprints)

    adjacency = create_adjacency(mol)
    Adjacencies.append(adjacency)

    MACCS         = MACCSkeys.GenMACCSKeys(Chem.MolFromSmiles(smiles))
    MACCS_ids     = np.zeros((20,))
    MACCS_ids[0]  = Descriptors.MolMR(mol)
    MACCS_ids[1]  = Descriptors.MolLogP(mol)
    MACCS_ids[2]  = Descriptors.MolWt(mol)
    MACCS_ids[3]  = Descriptors.NumRotatableBonds(mol)
    MACCS_ids[4]  = Descriptors.NumAliphaticRings(mol)
    MACCS_ids[5]  = MACCS[108]
    MACCS_ids[6]  = Descriptors.NumAromaticRings(mol)
    MACCS_ids[7]  = MACCS[98]
    MACCS_ids[8]  = Descriptors.NumSaturatedRings(mol)
    MACCS_ids[9]  = MACCS[137]
    MACCS_ids[10] = MACCS[136]
    MACCS_ids[11] = MACCS[145]
    MACCS_ids[12] = MACCS[116]
    MACCS_ids[13] = MACCS[141]
    MACCS_ids[14] = MACCS[89]
    MACCS_ids[15] = MACCS[50]
    MACCS_ids[16] = MACCS[160]
    MACCS_ids[17] = MACCS[121]
    MACCS_ids[18] = MACCS[149]
    MACCS_ids[19] = MACCS[161]

    if max_MolMR < MACCS_ids[0]:
        max_MolMR = MACCS_ids[0]
    if min_MolMR > MACCS_ids[0]:
        min_MolMR = MACCS_ids[0]

    if max_MolLogP < MACCS_ids[1]:
        max_MolLogP = MACCS_ids[1]
    if min_MolLogP > MACCS_ids[1]:
        min_MolLogP = MACCS_ids[1]

    if max_MolWt < MACCS_ids[2]:
        max_MolWt = MACCS_ids[2]
    if min_MolWt > MACCS_ids[2]:
        min_MolWt = MACCS_ids[2]

    if max_NumRotatableBonds < MACCS_ids[3]:
        max_NumRotatableBonds = MACCS_ids[3]
    if min_NumRotatableBonds > MACCS_ids[3]:
        min_NumRotatableBonds = MACCS_ids[3]

    if max_NumAliphaticRings < MACCS_ids[4]:
        max_NumAliphaticRings = MACCS_ids[4]
    if min_NumAliphaticRings > MACCS_ids[4]:
        min_NumAliphaticRings = MACCS_ids[4]

    if max_NumAromaticRings < MACCS_ids[6]:
        max_NumAromaticRings = MACCS_ids[6]
    if min_NumAromaticRings > MACCS_ids[6]:
        min_NumAromaticRings = MACCS_ids[6]

    if max_NumSaturatedRings < MACCS_ids[8]:
        max_NumSaturatedRings = MACCS_ids[8]
    if min_NumSaturatedRings > MACCS_ids[8]:
        min_NumSaturatedRings = MACCS_ids[8]

    MACCS_list.append(MACCS_ids)

dir_input = ('pathway/input'+str(radius)+'/')
os.makedirs(dir_input, exist_ok=True)

for n in range(N):
    for b in range(20):
        if b==0:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_MolMR)/(max_MolMR-min_MolMR)
        elif b==1:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_MolLogP)/(max_MolMR-min_MolLogP)
        elif b==2:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_MolWt)/(max_MolMR-min_MolWt)
        elif b==3:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_NumRotatableBonds)/(max_MolMR-min_NumRotatableBonds)
        elif b==4:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_NumAliphaticRings)/(max_MolMR-min_NumAliphaticRings)
        elif b==6:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_NumAromaticRings)/(max_MolMR-min_NumAromaticRings)
        elif b==8:
            MACCS_list[n][b] = (MACCS_list[n][b]-min_NumSaturatedRings)/(max_NumSaturatedRings-min_NumSaturatedRings)

np.save(dir_input + 'molecules', Molecules)
np.save(dir_input + 'adjacencies', Adjacencies)
np.save(dir_input + 'properties', Properties)
np.save(dir_input + 'maccs', np.asarray(MACCS_list))

dump_dictionary(fingerprint_dict, dir_input + 'fingerprint_dict.pickle')

print('The preprocess has finished!')

Total number of molecules : 4935
1/4935
2/4935
3/4935
4/4935
5/4935
6/4935
7/4935
8/4935
9/4935
10/4935
11/4935
12/4935
13/4935
14/4935
15/4935
16/4935
17/4935
18/4935
19/4935
20/4935
21/4935
22/4935
23/4935
24/4935
25/4935
26/4935
27/4935
28/4935
29/4935
30/4935
31/4935
32/4935
33/4935
34/4935
35/4935
36/4935
37/4935
38/4935
39/4935
40/4935
41/4935
42/4935
43/4935
44/4935
45/4935
46/4935
47/4935
48/4935
49/4935
50/4935
51/4935
52/4935
53/4935
54/4935
55/4935
56/4935
57/4935
58/4935
59/4935
60/4935
61/4935
62/4935
63/4935
64/4935
65/4935
66/4935
67/4935
68/4935
69/4935
70/4935
71/4935
72/4935
73/4935
74/4935
75/4935
76/4935
77/4935
78/4935
79/4935
80/4935
81/4935
82/4935
83/4935
84/4935
85/4935
86/4935
87/4935
88/4935
89/4935
90/4935
91/4935
92/4935
93/4935
94/4935
95/4935
96/4935
97/4935
98/4935
99/4935
100/4935
101/4935
102/4935
103/4935
104/4935
105/4935
106/4935
107/4935
108/4935
109/4935
110/4935
111/4935
112/4935
113/4935
114/4935
115/4935
116/4935
117/4935
118/4935
119/4935
120/



366/4935
367/4935
368/4935
369/4935
370/4935
371/4935
372/4935
373/4935
374/4935
375/4935
376/4935
377/4935
378/4935
379/4935
380/4935
381/4935
382/4935
383/4935
384/4935
385/4935
386/4935
387/4935
388/4935
389/4935
390/4935
391/4935
392/4935
393/4935
394/4935
395/4935
396/4935
397/4935
398/4935
399/4935
400/4935
401/4935
402/4935
403/4935
404/4935
405/4935
406/4935
407/4935
408/4935
409/4935
410/4935
411/4935
412/4935
413/4935
414/4935
415/4935
416/4935
417/4935
418/4935
419/4935
420/4935
421/4935
422/4935
423/4935
424/4935
425/4935
426/4935
427/4935
428/4935
429/4935
430/4935
431/4935
432/4935
433/4935
434/4935
435/4935
436/4935
437/4935
438/4935
439/4935
440/4935
441/4935
442/4935
443/4935
444/4935
445/4935
446/4935
447/4935
448/4935
449/4935
450/4935
451/4935
452/4935
453/4935
454/4935
455/4935
456/4935
457/4935
458/4935
459/4935
460/4935
461/4935
462/4935
463/4935
464/4935
465/4935
466/4935
467/4935
468/4935
469/4935
470/4935
471/4935
472/4935
473/4935
474/4935
475/4935
476/4935
4



564/4935
565/4935
566/4935
567/4935
568/4935
569/4935
570/4935
571/4935
572/4935
573/4935
574/4935
575/4935
576/4935
577/4935
578/4935
579/4935
580/4935
581/4935
582/4935
583/4935
584/4935
585/4935
586/4935
587/4935
588/4935
589/4935
590/4935
591/4935
592/4935
593/4935
594/4935
595/4935
596/4935
597/4935
598/4935
599/4935
600/4935
601/4935
602/4935
603/4935
604/4935
605/4935
606/4935
607/4935
608/4935
609/4935
610/4935
611/4935
612/4935
613/4935
614/4935
615/4935
616/4935
617/4935
618/4935
619/4935
620/4935
621/4935
622/4935
623/4935
624/4935
625/4935
626/4935
627/4935
628/4935
629/4935
630/4935
631/4935
632/4935
633/4935
634/4935
635/4935
636/4935
637/4935
638/4935
639/4935
640/4935
641/4935
642/4935
643/4935
644/4935
645/4935
646/4935
647/4935
648/4935
649/4935
650/4935
651/4935
652/4935
653/4935
654/4935
655/4935
656/4935
657/4935
658/4935
659/4935
660/4935
661/4935
662/4935
663/4935
664/4935
665/4935
666/4935
667/4935
668/4935
669/4935
670/4935
671/4935
672/4935
673/4935
674/4935
6

1443/4935
1444/4935
1445/4935
1446/4935
1447/4935
1448/4935
1449/4935
1450/4935
1451/4935
1452/4935
1453/4935
1454/4935
1455/4935
1456/4935
1457/4935
1458/4935
1459/4935
1460/4935
1461/4935
1462/4935
1463/4935
1464/4935
1465/4935
1466/4935
1467/4935
1468/4935
1469/4935
1470/4935
1471/4935
1472/4935
1473/4935
1474/4935
1475/4935
1476/4935
1477/4935
1478/4935
1479/4935
1480/4935
1481/4935
1482/4935
1483/4935
1484/4935
1485/4935
1486/4935
1487/4935
1488/4935
1489/4935
1490/4935
1491/4935
1492/4935
1493/4935
1494/4935
1495/4935
1496/4935
1497/4935
1498/4935
1499/4935
1500/4935
1501/4935
1502/4935
1503/4935
1504/4935
1505/4935
1506/4935
1507/4935
1508/4935
1509/4935
1510/4935
1511/4935
1512/4935
1513/4935
1514/4935
1515/4935
1516/4935
1517/4935
1518/4935
1519/4935
1520/4935
1521/4935
1522/4935
1523/4935
1524/4935
1525/4935
1526/4935
1527/4935
1528/4935
1529/4935
1530/4935
1531/4935
1532/4935
1533/4935
1534/4935
1535/4935
1536/4935
1537/4935
1538/4935
1539/4935
1540/4935
1541/4935
1542/4935


2281/4935
2282/4935
2283/4935
2284/4935
2285/4935
2286/4935
2287/4935
2288/4935
2289/4935
2290/4935
2291/4935
2292/4935
2293/4935
2294/4935
2295/4935
2296/4935
2297/4935
2298/4935
2299/4935
2300/4935
2301/4935
2302/4935
2303/4935
2304/4935
2305/4935
2306/4935
2307/4935
2308/4935
2309/4935
2310/4935
2311/4935
2312/4935
2313/4935
2314/4935
2315/4935
2316/4935
2317/4935
2318/4935
2319/4935
2320/4935
2321/4935
2322/4935
2323/4935
2324/4935
2325/4935
2326/4935
2327/4935
2328/4935
2329/4935
2330/4935
2331/4935
2332/4935
2333/4935
2334/4935
2335/4935
2336/4935
2337/4935
2338/4935
2339/4935
2340/4935
2341/4935
2342/4935
2343/4935
2344/4935
2345/4935
2346/4935
2347/4935
2348/4935
2349/4935
2350/4935
2351/4935
2352/4935
2353/4935
2354/4935
2355/4935
2356/4935
2357/4935
2358/4935
2359/4935
2360/4935
2361/4935
2362/4935
2363/4935
2364/4935
2365/4935
2366/4935
2367/4935
2368/4935
2369/4935
2370/4935
2371/4935
2372/4935
2373/4935
2374/4935
2375/4935
2376/4935
2377/4935
2378/4935
2379/4935
2380/4935


3116/4935
3117/4935
3118/4935
3119/4935
3120/4935
3121/4935
3122/4935
3123/4935
3124/4935
3125/4935
3126/4935
3127/4935
3128/4935
3129/4935
3130/4935
3131/4935
3132/4935
3133/4935
3134/4935
3135/4935
3136/4935
3137/4935
3138/4935
3139/4935
3140/4935
3141/4935
3142/4935
3143/4935
3144/4935
3145/4935
3146/4935
3147/4935
3148/4935
3149/4935
3150/4935
3151/4935
3152/4935
3153/4935
3154/4935
3155/4935
3156/4935
3157/4935
3158/4935
3159/4935
3160/4935
3161/4935
3162/4935
3163/4935
3164/4935
3165/4935
3166/4935
3167/4935
3168/4935
3169/4935
3170/4935
3171/4935
3172/4935
3173/4935
3174/4935
3175/4935
3176/4935
3177/4935
3178/4935
3179/4935
3180/4935
3181/4935
3182/4935
3183/4935
3184/4935
3185/4935
3186/4935
3187/4935
3188/4935
3189/4935
3190/4935
3191/4935
3192/4935
3193/4935
3194/4935
3195/4935
3196/4935
3197/4935
3198/4935
3199/4935
3200/4935
3201/4935
3202/4935
3203/4935
3204/4935
3205/4935
3206/4935
3207/4935
3208/4935
3209/4935
3210/4935
3211/4935
3212/4935
3213/4935
3214/4935
3215/4935


3945/4935
3946/4935
3947/4935
3948/4935
3949/4935
3950/4935
3951/4935
3952/4935
3953/4935
3954/4935
3955/4935
3956/4935
3957/4935
3958/4935
3959/4935
3960/4935
3961/4935
3962/4935
3963/4935
3964/4935
3965/4935
3966/4935
3967/4935
3968/4935
3969/4935
3970/4935
3971/4935
3972/4935
3973/4935
3974/4935
3975/4935
3976/4935
3977/4935
3978/4935
3979/4935
3980/4935
3981/4935
3982/4935
3983/4935
3984/4935
3985/4935
3986/4935
3987/4935
3988/4935
3989/4935
3990/4935
3991/4935
3992/4935
3993/4935
3994/4935
3995/4935
3996/4935
3997/4935
3998/4935
3999/4935
4000/4935
4001/4935
4002/4935
4003/4935
4004/4935
4005/4935
4006/4935
4007/4935
4008/4935
4009/4935
4010/4935
4011/4935
4012/4935
4013/4935
4014/4935
4015/4935
4016/4935
4017/4935
4018/4935
4019/4935
4020/4935
4021/4935
4022/4935
4023/4935
4024/4935
4025/4935
4026/4935
4027/4935
4028/4935
4029/4935
4030/4935
4031/4935
4032/4935
4033/4935
4034/4935
4035/4935
4036/4935
4037/4935
4038/4935
4039/4935
4040/4935
4041/4935
4042/4935
4043/4935
4044/4935


4773/4935
4774/4935
4775/4935
4776/4935
4777/4935
4778/4935
4779/4935
4780/4935
4781/4935
4782/4935
4783/4935
4784/4935
4785/4935
4786/4935
4787/4935
4788/4935
4789/4935
4790/4935
4791/4935
4792/4935
4793/4935
4794/4935
4795/4935
4796/4935
4797/4935
4798/4935
4799/4935
4800/4935
4801/4935
4802/4935
4803/4935
4804/4935
4805/4935
4806/4935
4807/4935
4808/4935
4809/4935
4810/4935
4811/4935
4812/4935
4813/4935
4814/4935
4815/4935
4816/4935
4817/4935
4818/4935
4819/4935
4820/4935
4821/4935
4822/4935
4823/4935
4824/4935
4825/4935
4826/4935
4827/4935
4828/4935
4829/4935
4830/4935
4831/4935
4832/4935
4833/4935
4834/4935
4835/4935
4836/4935
4837/4935
4838/4935
4839/4935
4840/4935
4841/4935
4842/4935
4843/4935
4844/4935
4845/4935
4846/4935
4847/4935
4848/4935
4849/4935
4850/4935
4851/4935
4852/4935
4853/4935
4854/4935
4855/4935
4856/4935
4857/4935
4858/4935
4859/4935
4860/4935
4861/4935
4862/4935
4863/4935
4864/4935
4865/4935
4866/4935
4867/4935
4868/4935
4869/4935
4870/4935
4871/4935
4872/4935


  return array(a, dtype, copy=False, order=order, subok=True)


The preprocess has finished!


## Load and create dataset

In [5]:
dir_input = ('pathway/input'+str(radius)+'/')

molecules  = load_tensor(dir_input + 'molecules', torch.FloatTensor)
properties = load_numpy(dir_input + 'properties')
maccs      = load_numpy(dir_input + 'maccs')


with open(dir_input + 'fingerprint_dict.pickle', 'rb') as f:
    fingerprint_dict = pickle.load(f)

fingerprint_dict = load_pickle(dir_input + 'fingerprint_dict.pickle')
unknown          = 100
n_fingerprint    = len(fingerprint_dict) + unknown

my_maccs = []
for i in range(len(molecules)):
    target_mol = (n_fingerprint-1)*torch.ones([259], dtype=torch.float, device=device)
    target_mol[:molecules[i].size()[0]] = molecules[i]
    my_maccs.append(np.concatenate((target_mol.cpu().data.numpy(),maccs[i]), axis=0))

dataset = list(zip(properties, my_maccs))
dataset = shuffle_dataset(dataset, 4123)
dataset_train, dataset_   = split_dataset(dataset, 0.8)
dataset_dev, dataset_test = split_dataset(dataset_, 0.5)


data_batch = list(zip(*dataset_train))
properties_train, maccs_train = data_batch[-2], data_batch[-1]

data_batch = list(zip(*dataset_dev))
properties_dev, maccs_dev = data_batch[-2], data_batch[-1]

data_batch = list(zip(*dataset_test))
properties_test, maccs_test = data_batch[-2], data_batch[-1]

train_len, dev_len, test_len = len(dataset_train), len(dataset_dev), len(dataset_test)

feature_len = maccs_train[0].shape[0]

X_train, X_dev, X_test = np.zeros((train_len,feature_len)), np.zeros((dev_len,feature_len)), np.zeros((test_len,feature_len))
Y_train, Y_dev, Y_test = np.zeros((train_len,11)), np.zeros((dev_len,11)), np.zeros((test_len,11))

for i in range(train_len):
    X_train[i,:] = maccs_train[i]
    Y_train[i] = properties_train[i][0]

for i in range(dev_len):
    X_dev[i,:]   = maccs_dev[i]
    Y_dev[i]   = properties_dev[i][0]

for i in range(test_len):
    X_test[i,:]  = maccs_test[i]
    Y_test[i]  = properties_test[i][0]

# Random forest

In [6]:
clf = RandomForestClassifier(n_estimators=300, criterion = 'gini', max_depth=60, random_state=0)
multi_target_forest = MultiOutputClassifier(clf, n_jobs=-1)
multi_target_forest.fit(X_train, Y_train)

In [7]:
Y_pred = multi_target_forest.predict(X_test)

acc_score, prec_score, rec_score = 0., 0., 0.
for i in range(Y_test.shape[0]):
    acc_score  += accuracy_score(Y_test[i],Y_pred[i])
    prec_score += precision_score(Y_test[i],Y_pred[i])
    rec_score  += recall_score(Y_test[i],Y_pred[i])

acc_score  = acc_score/Y_test.shape[0]
prec_score = prec_score/Y_test.shape[0]
rec_score  = rec_score/Y_test.shape[0]

print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(acc_score, prec_score, rec_score))

Accuracy : 0.9547%, 	 Precision : 0.7243%, 	, Recall : 0.6960%


In [8]:
Y_pred = multi_target_forest.predict(X_dev)

acc_score, prec_score, rec_score = 0., 0., 0.
for i in range(Y_dev.shape[0]):
    acc_score  += accuracy_score(Y_dev[i],Y_pred[i])
    prec_score += precision_score(Y_dev[i],Y_pred[i])
    rec_score  += recall_score(Y_dev[i],Y_pred[i])

acc_score  = acc_score/Y_dev.shape[0]
prec_score = prec_score/Y_dev.shape[0]
rec_score  = rec_score/Y_dev.shape[0]

print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(acc_score, prec_score, rec_score))

Accuracy : 0.9506%, 	 Precision : 0.7385%, 	, Recall : 0.7092%


# Logistic regression

In [9]:
lm = linear_model.LogisticRegression()
multi_target_logistic = MultiOutputClassifier(lm, n_jobs=-1)
multi_target_logistic.fit(X_train, Y_train)

In [10]:
Y_pred = multi_target_logistic.predict(X_test)

acc_score, prec_score, rec_score = 0., 0., 0.
for i in range(Y_test.shape[0]):
    acc_score  += accuracy_score(Y_test[i],Y_pred[i])
    prec_score += precision_score(Y_test[i],Y_pred[i])
    rec_score  += recall_score(Y_test[i],Y_pred[i])

acc_score  = acc_score/Y_test.shape[0]
prec_score = prec_score/Y_test.shape[0]
rec_score  = rec_score/Y_test.shape[0]

print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(acc_score, prec_score, rec_score))

Accuracy : 0.8916%, 	 Precision : 0.1103%, 	, Recall : 0.0988%


In [11]:
Y_pred = multi_target_logistic.predict(X_dev)

acc_score, prec_score, rec_score = 0., 0., 0.
for i in range(Y_dev.shape[0]):
    acc_score  += accuracy_score(Y_dev[i],Y_pred[i])
    prec_score += precision_score(Y_dev[i],Y_pred[i])
    rec_score  += recall_score(Y_dev[i],Y_pred[i])

acc_score  = acc_score/Y_dev.shape[0]
prec_score = prec_score/Y_dev.shape[0]
rec_score  = rec_score/Y_dev.shape[0]

print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(acc_score, prec_score, rec_score))

Accuracy : 0.8910%, 	 Precision : 0.1339%, 	, Recall : 0.1220%


# kNN classifier

In [12]:
kNN = KNeighborsClassifier()
multi_target_kNN = MultiOutputClassifier(kNN, n_jobs=-1)
multi_target_kNN.fit(X_train, Y_train)

In [13]:
Y_pred = multi_target_kNN.predict(X_test)

acc_score, prec_score, rec_score = 0., 0., 0.
for i in range(Y_test.shape[0]):
    acc_score  += accuracy_score(Y_test[i],Y_pred[i])
    prec_score += precision_score(Y_test[i],Y_pred[i])
    rec_score  += recall_score(Y_test[i],Y_pred[i])

acc_score  = acc_score/Y_test.shape[0]
prec_score = prec_score/Y_test.shape[0]
rec_score  = rec_score/Y_test.shape[0]

print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(acc_score, prec_score, rec_score))

Accuracy : 0.8920%, 	 Precision : 0.3979%, 	, Recall : 0.3922%


In [14]:
Y_pred = multi_target_kNN.predict(X_dev)

acc_score, prec_score, rec_score = 0., 0., 0.
for i in range(Y_dev.shape[0]):
    acc_score  += accuracy_score(Y_dev[i],Y_pred[i])
    prec_score += precision_score(Y_dev[i],Y_pred[i])
    rec_score  += recall_score(Y_dev[i],Y_pred[i])

acc_score  = acc_score/Y_dev.shape[0]
prec_score = prec_score/Y_dev.shape[0]
rec_score  = rec_score/Y_dev.shape[0]

print('Accuracy : %.4f%%, \t Precision : %.4f%%, \t, Recall : %.4f%%' %(acc_score, prec_score, rec_score))

Accuracy : 0.8993%, 	 Precision : 0.4313%, 	, Recall : 0.4165%
