In [43]:
import pandas as pd
import numpy as np
import os
import glob

# Load the merged PCBA table. The code treats the first six columns as molecule
# descriptors and every remaining column as one PCBA task.
df = pd.read_csv('../datasets/pcba/pcba_all.csv')
column_names = df.columns.tolist()
features, pcba = column_names[:6], column_names[6:]

# Number of non-missing labels per task, ordered from the smallest to the largest task.
counts = df[pcba].count().sort_values().rename('counts')
# Per-task label sum, aligned to the task order of `counts`
# (equals the number of actives assuming labels are 0/1 -- TODO confirm).
sums = df[pcba].sum().loc[counts.index.tolist()].rename('actives')
# Two-column summary indexed by task name: 'counts' and 'actives'.
count_df = pd.concat([counts, sums], axis=1)

In [None]:
# Standardise the molecules with RDKit and attach Morgan fingerprints.
#
# Relies on `df`, `features` and `pcba` from the loading cell above.
# NOTE(review): the result is written back over the very CSV that was read as
# input, and with a different column layout, so this cell is not idempotent --
# re-running the loading cell afterwards sees the rewritten file.

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.SaltRemover import SaltRemover
# NOTE(review): nothing in this cell uses the wildcard import below; it is kept
# only in case other cells depend on the names it brings in.
from rdkit.Chem.FilterCatalog import *

# Keep only molecules labeled for at least one task. The explicit .copy() makes
# the column assignments below operate on an independent frame instead of a
# slice of `df` (avoids pandas' SettingWithCopyWarning).
pcba_df = df[features+pcba]
pcba_df = pcba_df[~pcba_df[pcba].isna().all(axis=1)].copy()

FP_radius=2
FP_size=1024
# Derive the column name from the fingerprint parameters so the two cannot drift apart
# ('Morgan FP_2_1024' for the values above).
fp_col_name = 'Morgan FP_{}_{}'.format(FP_radius, FP_size)

saltRemover = SaltRemover(defnFilename='../datasets/Salts.txt')
rdkit_mols = pcba_df['SMILES'].astype(str).apply(Chem.MolFromSmiles)

# Chem.MolFromSmiles returns None for SMILES it cannot parse; passing None on to
# the salt remover would abort the whole cell, so drop those rows explicitly.
parsed_mask = rdkit_mols.apply(lambda mol: mol is not None).astype(bool)
if not parsed_mask.all():
    print('Dropping {} molecules with unparseable SMILES.'.format(int((~parsed_mask).sum())))
    pcba_df = pcba_df[parsed_mask].copy()
    rdkit_mols = rdkit_mols[parsed_mask]

# Strip the salts listed in Salts.txt, then derive SMILES and fingerprints from the stripped molecules.
rdkit_mols = rdkit_mols.apply(saltRemover.StripMol)
pcba_df['rdkit SMILES'] = rdkit_mols.apply(Chem.MolToSmiles)
pcba_df[fp_col_name] = rdkit_mols.apply(
    lambda mol: AllChem.GetMorganFingerprintAsBitVect(mol,
                                                      radius=FP_radius,
                                                      nBits=FP_size).ToBitString())

# Sequential row identifier over the retained molecules.
pcba_df['Index ID'] = np.arange(pcba_df.shape[0])
pcba_df = pcba_df[['Index ID', 'Molecule', 'rdkit SMILES',
                   fp_col_name] + pcba]
pcba_df.to_csv('../datasets/pcba/pcba_all.csv', index=False)

---
# Prepare PCBA Targets

In [41]:
import pandas as pd
import numpy as np
import os
import glob
import pathlib
from stratify_helper import *
%load_ext autoreload
%autoreload 2

# Settings shared by every task's stratification run.
random_seed = 20190918
split_size = 96
cluster_col_name = 'BT_0.4 ID'

data_df = pd.read_csv('../datasets/pcba/pcba_all.csv')
# The first six columns are passed along with each task; every later column is a task.
all_columns = data_df.columns.tolist()
features, pcba = all_columns[:6], all_columns[6:]

# Arguments that do not change from one task to the next.
stratify_kwargs = dict(num_samples=10, num_actives_in_split=1,
                       split_size=split_size, cluster_col_name=cluster_col_name,
                       random_seed=random_seed)

for task_col_name in pcba:
    # One output directory per task, created on demand.
    output_dir = '../datasets/pcba_alt/{}_cv_{}/'.format(task_col_name, split_size)
    os.makedirs(output_dir, exist_ok=True)

    # Hand the helper only the shared columns plus the current task's labels.
    tmp_df = data_df[features+[task_col_name]]
    stratify_target_alt(tmp_df, output_dir, task_col_name, **stratify_kwargs)
    print('-------------------------------------------------------------')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Target pcba-aid1030
Total molecules: 161295, Total active: 15932, Total inactive: 145363, Total clusters: 52402.
Clusters #: 52402. Singletons #: 23274. Singletons with hits #: 2811.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total molecules: 96, Total active: 1, Total inactive: 95
Split 10: Total molecules: 160335, Total active: 15922, Total inacti

Split 10: Total molecules: 207360, Total active: 2295, Total inactive: 205065
Total molecules: 208320, Total active: 2305, Total inactive: 206015.
-------------------------------------------------------------
Target pcba-aid1468
Total molecules: 252185, Total active: 1038, Total inactive: 251147, Total clusters: 61975.
Clusters #: 61975. Singletons #: 22590. Singletons with hits #: 231.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total 

Split 9: Total molecules: 96, Total active: 1, Total inactive: 95
Split 10: Total molecules: 289778, Total active: 1077, Total inactive: 288701
Total molecules: 290738, Total active: 1087, Total inactive: 289651.
-------------------------------------------------------------
Target pcba-aid2100
Total molecules: 293012, Total active: 1157, Total inactive: 291855, Total clusters: 65333.
Clusters #: 65333. Singletons #: 23006. Singletons with hits #: 256.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total 

Split 10: Total molecules: 340625, Total active: 642, Total inactive: 339983
Total molecules: 341585, Total active: 652, Total inactive: 340933.
-------------------------------------------------------------
Target pcba-aid2546
Total molecules: 278436, Total active: 10556, Total inactive: 267880, Total clusters: 67104.
Clusters #: 67104. Singletons #: 25462. Singletons with hits #: 1339.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total 

Split 10: Total molecules: 328248, Total active: 31, Total inactive: 328217
Total molecules: 329208, Total active: 41, Total inactive: 329167.
-------------------------------------------------------------
Target pcba-aid485281
Total molecules: 314600, Total active: 253, Total inactive: 314347, Total clusters: 67928.
Clusters #: 67928. Singletons #: 24015. Singletons with hits #: 62.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total mole

Split 10: Total molecules: 319120, Total active: 608, Total inactive: 318512
Total molecules: 320080, Total active: 618, Total inactive: 319462.
-------------------------------------------------------------
Target pcba-aid485353
Total molecules: 323054, Total active: 603, Total inactive: 322451, Total clusters: 69208.
Clusters #: 69208. Singletons #: 23888. Singletons with hits #: 64.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total mo

Split 10: Total molecules: 293052, Total active: 30254, Total inactive: 262798
Total molecules: 294012, Total active: 30264, Total inactive: 263748.
-------------------------------------------------------------
Target pcba-aid504333
Total molecules: 325784, Total active: 15673, Total inactive: 310111, Total clusters: 68372.
Clusters #: 68372. Singletons #: 23614. Singletons with hits #: 1959.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: 

Split 10: Total molecules: 371404, Total active: 90, Total inactive: 371314
Total molecules: 372364, Total active: 100, Total inactive: 372264.
-------------------------------------------------------------
Target pcba-aid504847
Total molecules: 380031, Total active: 3509, Total inactive: 376522, Total clusters: 76371.
Clusters #: 76371. Singletons #: 26474. Singletons with hits #: 802.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total m

Split 10: Total molecules: 385224, Total active: 1970, Total inactive: 383254
Total molecules: 386184, Total active: 1980, Total inactive: 384204.
-------------------------------------------------------------
Target pcba-aid588590
Total molecules: 356872, Total active: 3931, Total inactive: 352941, Total clusters: 73294.
Clusters #: 73294. Singletons #: 25677. Singletons with hits #: 644.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Tota

Split 10: Total molecules: 372070, Total active: 752, Total inactive: 371318
Total molecules: 373030, Total active: 762, Total inactive: 372268.
-------------------------------------------------------------
Target pcba-aid602332
Total molecules: 408382, Total active: 69, Total inactive: 408313, Total clusters: 80785.
Clusters #: 80785. Singletons #: 28239. Singletons with hits #: 10.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total mol

Split 10: Total molecules: 323442, Total active: 1346, Total inactive: 322096
Total molecules: 324402, Total active: 1356, Total inactive: 323046.
-------------------------------------------------------------
Target pcba-aid624291
Total molecules: 332023, Total active: 222, Total inactive: 331801, Total clusters: 71686.
Clusters #: 71686. Singletons #: 25775. Singletons with hits #: 20.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total 

Split 10: Total molecules: 323419, Total active: 6336, Total inactive: 317083
Total molecules: 324379, Total active: 6346, Total inactive: 318033.
-------------------------------------------------------------
Target pcba-aid652025
Total molecules: 364400, Total active: 238, Total inactive: 364162, Total clusters: 75779.
Clusters #: 75779. Singletons #: 27062. Singletons with hits #: 26.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total 

Split 10: Total molecules: 349562, Total active: 10160, Total inactive: 339402
Total molecules: 350522, Total active: 10170, Total inactive: 340352.
-------------------------------------------------------------
Target pcba-aid720532
Total molecules: 12786, Total active: 975, Total inactive: 11811, Total clusters: 5631.
Clusters #: 5631. Singletons #: 3528. Singletons with hits #: 352.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total mo

Split 10: Total molecules: 356439, Total active: 651, Total inactive: 355788
Total molecules: 357399, Total active: 661, Total inactive: 356738.
-------------------------------------------------------------
Target pcba-aid720709
Total molecules: 353361, Total active: 516, Total inactive: 352845, Total clusters: 74842.
Clusters #: 74842. Singletons #: 27024. Singletons with hits #: 94.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total mo

Split 10: Total molecules: 9412, Total active: 3386, Total inactive: 6026
Total molecules: 10372, Total active: 3396, Total inactive: 6976.
-------------------------------------------------------------
Target pcba-aid885
Total molecules: 12835, Total active: 160, Total inactive: 12675, Total clusters: 5637.
Clusters #: 5637. Singletons #: 3884. Singletons with hits #: 27.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total molecules: 96, 

Split 10: Total molecules: 55663, Total active: 443, Total inactive: 55220
Total molecules: 56623, Total active: 453, Total inactive: 56170.
-------------------------------------------------------------
Target pcba-aid914
Total molecules: 7743, Total active: 221, Total inactive: 7522, Total clusters: 4012.
Clusters #: 4012. Singletons #: 3006. Singletons with hits #: 29.0
Split 0: Total molecules: 96, Total active: 1, Total inactive: 95
Split 1: Total molecules: 96, Total active: 1, Total inactive: 95
Split 2: Total molecules: 96, Total active: 1, Total inactive: 95
Split 3: Total molecules: 96, Total active: 1, Total inactive: 95
Split 4: Total molecules: 96, Total active: 1, Total inactive: 95
Split 5: Total molecules: 96, Total active: 1, Total inactive: 95
Split 6: Total molecules: 96, Total active: 1, Total inactive: 95
Split 7: Total molecules: 96, Total active: 1, Total inactive: 95
Split 8: Total molecules: 96, Total active: 1, Total inactive: 95
Split 9: Total molecules: 96, T

Split 10: Total molecules: 64787, Total active: 689, Total inactive: 64098
Total molecules: 65747, Total active: 699, Total inactive: 65048.
-------------------------------------------------------------


---
# Create tar archives of the per-task splits

In [59]:
import pandas as pd
import numpy as np
import os
import glob
from IPython.display import clear_output
import pickle

import tarfile

# PubChem assay IDs (AIDs) of the tasks to archive, in the order they are processed.
task_aids = (
    881, 1454,
    902, 924, 1452, 1030, 2242, 2147,
    1458, 1379, 540276, 1457, 1688,
    1461, 485360, 1471, 1460, 2549,
    504467, 2675, 1468, 1631, 2326,
    1634, 1479, 2551, 1469, 2451,
    2546, 720579, 2662, 504444, 1721,
    624296, 2100, 504332, 686978, 624287,
    504706, 686979, 720580, 624297, 485294,
    2101, 485297, 504466, 485313, 485281,
    485314, 485349, 652105, 485353, 651965,
    624288, 504842, 624417, 504333, 485367,
    588342, 485341, 463254, 492947, 624291,
    2517, 485290, 686970, 720553, 2528,
    485364, 720551, 651635, 720504, 588855,
    720709, 651644, 504339, 588590, 720542,
    720708, 651768, 2676, 504891, 652106,
    720707, 720711, 652025, 624246, 624202,
    743255, 588453, 540317, 504327,
    504845, 588591, 602313, 652104, 588795,
    602233, 504847, 588456, 602179, 588579,
    602310, 624171, 624170, 743266, 602332,
)

# Task column names follow the 'pcba-aid<AID>' pattern.
tasks = ['pcba-aid{}'.format(aid) for aid in task_aids]

# Pack each task's split directory into its own gzip-compressed tar archive.
# Relies on `tasks` defined above and on `split_size` from the stratification cell.
tars_dir = '../datasets/pcba_alt/tars'
# tarfile.open does not create missing parent directories, so make sure the target exists.
os.makedirs(tars_dir, exist_ok=True)

# Count from 1 so the final progress line reads N/N rather than (N-1)/N.
for task_i, task_col_name in enumerate(tasks, start=1):
    clear_output()
    print('{}/{}'.format(task_i, len(tasks)))
    tar_path = '{}/{}_cv_{}.tar.gz'.format(tars_dir, task_col_name, split_size)

    # The context manager closes the archive even if tar.add raises
    # (for example when a task directory is missing).
    with tarfile.open(tar_path, "w:gz") as tar:
        # Inside the archive the directory is stored under 'pcba/' rather than its on-disk location.
        tar.add('../datasets/pcba_alt/{}_cv_{}/'.format(task_col_name, split_size),
                arcname='pcba/{}_cv_{}/'.format(task_col_name, split_size))

106/107


In [33]:
import pandas as pd
import numpy as np
import os
import glob
import pathlib
from stratify_helper import *
%load_ext autoreload
%autoreload 2

# For every task, draw one extra split made up solely of inactive molecules and
# save it next to the task's other split files.
random_seed = 20190918
split_size = 96
cluster_col_name = 'BT_0.4 ID'

data_df = pd.read_csv('../datasets/pcba/pcba_all.csv')
pcba = data_df.columns[6:].tolist()
features = data_df.columns[:6].tolist()

for task_col_name in pcba:
    # Re-seed for each task so a task's draw does not depend on the tasks processed before it.
    np.random.seed(random_seed)

    output_dir = '../datasets/pcba_alt/{}_cv_{}/'.format(task_col_name, split_size)
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

    tmp_df = data_df[features+[task_col_name]].copy()

    # Keep only molecules labeled for this task, in 'Index ID' order, then drop
    # rows with any remaining missing value and renumber the rows from 0 so the
    # positions returned by np.where below are valid for .iloc.
    tmp_df = tmp_df[~pd.isna(tmp_df[task_col_name])]
    tmp_df = tmp_df.sort_values('Index ID')
    tmp_df = tmp_df.dropna()
    tmp_df = tmp_df.reset_index(drop=True)

    # Row positions of the molecules whose label for this task is 0.
    inactive_indices = np.where(tmp_df[task_col_name] == 0)[0]

    # Sample `split_size` distinct inactives (previously a hard-coded 96 that
    # could silently disagree with the `split_size` setting above).
    # np.random.choice raises ValueError if fewer inactives are available.
    curr_indices = np.random.choice(inactive_indices, size=split_size, replace=False)
    assert np.unique(curr_indices).shape[0] == split_size

    curr_split_df = tmp_df.iloc[curr_indices,:]
    # Sanity check: the sampled split must not contain a single active.
    assert curr_split_df[task_col_name].sum() == 0

    # os.path.join avoids the doubled '/' produced by concatenating onto a
    # directory string that already ends with a slash.
    curr_split_df.to_csv(os.path.join(output_dir, 'unlabeled_allinactive0.csv'),
                         index=False)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
