Screen the following datasets to predict active compounds using the `lightgbc_3` model <br>
1. Dataset of known inhibitors (validation of models)
2. Dataset of Peptidomimetics from ChemDiv
3. Dataset of Peptidomimetics from Life Chemicals
4. Dataset of Anti-inflammatory compounds


In [1]:
# importing libraries
import pandas as pd
import numpy as np
import pickle

In [2]:
# load vt pickle object
# `mask` is used below as the column selector in `DataFrame.loc[:, mask]`.
# NOTE(review): presumably the feature-selection result saved with the model
# ("vt" looks like variance threshold) - confirm against the training notebook.
# SECURITY: pickle.load can execute arbitrary code; only open trusted files.
with open('lightgbc_3_vt_pickle', 'rb') as f:
    mask = pickle.load(f)

In [3]:
# load model
# `clf` is the object whose `predict` / `predict_proba` methods are called on
# every dataset below.
# SECURITY: pickle.load can execute arbitrary code; only open trusted files.
with open('lightgbc_3_pickle', 'rb') as f:
    clf = pickle.load(f)

1. Dataset of known inhibitors (validation of models)

In [4]:
# read the molecular-descriptor table of the known inhibitors
df_inhi = pd.read_csv('../../../../../data/Molecular_Descriptors_Known_Inhibitors.csv')

# show (rows, columns) together with the column labels
df_inhi.shape, df_inhi.columns

((9, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [5]:
# Discard the 'ReadIn_ID' column (rebinding the name rather than mutating in place)
df_inhi = df_inhi.drop(columns=['ReadIn_ID'])
df_inhi.shape

(9, 778)

In [6]:
# Promote 'USER_ID' from a data column to the row index
df_inhi = df_inhi.set_index('USER_ID')
df_inhi.shape

(9, 777)

In [7]:
# Keep only the columns selected by `mask`; this subset is the model input.
# NOTE: must run before the 'Predictions'/'Probabilities' columns are added to
# df_inhi - if `mask` is positional (TODO confirm) it would no longer line up.
df_inhi_pred = df_inhi.loc[:, mask]

# check shape (the column count should match what the model was trained on)
df_inhi_pred.shape

(9, 359)

In [8]:
# predict class labels and per-class probabilities for every row
pred_1 = clf.predict(df_inhi_pred)
pred_prob_1 = clf.predict_proba(df_inhi_pred)

# count the rows predicted as 1 (active); the sum is a count only for 0/1 labels
pred_1.sum()

5

In [9]:
# Append the predicted label and the second predict_proba column to the
# descriptor table, then preview the first rows
df_inhi = df_inhi.assign(
    Predictions=pred_1,
    Probabilities=pred_prob_1[:, 1],
)
df_inhi.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.999999
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.994888
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,1.0
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.999985
53480255,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,6.209,4.027,0.0,0.974,0,1e-06


In [10]:
# Display only the rows whose predicted label equals 1
df_inhi.loc[df_inhi['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.999999
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.994888
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,1.0
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.999985
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.987991


In [11]:
# Display rows predicted as 1 whose 'Probabilities' value exceeds 0.7
df_inhi.loc[df_inhi['Predictions'].eq(1) & df_inhi['Probabilities'].gt(0.7)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.999999
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.994888
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,1.0
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.999985
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.987991


2. Dataset of Peptidomimetics from ChemDiv

In [12]:
# read the molecular-descriptor table of the ChemDiv peptidomimetics
df_pep_cd = pd.read_csv('../../../../../data/Molecular_Descriptors_Peptidomimetics_ChemDiv.csv')

# show (rows, columns) together with the column labels
df_pep_cd.shape, df_pep_cd.columns

((36711, 780),
 Index(['ReadIn_ID', 'USER_ID', 'USER_ID_2', 'D001', 'D002', 'D003', 'D004',
        'D005', 'D006', 'D007',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=780))

In [13]:
# Discard the 'ReadIn_ID' and 'USER_ID_2' columns (rebinding rather than mutating in place)
df_pep_cd = df_pep_cd.drop(columns=['ReadIn_ID', 'USER_ID_2'])
df_pep_cd.shape

(36711, 778)

In [14]:
# Promote 'USER_ID' from a data column to the row index
df_pep_cd = df_pep_cd.set_index('USER_ID')
df_pep_cd.shape

(36711, 777)

In [15]:
# Keep only the columns selected by `mask`; this subset is the model input.
# NOTE: must run before the 'Predictions'/'Probabilities' columns are added to
# df_pep_cd - if `mask` is positional (TODO confirm) it would no longer line up.
df_pep_cd_pred = df_pep_cd.loc[:, mask]

# check shape (the column count should match what the model was trained on)
df_pep_cd_pred.shape

(36711, 359)

In [16]:
# predict class labels and per-class probabilities for every row
pred_2 = clf.predict(df_pep_cd_pred)
pred_prob_2 = clf.predict_proba(df_pep_cd_pred)

# count the rows predicted as 1 (active); the sum is a count only for 0/1 labels
pred_2.sum()

9

In [17]:
# Append the predicted label and the second predict_proba column to the
# descriptor table, then preview the first rows
df_pep_cd = df_pep_cd.assign(
    Predictions=pred_2,
    Probabilities=pred_prob_2[:, 1],
)
df_pep_cd.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M652-0486,1,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.42626,-0.648642,0.181818,0.498807,0,6.764093e-10
L258-0120,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,2.925672e-09
L834-0670,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,2.89726,0,8.014505e-10
L258-0113,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,1.980704e-09
L834-0614,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,3.01043,0,4.354943e-10


In [18]:
# Display only the rows whose predicted label equals 1
df_pep_cd.loc[df_pep_cd['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P940-4610,1,0,1,1,2,0,0,0,0,0,...,0,0,0,0,5.08746,-0.750851,0.214286,3.24701,1,0.748327
SA40-0613,0,0,1,2,0,0,0,0,0,0,...,0,0,0,0,4.85798,-0.686336,0.0,0.457157,1,0.527632
SA40-0688,0,0,1,2,0,0,0,0,0,0,...,0,0,0,0,4.90689,-0.69935,0.0,0.700386,1,0.691512
S550-0594,0,0,1,2,1,0,0,0,0,0,...,0,0,0,0,5.08746,-0.658212,0.0,1.49061,1,0.618218
P940-4469,0,0,1,1,1,0,0,0,0,0,...,0,0,0,0,4.75489,-0.716962,0.0,2.52466,1,0.634546
S551-0296,0,0,1,2,1,0,0,0,0,0,...,0,0,0,0,5.08746,-0.658212,0.0,1.49061,1,0.732229
F687-1061,1,0,1,0,2,0,0,0,0,0,...,0,0,0,0,5.24793,-0.313118,0.1875,1.72683,1,0.835102
SA43-0900,0,0,1,2,1,0,0,0,0,0,...,0,0,0,0,5.04439,-0.69935,0.0,0.937965,1,0.782468
SA43-0950,0,0,1,2,1,0,0,0,0,0,...,0,0,0,0,5.12928,-0.722401,0.0,1.40788,1,0.558903


In [19]:
# show rows predicted as 1 whose probability is greater than 0.69
# (this cutoff differs from the 0.7 used for the known inhibitors)
df_pep_cd[(df_pep_cd['Predictions'] == 1) & (df_pep_cd['Probabilities'] > 0.69)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
P940-4610,1,0,1,1,2,0,0,0,0,0,...,0,0,0,0,5.08746,-0.750851,0.214286,3.24701,1,0.748327
SA40-0688,0,0,1,2,0,0,0,0,0,0,...,0,0,0,0,4.90689,-0.69935,0.0,0.700386,1,0.691512
S551-0296,0,0,1,2,1,0,0,0,0,0,...,0,0,0,0,5.08746,-0.658212,0.0,1.49061,1,0.732229
F687-1061,1,0,1,0,2,0,0,0,0,0,...,0,0,0,0,5.24793,-0.313118,0.1875,1.72683,1,0.835102
SA43-0900,0,0,1,2,1,0,0,0,0,0,...,0,0,0,0,5.04439,-0.69935,0.0,0.937965,1,0.782468


3. Dataset of Peptidomimetics from Life Chemicals

In [20]:
# read the molecular-descriptor table of the Life Chemicals peptidomimetics
df_pep_lc = pd.read_csv('../../../../../data/Molecular_Descriptors_Peptidomimetics_Life_Chemicals.csv')

# show (rows, columns) together with the column labels
df_pep_lc.shape, df_pep_lc.columns

((5836, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [21]:
# Discard the 'ReadIn_ID' column (rebinding the name rather than mutating in place)
df_pep_lc = df_pep_lc.drop(columns=['ReadIn_ID'])
df_pep_lc.shape

(5836, 778)

In [22]:
# Promote 'USER_ID' from a data column to the row index
df_pep_lc = df_pep_lc.set_index('USER_ID')
df_pep_lc.shape

(5836, 777)

In [23]:
# Keep only the columns selected by `mask`; this subset is the model input.
# NOTE: must run before the 'Predictions'/'Probabilities' columns are added to
# df_pep_lc - if `mask` is positional (TODO confirm) it would no longer line up.
df_pep_lc_pred = df_pep_lc.loc[:, mask]

# check shape (the column count should match what the model was trained on)
df_pep_lc_pred.shape

(5836, 359)

In [24]:
# predict class labels and per-class probabilities for every row
pred_3 = clf.predict(df_pep_lc_pred)
pred_prob_3 = clf.predict_proba(df_pep_lc_pred)

# count the rows predicted as 1 (active); the sum is a count only for 0/1 labels
# NOTE(review): the recorded run flags 3344 of 5836 rows as active, far more than
# for the other screening libraries - verify that this file's descriptors were
# computed the same way as the training data before trusting these hits.
pred_3.sum()

3344

In [25]:
# Append the predicted label and the second predict_proba column to the
# descriptor table, then preview the first rows
df_pep_lc = df_pep_lc.assign(
    Predictions=pred_3,
    Probabilities=pred_prob_3[:, 1],
)
df_pep_lc.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F8881-1049,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,4.087,-0.626,0.0,0.259,1,0.930692
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.0,3.033,1,0.999487
F6619-2116,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.244,0.0,0.265,1,0.99019
F6497-5659,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.135,0.0,-0.512,0,0.444401
F6559-1463,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.459,-0.692,0.0,1.597,0,1.1e-05


In [26]:
# Display only the rows whose predicted label equals 1
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F8881-1049,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,4.087,-0.626,0.000,0.259,1,0.930692
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.000,3.033,1,0.999487
F6619-2116,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.244,0.000,0.265,1,0.990190
F6545-0060,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.524,-0.692,0.000,0.836,1,0.999879
F6497-5670,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.755,-0.240,0.000,0.561,1,0.950132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F0554-0828,4,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.895,-0.795,0.523,4.625,1,0.996381
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.999272
F0410-0003,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,6.922,1,0.994577
F0410-0005,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,7.011,1,0.835009


In [27]:
# show rows predicted as 1 whose probability is greater than 0.95
# (this cutoff differs from the 0.7 used for the known inhibitors)
df_pep_lc[(df_pep_lc['Predictions'] == 1) & (df_pep_lc['Probabilities'] > 0.95)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.000,3.033,1,0.999487
F6619-2116,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.244,0.000,0.265,1,0.990190
F6545-0060,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.524,-0.692,0.000,0.836,1,0.999879
F6497-5670,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.755,-0.240,0.000,0.561,1,0.950132
F6507-8802,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,4.170,-0.750,0.000,1.891,1,0.989067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F1280-0022,4,0,0,1,5,0,0,0,0,0,...,1,0,0,0,5.943,-0.770,0.511,5.545,1,0.999894
F0554-0832,4,0,0,2,4,0,0,0,0,0,...,1,0,0,0,5.845,-0.784,0.535,4.177,1,0.999055
F0554-0828,4,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.895,-0.795,0.523,4.625,1,0.996381
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.999272


4. Dataset of Anti-inflammatory compounds

In [28]:
# read the molecular-descriptor table of the anti-inflammatory compounds
df_infla = pd.read_csv('../../../../../data/Molecular_Descriptors_Antiinflammatory.csv')

# show (rows, columns) together with the column labels
df_infla.shape, df_infla.columns

((23839, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [29]:
# Discard the 'ReadIn_ID' column (rebinding the name rather than mutating in place)
df_infla = df_infla.drop(columns=['ReadIn_ID'])
df_infla.shape

(23839, 778)

In [30]:
# Promote 'USER_ID' from a data column to the row index
df_infla = df_infla.set_index('USER_ID')
df_infla.shape

(23839, 777)

In [31]:
# Keep only the columns selected by `mask`; this subset is the model input.
# NOTE: must run before the 'Predictions'/'Probabilities' columns are added to
# df_infla - if `mask` is positional (TODO confirm) it would no longer line up.
df_infla_pred = df_infla.loc[:, mask]

# check shape (the column count should match what the model was trained on)
df_infla_pred.shape

(23839, 359)

In [32]:
# predict class labels and per-class probabilities for every row
pred_4 = clf.predict(df_infla_pred)
pred_prob_4 = clf.predict_proba(df_infla_pred)

# count the rows predicted as 1 (active); the sum is a count only for 0/1 labels
pred_4.sum()

12

In [33]:
# Append the predicted label and the second predict_proba column to the
# descriptor table, then preview the first rows
df_infla = df_infla.assign(
    Predictions=pred_4,
    Probabilities=pred_prob_4[:, 1],
)
df_infla.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F987-0090,2,0,0,2,2,0,0,0,0,0,...,1,0,0,0,5.49185,-0.703176,0.352941,2.63176,0,1.546746e-07
F279-0513,1,0,0,2,2,0,0,0,0,0,...,0,0,0,0,5.32193,-0.706842,0.193548,1.8641,0,2.181659e-09
G300-0457,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.58496,-0.7434,0.5,4.46192,0,2.789479e-09
G732-2734,3,0,0,2,3,0,0,0,0,0,...,0,0,0,0,5.70044,-0.737017,0.461538,2.03623,0,2.789731e-08
F838-0741,2,0,0,2,2,0,0,0,0,0,...,0,0,0,1,5.52356,-0.671214,0.352941,2.08026,0,6.428272e-08


In [34]:
# Display only the rows whose predicted label equals 1
df_infla.loc[df_infla['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000S-0424,4,0,0,0,4,0,0,0,0,0,...,0,0,0,0,5.26679,-0.822765,0.703704,4.16923,1,0.999698
E754-0565,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.24793,-0.798443,0.4,4.26613,1,0.59575
2729-0759,4,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.67243,-0.841302,0.594595,4.72314,1,0.551924
1661-1353,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.37504,-0.879188,0.548387,5.27682,1,0.910178
D643-0110,1,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.45943,-0.693316,0.176471,2.79739,1,0.518104
D643-0011,1,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.35755,-0.732656,0.193548,2.35561,1,0.647126
E754-0574,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.2854,-0.805922,0.387097,4.48492,1,0.951807
E754-0557,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.24793,-0.798443,0.4,4.26613,1,0.655816
D144-0210,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.32193,-0.777526,0.375,4.96776,1,0.752038
Y040-6752,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.61471,-0.836324,0.486486,5.71204,1,0.572174


In [35]:
# show rows predicted as 1 whose probability is greater than 0.9
# (this cutoff differs from the 0.7 used for the known inhibitors)
df_infla[(df_infla['Predictions'] == 1) & (df_infla['Probabilities'] > 0.9)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000S-0424,4,0,0,0,4,0,0,0,0,0,...,0,0,0,0,5.26679,-0.822765,0.703704,4.16923,1,0.999698
1661-1353,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.37504,-0.879188,0.548387,5.27682,1,0.910178
E754-0574,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.2854,-0.805922,0.387097,4.48492,1,0.951807
1189-2114,3,0,0,1,5,0,0,0,0,0,...,0,0,0,0,5.62936,-0.7434,0.459459,4.80221,1,0.980065
