Screen the following datasets to predict active compounds using the `gbc_5` model <br>
1. Dataset of known inhibitors (validation of models)
2. Dataset of Peptidomimetics from ChemDiv
3. Dataset of Peptidomimetics from Life Chemicals
4. Dataset of Anti-inflammatory compounds


In [2]:
# importing libraries
import pandas as pd
import numpy as np
import pickle

In [3]:
# load vt pickle object; it is used below as the column selector in `df.loc[:, mask]`
# NOTE(review): presumably the feature-support mask of a fitted variance-threshold step — confirm
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files from a trusted source
with open('gbc_5_vt_pickle', 'rb') as f:
    mask = pickle.load(f)

In [4]:
# load the pickled `gbc_5` model; `predict` and `predict_proba` are called on it below
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files from a trusted source
with open('gbc_5_pickle', 'rb') as f:
    clf = pickle.load(f)

**1. Dataset of known inhibitors (validation of models)**

In [5]:
# read the molecular-descriptor table of the known inhibitors
df_inhi = pd.read_csv('../../../../../data/Molecular_Descriptors_Known_Inhibitors.csv')

# display the shape and the column names to see which non-descriptor columns are present
df_inhi.shape, df_inhi.columns

((9, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [6]:
# remove the ReadIn_ID column in place, then show the resulting shape
df_inhi.drop(columns=['ReadIn_ID'], inplace=True)
df_inhi.shape

(9, 778)

In [7]:
# Make USER_ID the index (in place); it stops being a regular column, so only D-columns remain
df_inhi.set_index('USER_ID', inplace=True)
df_inhi.shape

(9, 777)

In [8]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# NOTE(review): `mask` has to line up with the current columns of df_inhi — presumably the
# same descriptor columns/order the model was trained on; confirm
df_inhi_pred = df_inhi.loc[:, mask]

# check shape
df_inhi_pred.shape

(9, 354)

In [9]:
# predict class labels and class probabilities for every compound
pred_1 = clf.predict(df_inhi_pred)
pred_prob_1 = clf.predict_proba(df_inhi_pred)

# sum of the predicted labels; with 0/1 labels this is the number of compounds predicted as 1 (active)
pred_1.sum()

5

In [10]:
# attach the predicted labels and probabilities to the full descriptor table as new columns
df_inhi['Predictions'] = pred_1
# column 1 of predict_proba is stored as the probability
# NOTE(review): assumes column 1 corresponds to the active class (clf.classes_ == [0, 1]) — confirm
df_inhi['Probabilities'] = pred_prob_1[:, 1]
df_inhi.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.897402
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,0,0.027623
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,0.968404
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,0,0.213194
53480255,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,6.209,4.027,0.0,0.974,1,0.731497


In [11]:
# keep only the compounds predicted as active (label 1)
df_inhi.loc[df_inhi['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.897402
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,0.968404
53480255,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,6.209,4.027,0.0,0.974,1,0.731497
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.975637
6912404,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,6.585,1.655,0.0,1.633,1,0.504077


In [12]:
# save the probabilities of the predicted actives (label 1), keyed by the USER_ID index, to csv
df_inhi.loc[df_inhi['Predictions'].eq(1), ['Probabilities']].to_csv('../../data/gbc_5_predicted_known_inhibitors.csv', index=True)

In [13]:
# predicted actives (label 1) whose probability is greater than 0.7
df_inhi.loc[df_inhi['Predictions'].eq(1) & df_inhi['Probabilities'].gt(0.7)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.897402
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,0.968404
53480255,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,6.209,4.027,0.0,0.974,1,0.731497
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.975637


2. Dataset of Peptidomimetics from ChemDiv

In [14]:
# read the molecular-descriptor table of the ChemDiv peptidomimetics
df_pep_cd = pd.read_csv('../../../../../data/Molecular_Descriptors_Peptidomimetics_ChemDiv.csv')

# display the shape and the column names to see which non-descriptor columns are present
df_pep_cd.shape, df_pep_cd.columns

((36711, 780),
 Index(['ReadIn_ID', 'USER_ID', 'USER_ID_2', 'D001', 'D002', 'D003', 'D004',
        'D005', 'D006', 'D007',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=780))

In [15]:
# remove the ReadIn_ID and USER_ID_2 columns in place, then show the resulting shape
df_pep_cd.drop(columns=['ReadIn_ID', 'USER_ID_2'], inplace=True)
df_pep_cd.shape

(36711, 778)

In [16]:
# Make USER_ID the index (in place); it stops being a regular column, so only D-columns remain
df_pep_cd.set_index('USER_ID', inplace=True)
df_pep_cd.shape

(36711, 777)

In [17]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# NOTE(review): `mask` has to line up with the current columns of df_pep_cd — presumably the
# same descriptor columns/order the model was trained on; confirm
df_pep_cd_pred = df_pep_cd.loc[:, mask]

# check shape
df_pep_cd_pred.shape

(36711, 354)

In [18]:
# predict class labels and class probabilities for every compound
pred_2 = clf.predict(df_pep_cd_pred)
pred_prob_2 = clf.predict_proba(df_pep_cd_pred)

# sum of the predicted labels; with 0/1 labels this is the number of compounds predicted as 1 (active)
pred_2.sum()

7

In [19]:
# attach the predicted labels and probabilities to the full descriptor table as new columns
df_pep_cd['Predictions'] = pred_2
# column 1 of predict_proba is stored as the probability
# NOTE(review): assumes column 1 corresponds to the active class (clf.classes_ == [0, 1]) — confirm
df_pep_cd['Probabilities'] = pred_prob_2[:, 1]
df_pep_cd.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M652-0486,1,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.42626,-0.648642,0.181818,0.498807,0,0.000466
L258-0120,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,0.000841
L834-0670,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,2.89726,0,0.068351
L258-0113,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,0.000792
L834-0614,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,3.01043,0,0.026258


In [20]:
# keep only the compounds predicted as active (label 1)
df_pep_cd.loc[df_pep_cd['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
D488-0121,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.72792,-0.822058,0.461538,3.66371,1,0.820958
D488-0312,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.75489,-0.799443,0.3,3.04257,1,0.580638
D488-0213,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.72792,-0.793711,0.307692,3.70433,1,0.63325
D488-0114,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.70044,-0.816815,0.315789,3.33826,1,0.669883
L036-0317,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.70044,-0.816815,0.473684,3.33826,1,0.791907
D488-0147,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.72792,-0.793711,0.307692,3.70433,1,0.682321
L036-0366,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.75489,-0.799443,0.45,3.04257,1,0.695809


In [21]:
# predicted actives (label 1) whose probability is greater than 0.95
df_pep_cd.loc[df_pep_cd['Predictions'].eq(1) & df_pep_cd['Probabilities'].gt(0.95)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


3. Dataset of Peptidomimetics from Life Chemicals

In [22]:
# read the molecular-descriptor table of the Life Chemicals peptidomimetics
df_pep_lc = pd.read_csv('../../../../../data/Molecular_Descriptors_Peptidomimetics_Life_Chemicals.csv')

# display the shape and the column names to see which non-descriptor columns are present
df_pep_lc.shape, df_pep_lc.columns

((5836, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [23]:
# remove the ReadIn_ID column in place, then show the resulting shape
df_pep_lc.drop(columns=['ReadIn_ID'], inplace=True)
df_pep_lc.shape

(5836, 778)

In [24]:
# Make USER_ID the index (in place); it stops being a regular column, so only D-columns remain
df_pep_lc.set_index('USER_ID', inplace=True)
df_pep_lc.shape

(5836, 777)

In [25]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# NOTE(review): `mask` has to line up with the current columns of df_pep_lc — presumably the
# same descriptor columns/order the model was trained on; confirm
df_pep_lc_pred = df_pep_lc.loc[:, mask]

# check shape
df_pep_lc_pred.shape

(5836, 354)

In [26]:
# predict class labels and class probabilities for every compound
pred_3 = clf.predict(df_pep_lc_pred)
pred_prob_3 = clf.predict_proba(df_pep_lc_pred)

# sum of the predicted labels; with 0/1 labels this is the number of compounds predicted as 1 (active)
pred_3.sum()

2112

In [27]:
# attach the predicted labels and probabilities to the full descriptor table as new columns
df_pep_lc['Predictions'] = pred_3
# column 1 of predict_proba is stored as the probability
# NOTE(review): assumes column 1 corresponds to the active class (clf.classes_ == [0, 1]) — confirm
df_pep_lc['Probabilities'] = pred_prob_3[:, 1]
df_pep_lc.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F8881-1049,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,4.087,-0.626,0.0,0.259,0,0.041678
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.0,3.033,1,0.865767
F6619-2116,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.244,0.0,0.265,0,0.023649
F6497-5659,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.135,0.0,-0.512,0,0.009823
F6559-1463,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.459,-0.692,0.0,1.597,0,0.049908


In [28]:
# keep only the compounds predicted as active (label 1)
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.000,3.033,1,0.865767
F6492-0089,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.322,-0.673,0.000,1.368,1,0.806874
F6507-8802,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,4.170,-0.750,0.000,1.891,1,0.672406
F6507-8808,0,0,0,0,2,0,0,0,0,0,...,1,0,0,0,4.322,-0.763,0.000,2.155,1,0.783268
F6521-7740,0,1,0,1,0,0,0,0,0,0,...,1,0,0,0,4.322,-0.673,0.000,1.415,1,0.713178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F1280-0022,4,0,0,1,5,0,0,0,0,0,...,1,0,0,0,5.943,-0.770,0.511,5.545,1,0.891494
F0554-0832,4,0,0,2,4,0,0,0,0,0,...,1,0,0,0,5.845,-0.784,0.535,4.177,1,0.985413
F0554-0828,4,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.895,-0.795,0.523,4.625,1,0.963080
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.648455


In [39]:
# save the probabilities of all predicted actives (label 1, no probability cutoff), keyed by the USER_ID index, to csv
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1), ['Probabilities']].to_csv('../../data/gbc_5_predicted_peptidomimetics.csv', index=True)

In [29]:
# predicted actives (label 1) whose probability is greater than 0.95
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1) & df_pep_lc['Probabilities'].gt(0.95)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F6521-9485,1,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.807,-0.844,0.261,3.673,1,0.970411
F6438-0734,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.738,0.000,1.333,1,0.972660
F6514-4323,0,0,1,3,0,0,0,0,0,0,...,1,0,0,0,5.044,-0.617,0.000,0.289,1,0.959250
F6440-2509,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.700,-0.787,0.000,3.282,1,0.960708
F6440-2686,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.700,-0.797,0.000,3.616,1,0.967111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F0554-0372,3,0,0,1,3,0,0,0,0,0,...,1,0,0,0,5.728,-0.751,0.450,4.460,1,0.953215
F0617-0128,3,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.781,-0.713,0.419,5.024,1,0.963745
F0554-0835,3,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.714,-0.794,0.436,3.407,1,0.991671
F0554-0832,4,0,0,2,4,0,0,0,0,0,...,1,0,0,0,5.845,-0.784,0.535,4.177,1,0.985413


In [30]:
# save the probabilities of the predicted actives (label 1) with probability above 0.95, keyed by the USER_ID index, to csv
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1) & df_pep_lc['Probabilities'].gt(0.95), ['Probabilities']].to_csv('../../data/gbc_5_predicted_peptidomimetics_life_chemicals.csv', index=True)

4. Dataset of Anti-inflammatory compounds

In [31]:
# read the molecular-descriptor table of the anti-inflammatory compounds
df_infla = pd.read_csv('../../../../../data/Molecular_Descriptors_Antiinflammatory.csv')

# display the shape and the column names to see which non-descriptor columns are present
df_infla.shape, df_infla.columns

((23839, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [32]:
# remove the ReadIn_ID column in place, then show the resulting shape
df_infla.drop(columns=['ReadIn_ID'], inplace=True)
df_infla.shape

(23839, 778)

In [33]:
# Make USER_ID the index (in place); it stops being a regular column, so only D-columns remain
df_infla.set_index('USER_ID', inplace=True)
df_infla.shape

(23839, 777)

In [34]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# NOTE(review): `mask` has to line up with the current columns of df_infla — presumably the
# same descriptor columns/order the model was trained on; confirm
df_infla_pred = df_infla.loc[:, mask]

# check shape
df_infla_pred.shape

(23839, 354)

In [35]:
# predict class labels and class probabilities for every compound
pred_4 = clf.predict(df_infla_pred)
pred_prob_4 = clf.predict_proba(df_infla_pred)

# sum of the predicted labels; with 0/1 labels this is the number of compounds predicted as 1 (active)
pred_4.sum()

514

In [36]:
# attach the predicted labels and probabilities to the full descriptor table as new columns
df_infla['Predictions'] = pred_4
# column 1 of predict_proba is stored as the probability
# NOTE(review): assumes column 1 corresponds to the active class (clf.classes_ == [0, 1]) — confirm
df_infla['Probabilities'] = pred_prob_4[:, 1]
df_infla.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F987-0090,2,0,0,2,2,0,0,0,0,0,...,1,0,0,0,5.49185,-0.703176,0.352941,2.63176,0,0.006664
F279-0513,1,0,0,2,2,0,0,0,0,0,...,0,0,0,0,5.32193,-0.706842,0.193548,1.8641,0,0.00898
G300-0457,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.58496,-0.7434,0.5,4.46192,0,0.015657
G732-2734,3,0,0,2,3,0,0,0,0,0,...,0,0,0,0,5.70044,-0.737017,0.461538,2.03623,0,0.028572
F838-0741,2,0,0,2,2,0,0,0,0,0,...,0,0,0,1,5.52356,-0.671214,0.352941,2.08026,0,0.300818


In [37]:
# keep only the compounds predicted as active (label 1)
df_infla.loc[df_infla['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
L904-3704,3,0,0,1,3,0,0,0,0,0,...,1,0,0,0,5.49185,-0.716920,0.562500,3.20168,1,0.657800
3643-2339,2,0,0,0,4,0,0,0,0,0,...,0,0,0,0,5.61471,-0.700127,0.324324,2.48478,1,0.513148
8015-5656,2,0,0,2,2,0,0,0,0,0,...,0,0,0,0,5.28540,-0.711333,0.428571,3.28414,1,0.545405
F432-0488,3,0,0,1,3,0,0,0,0,0,...,1,0,0,0,5.61471,-0.681450,0.514286,2.99648,1,0.662654
F432-0576,3,0,0,1,3,0,0,0,0,0,...,1,0,0,0,5.55459,-0.650475,0.514286,4.20056,1,0.709401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C567-0037,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.55459,-0.831026,0.514286,3.97965,1,0.564811
M088-0034,1,0,0,2,1,0,0,0,0,0,...,1,0,0,0,5.35755,-0.561256,0.187500,1.14326,1,0.507144
C567-0417,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.55459,-0.831026,0.514286,3.97965,1,0.601564
M088-0157,1,0,0,2,1,0,0,0,0,0,...,1,0,0,0,5.28540,-0.565474,0.200000,1.40451,1,0.619860


In [38]:
# predicted actives (label 1) whose probability is greater than 0.95
df_infla.loc[df_infla['Predictions'].eq(1) & df_infla['Probabilities'].gt(0.95)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C350-0468,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.64386,-0.712425,0.486486,3.74711,1,0.972233
4476-4936,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.85798,-0.745013,0.27907,3.32324,1,0.96528
4476-4931,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.85798,-0.745013,0.27907,3.32324,1,0.965627
3807-1819,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.64386,-0.774374,0.5,3.79728,1,0.959951
2268-1391,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.40939,-0.378997,0.548387,4.46886,1,0.989383
C645-0202,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.61471,-0.805349,0.486486,5.16064,1,0.957095
C645-0157,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.58496,-0.767101,0.5,5.1233,1,0.978868
F838-0524,2,0,0,2,2,0,0,0,0,0,...,0,0,0,1,5.55459,-0.68145,0.342857,3.90858,1,0.954462
8011-7096,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.52356,-0.785198,0.545455,3.52656,1,0.965186
8539-0699,3,0,0,0,6,0,0,0,0,0,...,0,0,0,0,5.70044,-0.811255,0.473684,4.88988,1,0.966829
