Screen the following datasets to predict active compounds using the `lightgbc_5` model <br>
1. Dataset of known inhibitors (validation of models)
2. Dataset of Peptidomimetics from ChemDiv
3. Dataset of Peptidomimetics from Life Chemicals
4. Dataset of Anti-inflammatory compounds


In [1]:
# importing libraries
import pandas as pd
import numpy as np
import pickle

In [2]:
# Unpickle the saved "vt" object; it is used further down as the column selector `mask`.
# NOTE: pickle.load can execute arbitrary code - only load artifacts from a trusted source.
with open('lightgbc_5_vt_pickle', mode='rb') as vt_file:
    mask = pickle.load(vt_file)

In [3]:
# Unpickle the trained classifier used for every prediction in this notebook.
# NOTE: pickle.load can execute arbitrary code - only load artifacts from a trusted source.
with open('lightgbc_5_pickle', mode='rb') as model_file:
    clf = pickle.load(model_file)

1. Dataset of known inhibitors (validation of models)

In [4]:
# Load the molecular descriptors of the known inhibitors
inhi_csv = '../../../../../data/Molecular_Descriptors_Known_Inhibitors.csv'
df_inhi = pd.read_csv(inhi_csv)

# Show the frame's dimensions together with its column labels
(df_inhi.shape, df_inhi.columns)

((9, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [5]:
# Remove the 'ReadIn_ID' column in place, then confirm the new dimensions
df_inhi.drop(columns=['ReadIn_ID'], inplace=True)
df_inhi.shape

(9, 778)

In [6]:
# Promote USER_ID from a column to the row index, leaving only descriptor columns
df_inhi = df_inhi.set_index('USER_ID')
df_inhi.shape

(9, 777)

In [7]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# (the object unpickled from 'lightgbc_5_vt_pickle'); this reduced frame is what gets passed to `clf`.
# NOTE(review): presumably `mask` is a boolean selector aligned with the descriptor columns - confirm.
# If so, re-running this cell after 'Predictions'/'Probabilities' are appended to df_inhi would likely fail.
df_inhi_pred = df_inhi.loc[:, mask]

# check shape
df_inhi_pred.shape

(9, 356)

In [8]:
# Score the known inhibitors: per-class probabilities and predicted labels
pred_prob_1 = clf.predict_proba(df_inhi_pred)
pred_1 = clf.predict(df_inhi_pred)

# total of the predicted labels, i.e. how many compounds were predicted 1 (active)
pred_1.sum()

6

In [9]:
# Append the predicted label and the second predict_proba column to the descriptor frame
df_inhi = df_inhi.assign(
    Predictions=pred_1,
    Probabilities=pred_prob_1[:, 1],
)
df_inhi.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.9999988
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.9919381
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,1.0
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.9997428
53480255,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,6.209,4.027,0.0,0.974,0,6.837672e-08


In [10]:
# Show only the rows whose predicted label is 1
df_inhi.loc[df_inhi['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.999999
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.991938
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,1.0
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.999743
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.997138
DB01136,3,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.392,-0.379,0.545,2.339,1,0.995041


In [11]:
# Export the rows predicted as 1: the USER_ID index plus the 'Probabilities' column only
active_inhi = df_inhi.loc[df_inhi['Predictions'].eq(1)]
active_inhi.to_csv(
    '../../data/lightgbc_5_predicted_known_inhibitors.csv',
    columns=['Probabilities'],
    index=True,
)

In [12]:
# Show rows predicted as 1 whose probability is above 0.7
df_inhi.loc[df_inhi['Predictions'].eq(1) & df_inhi['Probabilities'].gt(0.7)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.999999
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.991938
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,1.0
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.999743
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.997138
DB01136,3,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.392,-0.379,0.545,2.339,1,0.995041


2. Dataset of Peptidomimetics from ChemDiv

In [13]:
# Load the molecular descriptors of the ChemDiv peptidomimetics
pep_cd_csv = '../../../../../data/Molecular_Descriptors_Peptidomimetics_ChemDiv.csv'
df_pep_cd = pd.read_csv(pep_cd_csv)

# Show the frame's dimensions together with its column labels
(df_pep_cd.shape, df_pep_cd.columns)

((36711, 780),
 Index(['ReadIn_ID', 'USER_ID', 'USER_ID_2', 'D001', 'D002', 'D003', 'D004',
        'D005', 'D006', 'D007',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=780))

In [14]:
# Remove the 'ReadIn_ID' and 'USER_ID_2' columns in place, then confirm the new dimensions
df_pep_cd.drop(columns=['ReadIn_ID', 'USER_ID_2'], inplace=True)
df_pep_cd.shape

(36711, 778)

In [15]:
# Promote USER_ID from a column to the row index, leaving only descriptor columns
df_pep_cd = df_pep_cd.set_index('USER_ID')
df_pep_cd.shape

(36711, 777)

In [16]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# (the object unpickled from 'lightgbc_5_vt_pickle'); this reduced frame is what gets passed to `clf`.
# NOTE(review): presumably `mask` is a boolean selector aligned with the descriptor columns - confirm.
# If so, re-running this cell after 'Predictions'/'Probabilities' are appended to df_pep_cd would likely fail.
df_pep_cd_pred = df_pep_cd.loc[:, mask]

# check shape
df_pep_cd_pred.shape

(36711, 356)

In [17]:
# Score the ChemDiv peptidomimetics: per-class probabilities and predicted labels
pred_prob_2 = clf.predict_proba(df_pep_cd_pred)
pred_2 = clf.predict(df_pep_cd_pred)

# total of the predicted labels, i.e. how many compounds were predicted 1 (active)
pred_2.sum()

3

In [18]:
# Append the predicted label and the second predict_proba column to the descriptor frame
df_pep_cd = df_pep_cd.assign(
    Predictions=pred_2,
    Probabilities=pred_prob_2[:, 1],
)
df_pep_cd.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M652-0486,1,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.42626,-0.648642,0.181818,0.498807,0,1.035614e-08
L258-0120,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,9.922779e-08
L834-0670,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,2.89726,0,1.945371e-06
L258-0113,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,1.610818e-09
L834-0614,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,3.01043,0,1.733529e-08


In [19]:
# Show only the rows whose predicted label is 1
df_pep_cd.loc[df_pep_cd['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
SA49-1335,1,0,1,1,3,0,0,0,0,0,...,0,0,0,0,5.49185,-0.767101,0.171429,1.24623,1,0.644598
SA49-0214,1,0,1,1,2,0,0,0,0,0,...,0,0,0,0,5.39232,-0.735139,0.176471,1.24034,1,0.516015
S724-1887,0,0,2,1,0,0,0,0,0,0,...,0,0,0,0,4.85798,-0.64344,0.0,-0.540563,1,0.551417


In [20]:
# Show rows predicted as 1 whose probability is above 0.69.
# (The previous comment said 0.7; the filter actually applied is 0.69.)
df_pep_cd.loc[(df_pep_cd['Predictions'] == 1) & (df_pep_cd['Probabilities'] > 0.69)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


3. Dataset of Peptidomimetics from Life Chemicals

In [21]:
# Load the molecular descriptors of the Life Chemicals peptidomimetics
pep_lc_csv = '../../../../../data/Molecular_Descriptors_Peptidomimetics_Life_Chemicals.csv'
df_pep_lc = pd.read_csv(pep_lc_csv)

# Show the frame's dimensions together with its column labels
(df_pep_lc.shape, df_pep_lc.columns)

((5836, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [22]:
# Remove the 'ReadIn_ID' column in place, then confirm the new dimensions
df_pep_lc.drop(columns=['ReadIn_ID'], inplace=True)
df_pep_lc.shape

(5836, 778)

In [23]:
# Promote USER_ID from a column to the row index, leaving only descriptor columns
df_pep_lc = df_pep_lc.set_index('USER_ID')
df_pep_lc.shape

(5836, 777)

In [24]:
# Subset the dataset for prediction: keep only the columns selected by `mask`
# (the object unpickled from 'lightgbc_5_vt_pickle'); this reduced frame is what gets passed to `clf`.
# NOTE(review): presumably `mask` is a boolean selector aligned with the descriptor columns - confirm.
# If so, re-running this cell after 'Predictions'/'Probabilities' are appended to df_pep_lc would likely fail.
df_pep_lc_pred = df_pep_lc.loc[:, mask]

# check shape
df_pep_lc_pred.shape

(5836, 356)

In [25]:
# Score the Life Chemicals peptidomimetics: per-class probabilities and predicted labels
pred_prob_3 = clf.predict_proba(df_pep_lc_pred)
pred_3 = clf.predict(df_pep_lc_pred)

# total of the predicted labels, i.e. how many compounds were predicted 1 (active)
pred_3.sum()

3711

In [26]:
# Append the predicted label and the second predict_proba column to the descriptor frame
df_pep_lc = df_pep_lc.assign(
    Predictions=pred_3,
    Probabilities=pred_prob_3[:, 1],
)
df_pep_lc.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F8881-1049,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,4.087,-0.626,0.0,0.259,1,0.835037
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.0,3.033,1,0.99862
F6619-2116,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.244,0.0,0.265,0,0.367096
F6497-5659,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.135,0.0,-0.512,0,0.142971
F6559-1463,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.459,-0.692,0.0,1.597,0,4.1e-05


In [27]:
# Show only the rows whose predicted label is 1
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F8881-1049,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,4.087,-0.626,0.000,0.259,1,0.835037
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.000,3.033,1,0.998620
F6545-0060,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.524,-0.692,0.000,0.836,1,0.998421
F6512-1852,0,0,0,1,2,0,0,0,0,0,...,1,0,0,0,4.755,-0.797,0.000,3.521,1,0.722818
F6556-2677,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.192,0.333,1.726,1,0.993778
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F0410-0004,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.852,0.638,6.654,1,0.725195
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.999997
F0410-0003,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,6.922,1,0.999973
F0410-0005,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,7.011,1,0.998117


In [28]:
# Show rows predicted as 1 whose probability is above 0.95.
# (The previous comment said 0.7; the filter actually applied is 0.95, the same cut-off used for the CSV export.)
df_pep_lc.loc[(df_pep_lc['Predictions'] == 1) & (df_pep_lc['Probabilities'] > 0.95)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.000,3.033,1,0.998620
F6545-0060,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.524,-0.692,0.000,0.836,1,0.998421
F6556-2677,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.192,0.333,1.726,1,0.993778
F6521-8783,1,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.348,0.286,2.366,1,0.996657
F6548-0173,1,0,0,0,3,0,0,0,0,0,...,0,0,0,0,4.585,-0.750,0.316,1.645,1,0.950876
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F0554-0828,4,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.895,-0.795,0.523,4.625,1,0.988393
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.999997
F0410-0003,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,6.922,1,0.999973
F0410-0005,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,7.011,1,0.998117


In [38]:
# Export the rows predicted as 1 with probability above 0.95:
# the USER_ID index plus the 'Probabilities' column only
confident_pep_lc = df_pep_lc.loc[
    df_pep_lc['Predictions'].eq(1) & df_pep_lc['Probabilities'].gt(0.95)
]
confident_pep_lc.to_csv(
    '../../data/lightgbc_5_predicted_peptidomimetics_life_chemicals.csv',
    columns=['Probabilities'],
    index=True,
)

4. Dataset of Anti-inflammatory compounds

In [30]:
# Load the molecular descriptors of the anti-inflammatory compounds
infla_csv = '../../../../../data/Molecular_Descriptors_Antiinflammatory.csv'
df_infla = pd.read_csv(infla_csv)

# Show the frame's dimensions together with its column labels
(df_infla.shape, df_infla.columns)

((23839, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [31]:
# Remove the 'ReadIn_ID' column in place, then confirm the new dimensions
df_infla.drop(columns=['ReadIn_ID'], inplace=True)
df_infla.shape

(23839, 778)

In [32]:
# Promote USER_ID from a column to the row index, leaving only descriptor columns
df_infla = df_infla.set_index('USER_ID')
df_infla.shape

(23839, 777)

In [33]:
#  Subset the dataset for prediction: keep only the columns selected by `mask`
# (the object unpickled from 'lightgbc_5_vt_pickle'); this reduced frame is what gets passed to `clf`.
# NOTE(review): presumably `mask` is a boolean selector aligned with the descriptor columns - confirm.
# If so, re-running this cell after 'Predictions'/'Probabilities' are appended to df_infla would likely fail.
df_infla_pred = df_infla.loc[:, mask]

# check shape
df_infla_pred.shape

(23839, 356)

In [34]:
# Score the anti-inflammatory compounds: per-class probabilities and predicted labels
pred_prob_4 = clf.predict_proba(df_infla_pred)
pred_4 = clf.predict(df_infla_pred)

# total of the predicted labels, i.e. how many compounds were predicted 1 (active)
pred_4.sum()

59

In [35]:
# Append the predicted label and the second predict_proba column to the descriptor frame
df_infla = df_infla.assign(
    Predictions=pred_4,
    Probabilities=pred_prob_4[:, 1],
)
df_infla.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F987-0090,2,0,0,2,2,0,0,0,0,0,...,1,0,0,0,5.49185,-0.703176,0.352941,2.63176,0,8.371389e-06
F279-0513,1,0,0,2,2,0,0,0,0,0,...,0,0,0,0,5.32193,-0.706842,0.193548,1.8641,0,1.386399e-08
G300-0457,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.58496,-0.7434,0.5,4.46192,0,1.224048e-06
G732-2734,3,0,0,2,3,0,0,0,0,0,...,0,0,0,0,5.70044,-0.737017,0.461538,2.03623,0,4.575959e-06
F838-0741,2,0,0,2,2,0,0,0,0,0,...,0,0,0,1,5.52356,-0.671214,0.352941,2.08026,0,2.201582e-05


In [36]:
# Show only the rows whose predicted label is 1
df_infla.loc[df_infla['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C893-0646,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.61471,-0.811255,0.486486,4.28037,1,0.957012
L322-0443,3,0,0,1,3,0,0,0,0,0,...,0,0,0,1,5.61471,-0.661023,0.5,4.15144,1,0.933503
J084-0372,2,0,0,3,3,0,0,0,0,0,...,2,0,0,0,5.70044,-0.729299,0.307692,4.20421,1,0.558291
8017-0333,2,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.72792,-0.765364,0.3,4.65719,1,0.825653
000S-0424,4,0,0,0,4,0,0,0,0,0,...,0,0,0,0,5.26679,-0.822765,0.703704,4.16923,1,0.608102
1190-0070,5,0,0,0,5,0,0,0,0,0,...,0,0,0,0,5.70044,0.149649,0.777778,3.59076,1,0.548131
1125-0367,4,0,0,1,5,0,0,0,0,0,...,0,0,0,0,5.64386,-0.867299,0.648649,5.86471,1,0.997805
E754-0623,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.2854,-0.805922,0.387097,4.48492,1,0.996313
F235-1124,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.75489,-0.804866,0.439024,5.33884,1,0.854839
5648-0904,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.72792,-0.82701,0.45,4.97542,1,0.532484


In [37]:
# Show rows predicted as 1 whose probability is above 0.9.
# (The previous comment said 0.7; the filter actually applied is 0.9.)
df_infla.loc[(df_infla['Predictions'] == 1) & (df_infla['Probabilities'] > 0.9)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C893-0646,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.61471,-0.811255,0.486486,4.28037,1,0.957012
L322-0443,3,0,0,1,3,0,0,0,0,0,...,0,0,0,1,5.61471,-0.661023,0.5,4.15144,1,0.933503
1125-0367,4,0,0,1,5,0,0,0,0,0,...,0,0,0,0,5.64386,-0.867299,0.648649,5.86471,1,0.997805
E754-0623,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.2854,-0.805922,0.387097,4.48492,1,0.996313
8006-5235,4,0,0,0,6,0,0,0,0,0,...,0,0,0,0,5.87036,-0.783876,0.534884,4.57107,1,0.99607
3807-1821,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.67243,-0.781209,0.486486,4.00162,1,0.99719
3807-1819,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.64386,-0.774374,0.5,3.79728,1,0.994866
3651-6108,4,0,0,0,5,0,0,0,0,0,...,0,0,0,0,5.80735,-0.72438,0.585366,4.25084,1,0.934683
3257-3390,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,5.93074,-0.778038,0.666667,3.91502,1,0.974567
2729-0759,4,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.67243,-0.841302,0.594595,4.72314,1,0.968128
