Screen the following datasets to predict active compounds using the `xgboost_3` model <br>
1. Dataset of known inhibitors (validation of models)
2. Dataset of Peptidomimetics from ChemDiv
3. Dataset of Peptidomimetics from Life Chemicals
4. Dataset of Anti-inflammatory compounds


In [1]:
# importing libraries
import pandas as pd
import numpy as np
import pickle

In [2]:
# load vt pickle object
# `mask` is used further down as the column selector in `df.loc[:, mask]`, which
# picks the descriptor columns that are passed to the model.
# NOTE(review): the "vt" in the file name presumably refers to a variance-threshold
# feature-selection step — confirm against the training notebook.
# NOTE(security): pickle.load can execute arbitrary code; only load pickle files
# that were produced by this project.
with open('xgboost_3_vt_pickle', 'rb') as f:
    mask = pickle.load(f)

In [3]:
# load model
# `clf` is used further down only through its `predict` and `predict_proba` methods.
# NOTE(review): presumably the fitted `xgboost_3` classifier named in the notebook
# introduction — confirm which library/version pickled it before unpickling.
# NOTE(security): pickle.load can execute arbitrary code; only load pickle files
# that were produced by this project.
with open('xgboost_3_pickle', 'rb') as f:
    clf = pickle.load(f)

1. Dataset of known inhibitors (validation of models)

In [4]:
# Load the molecular-descriptor table of the known inhibitors
df_inhi = pd.read_csv(
    filepath_or_buffer='../../../../../data/Molecular_Descriptors_Known_Inhibitors.csv'
)

# Display the frame's dimensions together with its column labels
(df_inhi.shape, df_inhi.columns)

((9, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [5]:
# Drop the 'ReadIn_ID' column so that only USER_ID and the descriptor columns remain.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the cell
# idempotent: re-running it after the column is already gone no longer raises KeyError.
df_inhi = df_inhi.drop(columns=['ReadIn_ID'], errors='ignore')
df_inhi.shape

(9, 778)

In [6]:
# Make USER_ID the index.
# Guarded so the cell can be re-run: once USER_ID has become the index it is no longer
# a column, and an unconditional set_index('USER_ID') would raise KeyError.
if 'USER_ID' in df_inhi.columns:
    df_inhi = df_inhi.set_index('USER_ID')
df_inhi.shape

(9, 777)

In [7]:
# Subset the dataset for prediction
# `mask` (loaded from 'xgboost_3_vt_pickle') selects which descriptor columns are kept.
# NOTE(review): `mask` is presumably a boolean array aligned with the 777 descriptor
# columns; if so, re-running this cell after the 'Predictions'/'Probabilities' columns
# have been added to `df_inhi` would fail on a length mismatch — confirm, and re-run
# from the read cell if that happens.
df_inhi_pred = df_inhi.loc[:, mask]

# check shape
df_inhi_pred.shape

(9, 359)

In [8]:
# Run the classifier on the selected descriptors: hard labels and class probabilities
pred_1, pred_prob_1 = clf.predict(df_inhi_pred), clf.predict_proba(df_inhi_pred)

# Total of the predicted labels (for 0/1 labels: number of compounds predicted active)
pred_1.sum()

5

In [9]:
# Attach the predicted label and column 1 of predict_proba (presumably the probability
# of the active class) to the descriptor frame, then preview the first rows
df_inhi = df_inhi.assign(Predictions=pred_1, Probabilities=pred_prob_1[:, 1])
df_inhi.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.995117
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.922887
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,0.999158
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.907255
53480255,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,6.209,4.027,0.0,0.974,0,0.011046


In [10]:
# Show only the compounds whose predicted label is 1
df_inhi.loc[df_inhi['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.995117
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.922887
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,0.999158
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.907255
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.973598


In [11]:
# Save the probabilities of the compounds predicted as 1, keyed by the USER_ID index
df_inhi.loc[df_inhi['Predictions'].eq(1), ['Probabilities']].to_csv(
    '../../data/xgboost_3_predicted_known_inhibitors.csv', index=True
)

In [12]:
# Compounds predicted as 1 whose probability is above 0.7
df_inhi.loc[df_inhi['Predictions'].eq(1) & df_inhi['Probabilities'].gt(0.7)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8087741,2,0,0,2,3,0,0,0,0,0,...,1,0,0,0,5.459,-0.313,0.353,3.386,1,0.995117
2545304,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.358,0.322,0.4,2.651,1,0.922887
2534913,2,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.644,-0.737,0.308,1.511,1,0.999158
11703255,1,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.954,-0.643,0.25,2.766,1,0.907255
44224215,2,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.755,0.437,0.6,2.149,1,0.973598


2. Dataset of Peptidomimetics from ChemDiv

In [13]:
# Load the molecular-descriptor table of the ChemDiv peptidomimetics
df_pep_cd = pd.read_csv(
    filepath_or_buffer='../../../../../data/Molecular_Descriptors_Peptidomimetics_ChemDiv.csv'
)

# Display the frame's dimensions together with its column labels
(df_pep_cd.shape, df_pep_cd.columns)

((36711, 780),
 Index(['ReadIn_ID', 'USER_ID', 'USER_ID_2', 'D001', 'D002', 'D003', 'D004',
        'D005', 'D006', 'D007',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=780))

In [14]:
# Drop 'ReadIn_ID' and 'USER_ID_2' so that only USER_ID and the descriptor columns remain.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the cell
# idempotent: re-running it after the columns are already gone no longer raises KeyError.
df_pep_cd = df_pep_cd.drop(columns=['ReadIn_ID', 'USER_ID_2'], errors='ignore')
df_pep_cd.shape

(36711, 778)

In [15]:
# Make USER_ID the index.
# Guarded so the cell can be re-run: once USER_ID has become the index it is no longer
# a column, and an unconditional set_index('USER_ID') would raise KeyError.
if 'USER_ID' in df_pep_cd.columns:
    df_pep_cd = df_pep_cd.set_index('USER_ID')
df_pep_cd.shape

(36711, 777)

In [16]:
# Subset the dataset for prediction
# `mask` (loaded from 'xgboost_3_vt_pickle') selects which descriptor columns are kept.
# NOTE(review): `mask` is presumably a boolean array aligned with the 777 descriptor
# columns; if so, re-running this cell after the 'Predictions'/'Probabilities' columns
# have been added to `df_pep_cd` would fail on a length mismatch — confirm, and re-run
# from the read cell if that happens.
df_pep_cd_pred = df_pep_cd.loc[:, mask]

# check shape
df_pep_cd_pred.shape

(36711, 359)

In [17]:
# Run the classifier on the selected descriptors: hard labels and class probabilities
pred_2, pred_prob_2 = clf.predict(df_pep_cd_pred), clf.predict_proba(df_pep_cd_pred)

# Total of the predicted labels (for 0/1 labels: number of compounds predicted active)
pred_2.sum()

0

In [18]:
# Attach the predicted label and column 1 of predict_proba (presumably the probability
# of the active class) to the descriptor frame, then preview the first rows
df_pep_cd = df_pep_cd.assign(Predictions=pred_2, Probabilities=pred_prob_2[:, 1])
df_pep_cd.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
M652-0486,1,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.42626,-0.648642,0.181818,0.498807,0,2.1e-05
L258-0120,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,1.6e-05
L834-0670,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,2.89726,0,6.7e-05
L258-0113,2,0,0,0,3,0,0,0,0,0,...,0,0,0,0,5.35755,-0.726331,0.375,2.81018,0,1.4e-05
L834-0614,2,0,0,1,3,0,0,0,0,0,...,0,0,0,0,5.2854,-0.696023,0.4,3.01043,0,1.2e-05


In [19]:
# Show only the compounds whose predicted label is 1
df_pep_cd.loc[df_pep_cd['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [20]:
# create a dataframe of predictions of 1 only with probabilities greater than 0.69
# NOTE(review): the known-inhibitor section filters at 0.7; confirm that 0.69 is the
# intended cut-off for this dataset.
df_pep_cd[(df_pep_cd['Predictions'] == 1) & (df_pep_cd['Probabilities'] > 0.69)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


3. Dataset of Peptidomimetics from Life Chemicals

In [21]:
# Load the molecular-descriptor table of the Life Chemicals peptidomimetics
df_pep_lc = pd.read_csv(
    filepath_or_buffer='../../../../../data/Molecular_Descriptors_Peptidomimetics_Life_Chemicals.csv'
)

# Display the frame's dimensions together with its column labels
(df_pep_lc.shape, df_pep_lc.columns)

((5836, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [22]:
# Drop the 'ReadIn_ID' column so that only USER_ID and the descriptor columns remain.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the cell
# idempotent: re-running it after the column is already gone no longer raises KeyError.
df_pep_lc = df_pep_lc.drop(columns=['ReadIn_ID'], errors='ignore')
df_pep_lc.shape

(5836, 778)

In [23]:
# Make USER_ID the index.
# Guarded so the cell can be re-run: once USER_ID has become the index it is no longer
# a column, and an unconditional set_index('USER_ID') would raise KeyError.
if 'USER_ID' in df_pep_lc.columns:
    df_pep_lc = df_pep_lc.set_index('USER_ID')
df_pep_lc.shape

(5836, 777)

In [24]:
# Subset the dataset for prediction
# `mask` (loaded from 'xgboost_3_vt_pickle') selects which descriptor columns are kept.
# NOTE(review): `mask` is presumably a boolean array aligned with the 777 descriptor
# columns; if so, re-running this cell after the 'Predictions'/'Probabilities' columns
# have been added to `df_pep_lc` would fail on a length mismatch — confirm, and re-run
# from the read cell if that happens.
df_pep_lc_pred = df_pep_lc.loc[:, mask]

# check shape
df_pep_lc_pred.shape

(5836, 359)

In [25]:
# Run the classifier on the selected descriptors: hard labels and class probabilities
pred_3, pred_prob_3 = clf.predict(df_pep_lc_pred), clf.predict_proba(df_pep_lc_pred)

# Total of the predicted labels (for 0/1 labels: number of compounds predicted active)
pred_3.sum()

3057

In [26]:
# Attach the predicted label and column 1 of predict_proba (presumably the probability
# of the active class) to the descriptor frame, then preview the first rows
df_pep_lc = df_pep_lc.assign(Predictions=pred_3, Probabilities=pred_prob_3[:, 1])
df_pep_lc.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F8881-1049,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,4.087,-0.626,0.0,0.259,0,0.187595
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.0,3.033,1,0.783657
F6619-2116,0,0,0,2,1,0,0,0,0,0,...,0,0,0,0,4.644,-0.244,0.0,0.265,0,0.050415
F6497-5659,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.135,0.0,-0.512,0,0.0386
F6559-1463,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.459,-0.692,0.0,1.597,0,0.002276


In [27]:
# Show only the compounds whose predicted label is 1
df_pep_lc.loc[df_pep_lc['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F6513-5720,0,0,0,1,2,0,0,0,0,0,...,0,0,0,0,4.644,-0.776,0.000,3.033,1,0.783657
F6545-0060,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.524,-0.692,0.000,0.836,1,0.943680
F6492-0089,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.322,-0.673,0.000,1.368,1,0.651420
F6507-8802,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,4.170,-0.750,0.000,1.891,1,0.575509
F6507-8808,0,0,0,0,2,0,0,0,0,0,...,1,0,0,0,4.322,-0.763,0.000,2.155,1,0.924329
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F0514-4159,3,0,0,3,3,0,0,0,0,0,...,2,0,0,0,6.109,-0.725,0.340,3.522,1,0.526989
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.999377
F0410-0003,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,6.922,1,0.999630
F0410-0005,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.828,0.638,7.011,1,0.932891


In [28]:
# create a dataframe of predictions of 1 only with probabilities greater than 0.95
# (the same 0.95 cut-off is used by the CSV export cell that follows)
df_pep_lc[(df_pep_lc['Predictions'] == 1) & (df_pep_lc['Probabilities'] > 0.95)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F6556-2677,1,0,0,1,1,0,0,0,0,0,...,0,0,0,0,4.459,-0.192,0.333,1.726,1,0.967725
F6521-7751,0,0,0,2,0,0,0,0,0,0,...,1,0,0,0,4.459,-0.709,0.000,1.951,1,0.998952
F6475-4409,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,4.807,-0.171,0.000,-0.690,1,0.994932
F6521-7936,0,0,0,1,1,0,0,0,0,0,...,1,0,0,0,4.585,-0.724,0.000,3.028,1,0.995871
F6481-3834,0,0,0,2,1,1,0,0,0,0,...,1,0,0,0,4.954,-0.686,0.000,1.130,1,0.991797
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
F0554-0828,4,0,0,1,4,0,0,0,0,0,...,1,0,0,0,5.895,-0.795,0.523,4.625,1,0.998023
F0514-4528,3,0,0,3,3,0,0,0,0,0,...,1,0,0,0,6.066,-0.713,0.353,3.518,1,0.973545
F0514-4562,3,0,0,3,3,0,0,0,0,0,...,1,0,0,0,6.066,-0.757,0.353,3.978,1,0.966125
F0411-0012,5,0,0,0,6,0,0,0,0,0,...,0,0,0,0,6.044,-0.824,0.652,6.743,1,0.999377


In [29]:
# Save the probabilities of the compounds predicted as 1 with probability above 0.95,
# keyed by the USER_ID index
df_pep_lc.loc[
    df_pep_lc['Predictions'].eq(1) & df_pep_lc['Probabilities'].gt(0.95),
    ['Probabilities'],
].to_csv('../../data/xgboost_3_predicted_peptidomimetics_life_chemicals.csv', index=True)

4. Dataset of Anti-inflammatory compounds

In [30]:
# Load the molecular-descriptor table of the anti-inflammatory compounds
df_infla = pd.read_csv(
    filepath_or_buffer='../../../../../data/Molecular_Descriptors_Antiinflammatory.csv'
)

# Display the frame's dimensions together with its column labels
(df_infla.shape, df_infla.columns)

((23839, 779),
 Index(['ReadIn_ID', 'USER_ID', 'D001', 'D002', 'D003', 'D004', 'D005', 'D006',
        'D007', 'D008',
        ...
        'D768', 'D769', 'D770', 'D771', 'D772', 'D773', 'D774', 'D775', 'D776',
        'D777'],
       dtype='object', length=779))

In [31]:
# Drop the 'ReadIn_ID' column so that only USER_ID and the descriptor columns remain.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps the cell
# idempotent: re-running it after the column is already gone no longer raises KeyError.
df_infla = df_infla.drop(columns=['ReadIn_ID'], errors='ignore')
df_infla.shape

(23839, 778)

In [32]:
# Make USER_ID the index.
# Guarded so the cell can be re-run: once USER_ID has become the index it is no longer
# a column, and an unconditional set_index('USER_ID') would raise KeyError.
if 'USER_ID' in df_infla.columns:
    df_infla = df_infla.set_index('USER_ID')
df_infla.shape

(23839, 777)

In [33]:
# Subset the dataset for prediction
# `mask` (loaded from 'xgboost_3_vt_pickle') selects which descriptor columns are kept.
# NOTE(review): `mask` is presumably a boolean array aligned with the 777 descriptor
# columns; if so, re-running this cell after the 'Predictions'/'Probabilities' columns
# have been added to `df_infla` would fail on a length mismatch — confirm, and re-run
# from the read cell if that happens.
df_infla_pred = df_infla.loc[:, mask]

# check shape
df_infla_pred.shape

(23839, 359)

In [34]:
# Run the classifier on the selected descriptors: hard labels and class probabilities
pred_4, pred_prob_4 = clf.predict(df_infla_pred), clf.predict_proba(df_infla_pred)

# Total of the predicted labels (for 0/1 labels: number of compounds predicted active)
pred_4.sum()

48

In [35]:
# Attach the predicted label and column 1 of predict_proba (presumably the probability
# of the active class) to the descriptor frame, then preview the first rows
df_infla = df_infla.assign(Predictions=pred_4, Probabilities=pred_prob_4[:, 1])
df_infla.head()

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
F987-0090,2,0,0,2,2,0,0,0,0,0,...,1,0,0,0,5.49185,-0.703176,0.352941,2.63176,0,0.002598
F279-0513,1,0,0,2,2,0,0,0,0,0,...,0,0,0,0,5.32193,-0.706842,0.193548,1.8641,0,4.4e-05
G300-0457,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.58496,-0.7434,0.5,4.46192,0,0.000204
G732-2734,3,0,0,2,3,0,0,0,0,0,...,0,0,0,0,5.70044,-0.737017,0.461538,2.03623,0,0.000194
F838-0741,2,0,0,2,2,0,0,0,0,0,...,0,0,0,1,5.52356,-0.671214,0.352941,2.08026,0,0.002067


In [36]:
# Show only the compounds whose predicted label is 1
df_infla.loc[df_infla['Predictions'].eq(1)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
J084-0372,2,0,0,3,3,0,0,0,0,0,...,2,0,0,0,5.70044,-0.729299,0.307692,4.20421,1,0.731655
D132-0070,2,0,0,2,2,1,0,0,0,0,...,0,0,0,0,5.67243,-0.822058,0.307692,4.72264,1,0.666828
C350-0468,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.64386,-0.712425,0.486486,3.74711,1,0.77251
000S-0424,4,0,0,0,4,0,0,0,0,0,...,0,0,0,0,5.26679,-0.822765,0.703704,4.16923,1,0.921956
Y040-1566,4,0,0,0,5,0,0,0,0,0,...,0,0,0,0,5.75489,-0.850405,0.615385,4.02449,1,0.933836
E754-0623,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.2854,-0.805922,0.387097,4.48492,1,0.740208
E844-1414,3,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.58496,-0.805349,0.5,5.78054,1,0.535567
M108-0228,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.52356,-0.671214,0.342857,1.36855,1,0.562572
8012-9877,3,0,0,0,5,0,0,0,0,0,...,0,0,0,0,5.52356,-0.785198,0.545455,5.50332,1,0.648042
8006-5235,4,0,0,0,6,0,0,0,0,0,...,0,0,0,0,5.87036,-0.783876,0.534884,4.57107,1,0.702016


In [37]:
# create a dataframe of predictions of 1 only with probabilities greater than 0.9
# NOTE(review): the cut-off differs between datasets (0.7, 0.69, 0.95, 0.9) — confirm
# that 0.9 is the intended value for the anti-inflammatory set.
df_infla[(df_infla['Predictions'] == 1) & (df_infla['Probabilities'] > 0.9)]

Unnamed: 0_level_0,D001,D002,D003,D004,D005,D006,D007,D008,D009,D010,...,D770,D771,D772,D773,D774,D775,D776,D777,Predictions,Probabilities
USER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000S-0424,4,0,0,0,4,0,0,0,0,0,...,0,0,0,0,5.26679,-0.822765,0.703704,4.16923,1,0.921956
Y040-1566,4,0,0,0,5,0,0,0,0,0,...,0,0,0,0,5.75489,-0.850405,0.615385,4.02449,1,0.933836
C645-0202,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.61471,-0.805349,0.486486,5.16064,1,0.986997
L829-0380,1,1,0,3,1,0,0,0,0,0,...,2,0,0,0,5.24793,-0.750851,0.206897,3.89168,1,0.91677
1189-2114,3,0,0,1,5,0,0,0,0,0,...,0,0,0,0,5.62936,-0.7434,0.459459,4.80221,1,0.901099
5137-3663,3,0,0,2,4,0,0,0,0,0,...,0,0,0,0,5.75489,-0.765364,0.45,4.64035,1,0.981451
Y040-7511,2,0,0,1,4,0,0,0,0,0,...,0,0,0,0,5.24793,-0.798443,0.4,5.1637,1,0.904596
