In [87]:
import numpy as np
import pandas as pd

In [88]:
df = pd.read_csv('/kaggle/input/pgprnon-pgpr/pgpr_non_pgpr_clean.csv')

In [89]:
df.shape

(86446, 7)

In [90]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86446 entries, 0 to 86445
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         86446 non-null  int64  
 1   Orientation        86446 non-null  object 
 2   Protein accession  86446 non-null  object 
 3   Protein name       86446 non-null  object 
 4   Protein length     86446 non-null  float64
 5   DNA_sequence       86446 non-null  object 
 6   Target             86446 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 4.6+ MB


In [91]:
df.isnull().sum()

Unnamed: 0           0
Orientation          0
Protein accession    0
Protein name         0
Protein length       0
DNA_sequence         0
Target               0
dtype: int64

In [92]:
df = df.drop(columns=['Unnamed: 0', 'Protein accession', 'DNA_sequence'], axis= 1)

In [93]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86446 entries, 0 to 86445
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Orientation     86446 non-null  object 
 1   Protein name    86446 non-null  object 
 2   Protein length  86446 non-null  float64
 3   Target          86446 non-null  float64
dtypes: float64(2), object(2)
memory usage: 2.6+ MB


In [94]:
df['Protein name'].nunique()

14048

In [95]:
df.Target.unique()

array([1., 0.])

In [96]:
# Veri setindeki yinelenen gözlemleri kontrol eder ve bunları kaldırır

def duplicate_values(df):
    print("Duplicate check...")
    num_duplicates = df.duplicated(subset=None, keep='first').sum()
    if num_duplicates > 0:
        print("There are", num_duplicates, "duplicated observations in the dataset.")
        df.drop_duplicates(keep='first', inplace=True)
        print(num_duplicates, "duplicates were dropped!")
        print("No more duplicate rows!")
    else:
        print("There are no duplicated observations in the dataset.")

In [97]:
#duplicate_values(df)

In [98]:
X = df.drop('Target',axis=1)
y = df['Target']

In [99]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=11)

In [100]:
ohe = X_train.select_dtypes("object").columns
num = X_train.select_dtypes("float64").columns

In [101]:
num

Index(['Protein length'], dtype='object')

In [102]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.preprocessing import StandardScaler

ohe_enc = OneHotEncoder(handle_unknown='ignore') #OneHotEncoder
std_sca = StandardScaler()
column_trans = make_column_transformer((ohe_enc, ohe),(std_sca, num), remainder='passthrough')

In [103]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

operations = [("OneHotEncoder", column_trans),
              ("RF_model", RandomForestClassifier(random_state=11))]

pipe_model = Pipeline(steps=operations)

pipe_model.fit(X_train, y_train)

In [107]:
from sklearn.metrics import accuracy_score,precision_recall_fscore_support

In [108]:
def calculate_results(y_true, y_pred):
    model_accuracy = accuracy_score(y_true, y_pred)
    model_precision, model_recall, model_f1,_ = precision_recall_fscore_support(y_true, y_pred,average="weighted")
    model_results = {"accuracy":model_accuracy,
                     "precision":model_precision,
                     "recall" :model_recall,
                     "f1":model_f1}
    return model_results

In [109]:
calculate_results(y_true=y_test,
                  y_pred=(pipe_model.predict(X_test)))

{'accuracy': 0.6910931174089069,
 'precision': 0.6917918484995064,
 'recall': 0.6910931174089069,
 'f1': 0.6908134549492512}

In [111]:
column_trans.get_feature_names_out()

array(['onehotencoder__Orientation_minus',
       'onehotencoder__Orientation_plus',
       'onehotencoder__Protein name_(2,3-dihydroxybenzoyl)adenylate synthase',
       ...,
       'onehotencoder__Protein name_zincin-like metallopeptidase domain-containing protein',
       'onehotencoder__Protein name_zonular occludens toxin domain-containing protein',
       'standardscaler__Protein length'], dtype=object)

In [110]:
pipe_model["RF_model"].feature_importances_ 

array([8.61534571e-04, 6.77444891e-04, 3.25929779e-05, ...,
       5.49067620e-05, 1.99193182e-05, 3.20117965e-01])

In [112]:
features = pipe_model["OneHotEncoder"].get_feature_names_out()
features

array(['onehotencoder__Orientation_minus',
       'onehotencoder__Orientation_plus',
       'onehotencoder__Protein name_(2,3-dihydroxybenzoyl)adenylate synthase',
       ...,
       'onehotencoder__Protein name_zincin-like metallopeptidase domain-containing protein',
       'onehotencoder__Protein name_zonular occludens toxin domain-containing protein',
       'standardscaler__Protein length'], dtype=object)

In [114]:
new_features = [i.replace("onehotencoder__","").replace("remainder__", "") for i in features]
new_features

['Orientation_minus',
 'Orientation_plus',
 'Protein name_(2,3-dihydroxybenzoyl)adenylate synthase',
 'Protein name_(2,3-dihydroxybenzoyl)adenylate synthase EntE',
 'Protein name_(2E,6E)-farnesyl diphosphate synthase',
 'Protein name_(2E,6E)-farnesyl-diphosphate-specific ditrans,polycis-undecaprenyl-diphosphate synthase',
 'Protein name_(2Fe-2S)-binding protein',
 'Protein name_(3,5-dihydroxyphenyl)acetyl-CoA 1,2-dioxygenase DpgC',
 'Protein name_(3R)-hydroxymyristoyl-ACP dehydratase',
 'Protein name_(4S)-4-hydroxy-5-phosphonooxypentane-2,3-dione isomerase',
 'Protein name_(E)-4-hydroxy-3-methylbut-2-enyl-diphosphate synthase (flavodoxin)',
 'Protein name_(Fe-S)-binding protein',
 'Protein name_(Na+)-NQR maturation NqrM',
 'Protein name_(S)-acetoin forming diacetyl reductase',
 'Protein name_(S)-benzoin forming benzil reductase',
 'Protein name_(S)-ureidoglycine aminohydrolase',
 'Protein name_(d)CMP kinase',
 'Protein name_(dimethylallyl)adenosine tRNA methylthiotransferase',
 'Protei

In [116]:
df_fi = pd.DataFrame(data = pipe_model["RF_model"].feature_importances_, index=new_features, #index=X.columns
                      columns = ["Feature Importance"])
df_fi = df_fi.sort_values("Feature Importance", ascending=False)

df_fi.head(10)

Unnamed: 0,Feature Importance
standardscaler__Protein length,0.320118
Protein name_hypothetical protein,0.029448
Protein name_LysR family transcriptional regulator,0.003857
Protein name_acyl-CoA dehydrogenase family protein,0.001291
Protein name_helix-turn-helix domain-containing protein,0.001177
Protein name_MFS transporter,0.001154
Protein name_ABC transporter ATP-binding protein,0.001054
Protein name_TetR/AcrR family transcriptional regulator,0.001051
Protein name_cytochrome P450,0.000988
Protein name_ATP-binding protein,0.000941


In [82]:
from xgboost import XGBClassifier

In [83]:
operations = [("OneHotEncoder", column_trans),
              ("XGB_model", XGBClassifier(random_state=11))]

pipe_model = Pipeline(steps=operations)

pipe_model.fit(X_train, y_train)

In [84]:
calculate_results(y_true=y_test,
                  y_pred=(pipe_model.predict(X_test)))

{'accuracy': 0.6045112781954888,
 'precision': 0.6570734568028934,
 'recall': 0.6045112781954888,
 'f1': 0.5684362475539015}

# ANN

In [29]:
X_train

Unnamed: 0,Orientation,Protein name,Protein length
46032,plus,glycoside hydrolase,733.0
82630,plus,hypothetical protein,57.0
6783,plus,hypothetical protein,31.0
83620,minus,OmpA family lipoprotein,219.0
86383,minus,hypothetical protein,66.0
...,...,...,...
23452,plus,hypothetical protein,87.0
75453,plus,DUF4158 domain-containing protein,344.0
40904,plus,YcgL domain-containing protein,86.0
47111,plus,crotonase/enoyl-CoA hydratase family protein,268.0


In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86446 entries, 0 to 86445
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Orientation     86446 non-null  object 
 1   Protein name    86446 non-null  object 
 2   Protein length  86446 non-null  float64
 3   Target          86446 non-null  float64
dtypes: float64(2), object(2)
memory usage: 2.6+ MB


In [31]:
cat = X_train.select_dtypes("object").columns
cat
num = X_train.select_dtypes("float64").columns
num

Index(['Protein length'], dtype='object')

In [23]:
num

Index(['Protein length'], dtype='object')

In [38]:
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler

enc = OneHotEncoder()
enc.fit_transform(X_train[cat])
X_test[cat] = enc.transform(X_test[cat])

ValueError: Found unknown categories ['DNA-binding transcriptional regulator/c-di-GMP phosphodiesterase PdeL', 'YmaF family protein', 'excinuclease ABC subunit A', 'uncharacterized protein YebO', 'putative transposase', 'abortive infection system antitoxin AbiGi family protein', 'propionyl-CoA:succinate CoA transferase', 'D-arabinose 5-phosphate isomerase GutQ', '(deoxy)nucleoside triphosphate pyrophosphohydrolase', 'DNA-binding transcriptional dual regulator AsnC', 'putative adenosine monophosphate--protein transferase Fic', 'phage DNA ejection protein', 'electron transfer flavoprotein subunit alpha', 'CRISPR-associated endoribonuclease Cas2', 'tetrahydrofolate dehydrogenase/cyclohydrolase catalytic domain-containing protein', 'V-type ATP synthase subunit E', 'type IV secretion protein Rhs', 'V-type ATP synthase subunit B', 'phage minor tail protein G', 'hydroxyacylglutathione hydrolase GloC', '2-oxoglutaramate amidase', 'DUF1440 domain-containing inner membrane protein YagU', 'thiol-disulfide exchange protein DsbD', 'DUF896 family protein', 'putative GTP-binding protein YkfA', 'SinI family autotransporter-associated protein', 'DUF3165 family protein', 'cytochrome-c peroxidase', 'PTS system glucose-specific transporter subunit IIBC', 'stage V sporulation protein D', 'bifunctional S-methyl-5-thioribose-1-phosphate isomerase/methylthioribulose 1-phosphate dehydratase', 'cell shape determining protein MreD', 'DNA-binding transcriptional activator DcuR', 'secretion protein', 'PTS system fructose-specific transporter subunit IIABC', 'DUF3999 family protein', 'DUF1869 domain-containing protein YoaG', 'IS701-like element ISSav3 family transposase', 'RNA polymerase sporulation sigma factor SigE', 'SCO5918 family protein', 'glutamic-type intramembrane protease PrsW', 'phosphate butyryltransferase', '50S ribosomal subunit protein L24', 'nucleoid-associated protein SymE', 'DUF480 domain-containing protein', 'phosphatase domain-containing protein YnbD', 'PF02464 family protein YdeJ', 'argininosuccinate synthetase', 'hydroxymethylglutaryl-CoA synthase family protein', 'DUF5970 family protein', 'adenosine kinase', 'PF05406 family protein YehF', 'DUF6482 family protein', 'OsmC family peroxiredoxin', 'acryloyl-CoA reductase', 'copper ion binding protein', 'mannosyltransferase', 'sodium, sulfate symporter', 'anti-sigma factor antagonist BldG', 'hexitol phosphatase B', 'YqeG family HAD IIIA-type phosphatase', 'glycoside hydrolase family 36 protein', 'sigma-E processing peptidase SpoIIGA', 'putative kinase inhibitor', 'immunodominant antigen A', 'DUF951 domain-containing protein', 'RNB domain-containing ribonuclease', 'RNase I', 'DNA-binding/iron metalloprotein/AP endonuclease', 'DUF1444 domain-containing protein', 'uncharacterized protein YggT', 'hydrogenase 4 component C', 'DUF2591 domain-containing protein', "beta-carotene 15,15'-dioxygenase, Brp/Blh family", 'putative fimbrial protein YcbU', 'acetohydroxy acid synthase I subunit IlvN', 'phosphoribosylglycinamide formyltransferase 1', 'putative DNA-binding transcriptional regulator DicC', 'murein hydrolase regulator LrgA', 'putative enamine/imine deaminase', '6-pyruvoyl tetrahydropterin synthase', 'gpW family head-tail joining protein', 'FAD-I family protein', 'putative basic amino acid antiporter YfcC', 'nucleoside:H(+) symporter NupC', 'methyl-accepting chemotaxis protein Tar', 'opine metallophore biosynthesis dehydrogenase', 'aspartate semialdehyde dehydrogenase', 'DUF6636 domain-containing protein', 'para-aminobenzoate synthase glutamine amidotransferase component II', 'tRNA preQ1(34) S-adenosylmethionine ribosyltransferase-isomerase', 'ica operon transcriptional regulator IcaR', '16S rRNA m(3)U1498 methyltransferase', 'DUF2280 domain-containing protein', 'fibrinogen-binding protein SdrD', 'putative enzyme-specific chaperone YaaW', 'sugar phosphatase YbiV', 'uncharacterized protein YaeQ', 'DUF6113 family protein', 'EndoU domain-containing protein', 'putative HspQ acetyl donor', 'phi PV83 orf 20-like protein', 'PF05360 family inner membrane protein YiaA', 'competence system response regulator transcription factor ComE', 'DNA-binding transcriptional repressor/NMN adenylyltransferase NadR', 'pyridoxal phosphate/fructose-1,6-bisphosphate phosphatase', 'DNA polymerase Y family protein', 'flotillin-like protein FloA', 'Fe-only nitrogenase subunit delta', '1-acylglycerol-3-phosphate O-acyltransferase PlsC', 'DUF1318 domain-containing protein YdbL', 'antitoxin of the GhoTS toxin-antitoxin system', 'DUF2256 domain-containing protein', 'type II secretion system protein J', 'phage DnaC-like protein', 'ethanolamine utilization microcompartment protein EutN', 'G5 domain-containing protein', '23S rRNA pseudouridine(746) and tRNA pseudouridine(32) synthase', 'omptin family outer membrane protease Kop', 'M protein trans-acting positive regulator PRD domain-containing protein', 'M2 family metallopeptidase', 'conjugal transfer protein TrbF', '2-oxo-hept-4-ene-1,7-dioate hydratase', 'putative uncharacterized protein YddL', 'uncharacterized protein YnbG', 'NapA signal peptide-binding chaperone NapD', 'Zn(2(+))/Cd(2(+))/Ni(2(+))/Cu(2(+)) exporter', 'glycine betaine ABC transporter membrane subunit YehW', 'N-acetylmuramoyl-L-alanine amidase-like domain-containing protein', 'D-galactonate dehydratase', 'DUF4035 domain-containing protein', 'undecaprenyldiphospho-muramoylpentapeptide beta-N- acetylglucosaminyltransferase', 'putative LysR-type DNA-binding transcriptional regulator YfiE', '50S ribosomal protein L7ae-like protein', 'pyrrolidone-carboxylate peptidase', 'DUF4345 family protein', 'soluble Cu(+) chaperone', '6-phospho 3-hexuloisomerase', 'DNA-binding transcriptional repressor CytR', 'endo-1,4-D-glucanase', 'Holliday junction resolvase-like protein', 'glucose-specific PTS enzyme IIBC component', 'ATP synthase F1 complex subunit alpha', 'TonB C-terminal domain-containing protein', 'putrescine transporter PotE', 'NinE family protein', 'succinate dehydrogenase cytochrome b558 subunit', 'putative phosphate acyltransferase', 'formate dehydrogenase accessory protein', 'carbamoyl-phosphate synthase (glutamine-hydrolyzing) large subunit', 'benzoate 1,2-dioxygenase large subunit', 'exodeoxyribonuclease V subunit RecD', 'MobQ family relaxase', 'DNA-binding transcriptional dual regulator HdfR', 'DUF3388 domain-containing protein', 'low activity glyoxalase ElbB', 'N-acetylmuramoyl-L-alanine amidase family protein', 'ribonuclease E inhibitor protein B', 'sensor histidine kinase BaeS', '30S ribosomal subunit protein S20', 'transcriptional activator rinb-like protein', 'D-serine transporter', 'cell division inhibitor SulA', 'competence protein ComGF', 'YuiA family protein', '3-oxoacyl- synthase', 'putative lipoprotein YjbH', 'N(6)-L-threonylcarbamoyladenine synthase, TsaB subunit', '50S ribosomal subunit protein L4', 'ABC-F type ribosomal protection protein', 'DUF4822 domain-containing protein', 'leukocidin s subunit', 'aspartyl protease family protein', 'Ni(2(+)) ABC transporter periplasmic binding protein', 'DUF6802 family protein', 'zinc finger-like domain-containing protein', 'protein YhgP', 'QacE family quaternary ammonium compound efflux SMR transporter', 'DUF1127 domain-containing protein YjiS', 'DUF3631 domain-containing protein', 'fumarase A', 'putative prophage lysis lipoprotein RzoD', 'DNA-binding transcriptional repressor PuuR', 'exodeoxyribonuclease V subunit RecC', 'PF05166 family protein YcgL', 'tryptophanyl-tRNA synthetase', 'DUF3376 domain-containing protein', 'DUF3613 domain-containing protein', '1-(5-phosphoribosyl)-5-[(5-phosphoribosylamino)methylideneamino] imidazole-4-carboxamide isomerase', 'terminase large subunit', 'DUF2059 domain-containing protein', 'geranyl diphosphate/farnesyl diphosphate synthase', '5-oxoprolinase/urea amidolyase family protein', 'protein YibY', 'T6SS amidase immunity protein Tai4 family protein', 'peptidoglycan bridge formation glycyltransferase FemX', 'cell division inhibition protein DicB', 'Cu(+) exporting P-type ATPase', 'uncharacterized protein AzuC', 'hydroxyacylglutathione hydrolase GloB', 'PTS system glucose-specific protein', 'YwgA family protein', "pyridoxal 5'-phosphate synthase", 'PF16456 family protein YmgD', 'prophage antitermination protein Q homolog QuuD', 'protealysin inhibitor emfourin', 'GrlR family regulatory protein', 'melibiose:H(+)/Na(+)/Li(+) symporter', 'putative DNA-binding transcriptional regulator YegW', 'thiamine monophosphate kinase', 'type 1 fimbria chaperone FimC', 'ECF RNA polymerase sigma factor SigK', 'RsfA family transcriptional regulator', 'carnitine--CoA ligase', 'Ni(II)/Co(II)-binding transcriptional repressor RcnR', 'putative invasin YchO', 'ethanolamine utilization cobalamin adenosyltransferase', 'DUF1871 family protein', 'PTS enzyme IIBC component MalX', 'energy-coupled thiamine transporter ThiT', 'putative tagatose-1,6-bisphosphate aldolase 2 chaperone', 'putative ATP-binding protein YheS', 'sulfite reductase subunit alpha', 'DUF1456 domain-containing protein YehS', 'citrate:succinate antiporter', '5-aminolevulinate synthase', 'glucose-1-phosphate thymidylyltransferase 1', 'colicin M resistance protein', 'DUF4118 domain-containing protein', 'SctV family type III secretion system export apparatus subunit AscV', 'MutS domain-containing protein', 'histone H1-like repetitive region-containing protein', 'PF13436 family protein YmgG', 'acid-soluble spore protein N', 'cold shock protein', 'bacteriophage L54a deoxyuridine 5-triphosphate nucleotidohydrolase', 'RNA polymerase sigma factor SigI', 'putative fimbrial chaperone EcpE', 'LPXTG-anchored SHIRT domain periscope protein', 'exonuclease RexB', 'YlxQ-related RNA-binding protein', 'choline/carnitine O-acyltransferase', 'arginyl-tRNA synthetase', 'K(+):H(+) symporter Kup', 'protein YmiD', '1-deoxy-D-xylulose 5-phosphate reductoisomerase', 'tRNA-specific 2-thiouridylase', 'regulatory protein MokC', 'DUF6177 family protein', 'putative fatty acid biosynthesis enzyme FabY', 'thiol:disulfide interchange protein', 'putative ATP-dependent protease YcbZ', 'guanitoxin biosynthesis heme-dependent pre-guanitoxin N-hydroxylase GntA', 'nitrogenase component 1', 'spore germination transcription factor GerE', 'succinate:quinone oxidoreductase, FAD binding protein', 'Na+:solute symporter', 'putative transporter ProY', 'type I Zorya anti-phage system protein ZorA1', 'superoxide dismutase (Cu-Zn)', 'UDP-N-acetylmuramoylalanyl-D-glutamate--L-lysine ligase', 'multidrug efflux pump membrane fusion protein MdtE', 'DNA-binding transcriptional dual regulator SdiA', 'C4 dicarboxylate/orotate:H(+) symporter', 'transglutaminase-like/TPR repeat-containing protein', 'lipoamide dehydrogenase', '2,3-diketo-L-gulonate:Na(+) symporter - periplasmic binding protein', 'styrene monooxygenase/indole monooxygenase family protein', 'arsenical resistance protein ArsH', 'lipid-A-disaccharide synthase N-terminal domain-containing protein', 'excisionase-like protein', 'threo-3-hydroxy-L-aspartate ammonia-lyase', 'pirin-like bicupin family protein', 'glutamyl aminopeptidase', 'ribbon-helix-helix domain-containing protein YbfE', 'ATP-dependent protease ATPase subunit HslU', 'ferric enterobactin ABC transporter ATP binding subunit', '6-N-hydroxylaminopurine resistance protein', 'DUF4479 family protein', 'D-glucoside 3-dehydrogenase', 'cation transport protein', 'lactate/glycolate:H(+) symporter LldP', 'uncharacterized protein YdcD', 'modulator protein MzrA', 'putative uncharacterized protein YahH', 'low molecular weight protein-tyrosine-phosphatase Wzb', 'hydrogenase maturation protein HybF', 'uncharacterized protein YidF', 'phage infection protein', 'type III secretion system translocon subunit AopB', 'YfkD famly protein', 'stage V sporulation protein B', 'tRNA-binding protein', 'aminopeptidase PepS', 'DUF5590 domain-containing protein', 'putative lipoprotein YacC', 'fructoselysine 6-phosphate deglycase', 'blue light- and temperature-regulated antirepressor BluF', 'dipeptide ABC transporter membrane subunit DppC', 'protein YecT', 'DUF4825 domain-containing protein', 'mannitol-specific PTS enzyme IIA component CmtB', 'L-serine dehydratase, iron-sulfur-dependent subunit alpha', '2-succinyl-5-enolpyruvyl-6-hydroxy-3-cyclohexene-1-carboxylate synthase', 'sugar-binding protein', 'putative fimbrial chaperone ElfD', 'phosphate ABC transporter membrane subunit PstA', 'protein HokE', 'arylsulfotransferase family protein', 'type I restriction-modification system subunit M N-terminal domain-containing protein', 'uncharacterized protein YkgH', 'Ton complex subunit ExbB', 'STY4199 family HEPN domain-containing protein', 'stage 0 sporulation family protein', 'putative ABC transporter periplasmic binding protein YcjN', 'cell division protein FtsX', '2-hydroxypentadienoate hydratase', 'zinc ABC transporter substrate-binding lipoprotein AdcA', "23S rRNA 2'-O-ribose C2498 methyltransferase", 'putative HD superfamily phosphohydrolase YedJ', 'DNA-binding transcriptional dual regulator NarL', 'SNF2 family protein', 'HK97 family phage major capsid protein', 'RecA inhibitor RecX', 'DUF6624 domain-containing protein', 'DUF3290 family protein', 'peptidyl-prolyl cis-trans isomerase B', 'cysteine/O-acetylserine exporter EamA', 'DUF6764 family protein', 'gluconate permease GntP', 'putative fimbrial chaperone YcbF', 'FeoB-associated Cys-rich membrane protein', 'signal recognition particle receptor', 'DUF6376 family protein', 'selenocysteyl-tRNA-specific translation elongation factor', '30S ribosomal subunit protein S1', 'pyrimidine monooxygenase RutA', 'formate/nitrite transporter', 'O-acetylhomoserine aminocarboxypropyltransferase/cysteine synthase', 'N-acetyl-D-galactosamine specific PTS enzyme IIB component', 'branched-chain alpha-keto acid dehydrogenase subunit E2', 'DUF433 domain-containing protein', 'capsular biosynthesis protein', 'TerC family inner membrane protein YoaE', 'ChpS antitoxin of the ChpB-ChpS toxin-antitoxin system', 'putative inner membrane protein YidI', 'ferrous iron transporter B', 'protein YkiE', 'DUF1576 domain-containing protein', 'protein S-nitrosylase', 'protein YciI', 'phosphotransferase system enzyme IIA(Ntr)', 'holocytochrome c synthase CcmF component', 'DNA-binding transcriptional repressor AscG', 'heme uptake protein IsdC', 'self recognizing antigen 43 (Ag43) autotransporter', 'phage encoded DNA polymerase I', 'HemK family modification methylase', 'K(+) transporting P-type ATPase subunit KdpC', 'protein-disulfide reductase DsbD domain-containing protein', 'membrane protein insertion efficiency factor', 'putative DNA-binding transcriptional regulator FrvR', 'putative allantoin permease', 'ferric citrate ABC transporter periplasmic binding protein', "aminoglycoside phosphotransferase APH(3')", 'sporulation histidine kinase inhibitor Sda', 'ArdC family protein', 'ExeA family protein', "S-methyl-5'-thioinosine phosphorylase", 'protein YbiE', 'sigma factor G inhibitor Gin', 'DUF1861 family protein', 'intermembrane phospholipid transport system - outer membrane lipoprotein MlaA', 'sensor histidine kinase BasS', 'DUF962 domain-containing protein', 'peptidoglycan meso-diaminopimelic acid protein amidase A', 'mRNA interferase toxin RelE', 'DUF2191 domain-containing protein', 'limit dextrin alpha-1,6-glucohydrolase', 'fructose bisphosphate aldolase', 'competence type IV pilus assembly protein ComGB', 'GspM family type II secretion system protein ExeM', '2-phospho-L-lactate transferase CofD family protein', 'YugE family protein', 'omptin family outer membrane protease', 'DUF3802 family protein', '5-methylcytosine-specific restriction enzyme subunit McrB', 'DUF2635 domain-containing protein', 'DUF3892 domain-containing protein', 'deaminase', 'N-acetylmuramoyl-L-alanine amidase C', 'tryptophan ABC transporter substrate-binding protein', '1-deoxypentalenic acid 11-beta-hydroxylase', 'glycerol-3-phosphate dehydrogenase C-terminal domain-containing protein', 'periplasmic protein YbcL', 'PF03458 family inner membrane protein YicG', 'TPR repeat-containing putative chaperone YgeG', 'FAD-containing oxidoreductase', 'STM4014 family protein', 'YojF family protein', 'putative PTS enzyme IIA component FrvA', 'phenylacetic acid degradation protein PaaN', 'ESPR domain-containing protein', 'ubiquinol-cytochrome c reductase cytochrome b subunit', 'cytochrome c biogenesis protein', 'DNA replication inhibitor CspD', 'serine protease SplF', 'heme ABC transporter substrate-binding protein IsdE', 'kinase-associated lipoprotein B', 'heptaprenyl diphosphate syntase component II', 'putative glutathione S-transferase YncG', 'uncharacterized protein YpfJ', 'proteasome activator', 'sporulation integral membrane protein YlbJ', 'CRISPR-associated protein Cas4', 'IS21-like element helper ATPase IstB', '2-octaprenylphenol 6-hydroxylase', 'DUF6182 family protein', 'superoxide oxidase', 'immunity 49 family protein', 'glutaminase', '3-oxoacyl-[acyl carrier protein] synthase 2', 'AhpD-like domain-containing protein YnjA', '3-hydroxy-2,4-pentadione 5-phosphate thiolase', 'tryptophan-rich sensory protein', 'lipoprotein YteS', 'cell wall biosynthesis protein ScdA', 'frataxin CyaY', 'plasmid segregation protein ParM domain-containing protein', 'DNA-binding transcriptional repressor NfeR', 'two-component system response regulator', 'uncharacterized protein YoeI', 'ferrichrome outer membrane transporter/phage receptor', 'purine nucleotidase', 'putative transporter YbiR', 'valine--pyruvate aminotransferase', 'putative cytochrome b561 YodB', 'putative formate-dependent nitrite reductase complex subunit NrfF', 'uncharacterized protein YjeV', 'phosphoribosylglycinamide formyltransferase 2', 'DUF697 domain-containing inner membrane protein YcjF', 'molybdopterin-guanine dinucleotide biosynthesis protein MobA', 'DUF1987 domain-containing protein', 'bacteriophage L54a antirepressor', 'LapD/MoxY N-terminal periplasmic domain-containing protein', 'PTS 2-O-a-mannosyl-D-glycerate transporter subunit IIABC', 'CE1759 family FMN reductase', 'enterobacterial common antigen polymerase', 'DUF4007 family protein', 'glycoside hydrolase family 57 protein', 'YokU family protein', 'amino acid ABC transporter-like protein', 'N-acetylglucosamine/diacetylchitobiose ABC transporter substrate-binding protein', 'quinol oxidase subunit I', 'NnrS family protein', 'NADH:quinone oxidoreductase subunit CD', 'ribonuclease E inhibitor protein A', 'G5 and 3D domain-containing protein', 'heme peroxidase', 'protein YbjN', 'DUF903 domain-containing protein', 'DUF386 domain-containing toxin-antitoxin biofilm protein TabA', 'glutamate decarboxylase A', 'aminobenzoyl-glutamate utilization protein B', 'DUF3885 domain-containing protein', 'putative 4Fe-4S cluster-containing protein', 'hydrogenase 2 membrane subunit', 'L-valine exporter, YgaZ component', 'amino acid racemase YgeA', 'pseudaminic acid cytidylyltransferase', 'DUF3748 domain-containing galacturonate catabolism protein YidR', 'penicillin-binding protein PBP2B', 'ADP-heptose:LPS heptosyltransferase 1', 'YhzD family protein', 'CdaR family protein', 'host factor 1 protein', 'DUF3239 domain-containing protein', 'putative electron transport protein HydN', 'DUF808 family protein', 'murein lipoprotein', 'lipid A biosynthesis acyltransferase', 'small acid-soluble spore protein SspJ', 'iron catecholate outer membrane transporter Fiu', 'DNA-binding transcriptional dual regulator MelR', 'putative ABC transporter ATP-binding subunit YhdZ', 'DUF2755 family protein', 'type II secretion system protein GspD', 'sulfurtransferase for molybdenum cofactor sulfuration', 'lateral flagellar basal body rod protein LfgB', '50S ribosomal protein L16-arginine 3-hydroxylase', 'fumarate reductase membrane protein FrdC', 'ssDNA-binding protein', 'twin transmembrane helix small protein', 'hydroxycarboxylate dehydrogenase B', 'inner membrane protein HofC', 'SAVMC3_10250 family protein', 'DUF2768 domain-containing protein', 'uncharacterized protein YjeN', 'J domain-containing protein DjlB', 'cell death peptidase Lit', 'zinc ribbon domain-containing protein YfgJ', 'DNA-binding transcriptional activator GlrR', 'OapA family protein', 'bifunctional pyrimidine regulatory protein PyrR/uracil phosphoribosyltransferase', 'ABC exporter membrane subunit YbhS', 'DedA family protein YghB', 'putative endonuclease YhhZ', 'Rnf electron transport complex subunit RnfB', 'sarcosine oxidase subunit gamma family protein', 'L-ascorbate specific PTS enzyme IIC component', 'inactive transposase YbfQ', 'DUF3014 domain-containing protein', '16S rRNA m(2)G966 methyltransferase', 'thermonuclease family protein', 'DUF162 domain-containing lactate utilization protein YkgG', 'cytochrome c maturation protein B', 'acyl-CoA carboxylase epsilon subunit', 'bifunctional aldolase/short-chain dehydrogenase', 'D-xylonate dehydratase', 'copper resistance membrane spanning protein PcoS', 'transcriptional regulator Fur', 'transcriptional repressor GlnR', 'protease IV, a signal peptide peptidase', 'methylaspartate mutase subunit E', 'DNA-binding transcriptional dual regulator FlhD', '3-isopropylmalate dehydratase subunit LeuD', '16S rRNA m(5)C967 methyltransferase', 'putative epimerase SgcE', 'DUF1398 domain-containing protein YcgX', 'DUF5615 family PIN-like protein', 'phosphoenolpyruvate synthetase regulatory protein', 'catabolic threonine dehydratase', 'putative colanic acid biosynthesis protein WcaM', 'enoyl-[acyl-carrier-protein] reductase FabK', 'diguanylate cyclase DgcQ', 'chromosome partitioning protein', 'inverse autotransporter beta domain-containing protein', 'amino acid exporter YddG', 'putative peptidoglycan hydrolase FlgJ', 'DGQHR domain-containing protein', 'putative oxamate carbamoyltransferase FdrA', 'LysR family transcriptional regulator AmpR', 'phosphoprotein phosphatase 2', 'PF03887 family protein YfbU', 'DUF987 domain-containing protein YeeT', 'capsular polysaccharide biosynthesis protein CpsC', 'putative DNA-binding transcriptional regulator YbeF', 'cbb3-type cytochrome oxidase assembly protein CcoS', 'acyl carrier protein phosphodiesterase', 'type VII secretion protein EsaA', 'radical SAM superfamily protein YgiQ', 'DUF3397 domain-containing protein', 'virulence-associated V antigen', 'cystine ABC transporter membrane subunit', 'putative hydrolase YheT', 'MarC family putative inner membrane protein YhgN', 'DUF2845 domain-containing protein', 'type 1 glycerol-3-phosphate oxidase', '50S ribosomal subunit protein L10', 'regulator for fimA', 'N-acetylmuramoyl-L-alanine amidase A', 'AmiS/UreI family transporter', 'NADPH:quinone oxidoreductase MdaB', 'DUF1315 domain-containing protein YeaC', 'DNA-binding transcriptional dual regulator ArgR', 'putative metal-chelating domain-containing protein YcgN', 'GDP/GTP pyrophosphokinase', 'D-alanine--poly(phosphoribitol) ligase subunit DltC', 'efflux pump MdtG', 'cold shock protein YdfK', 'ubiquinone biosynthesis protein UbiB', 'type I CRISPR-associated protein Cas8a1/Csx8', 'decarboxylase', 'competence protein ComK', 'DUF3830 family protein', 'glutathione binding-like protein', '50S ribosomal subunit protein L30', 'putative cellulose biosynthesis protein BcsF', 'rpoE leader peptide', 'DUF493 domain-containing protein YbeD', 'single-strand DNA-binding protein', 'scaffold protein', 'DUF559 domain-containing protein YcjD', 'Lpp/OprI family alanine-zipper lipoprotein', 'spore germination lipoprotein GerD', 'DNA-binding transcriptional repressor RacR', 'primosomal replication protein', 'putative ethanolamine utilization acetate kinase EutP', 'rhs element protein RhsC', 'oxidoreductase UcpA', 'DUF1317 family protein', 'DNA-binding transcriptional dual regulator NarP', 'sorbitol dehydrogenase', 'sporulation inhibitor of replication protein SirA', 'pyrimidine:H(+) symporter', 'putative iron-binding protein YecA', 'peptidase M1', 'DUF2500 domain-containing protein YhhM', '30S ribosomal subunit protein S5', 'DUF2254 domain-containing protein', 'putative ABC transporter ATP-binding protein YadG', 'N-acetylglutamylphosphate reductase', 'protein YoaI', 'N-acetylmuramate alpha-1-phosphate uridylyltransferase MurU', 'putative toxic peptide IbsD', "cell surface ecto-5'-nucleotidase Nt5e", 'NADH:quinone oxidoreductase subunit M', 'cephalosporin hydroxylase family protein', 'putative hydrolase YcaC', '50S ribosomal subunit protein L23', 'zinc-binding phosphatase YcdX', 'DNA-binding transcriptional activator CadC', 'nitrate reductase A subunit beta', 'cell shape determining protein MreC', 'putative endonuclease SmrB', 'adenine transporter', 'putative electron transport protein YccM', 'capsular polysaccharide biosynthesis protein CapA', 'DNA-binding transcriptional activator ZraR', 'flagellar hook-associated protein 2', 'asparagine synthetase A', 'isopentenyl-adenosine A37 tRNA methylthiolase', 'protein YdfX', 'motility protein B', 'PF06977 family protein YjiK', 'BhlA/UviB family holin-like peptide', 'type VII secretion protein EccCa', 'osmoregulated periplasmic glucans (OPG) biosynthesis protein C', 'diaminobutyrate--2-oxoglutarate transaminase family protein', 'uncharacterized protein YmfJ', 'cation:proton antiporter family protein', 'DUF2686 domain-containing protein YjiC', 'helicase-exonuclease AddAB subunit AddB', 'DUF4268 domain-containing protein', 'DUF3530 family protein', 'DNA-binding transcriptional activator GutM', 'IPR002035 domain-containing protein YegL', 'DUF5915 domain-containing protein', 'ABC exporter ATP binding subunit YbhF', 'geranylgeranylglyceryl phosphate synthase-like protein', 'TrmB family transcriptional regulator', 'glucuronoxylanase', 'multifunctional enoyl-CoA hydratase, 3-hydroxyacyl-CoA epimerase, Delta(3)-cis- Delta(2)-trans-enoyl-CoA isomerase, L-3-hydroxyacyl-CoA dehydrogenase', '4-hydroxythreonine-4-phosphate dehydrogenase', 'DNA-binding protein HU', 'protein YkfF', 'ribosomal protein S12 methylthiotransferase accessory factor YcaO', 'ABC transporter permease/substrate-binding protein', 'SPW_0924 family protein', 'DsrE/DsrF/DrsH-like family protein', 'anti-sigma-F factor Fin family protein', 'dolichyl-phosphate beta-glucosyltransferase', 'DUF5105 domain-containing protein', 'replication initiation and membrane attachment family protein', 'DUF6213 family protein', 'xanthine:H(+) symporter XanQ', 'inner membrane protein YbjJ', 'putative lipid-binding lipoprotein YceB', 'maltose operon transcriptional repressor', 'class I mannose-6-phosphate isomerase', 'regulatory signaling modulator protein AmpE', 'specificity factor for ClpA-ClpP chaperone-protease complex', 'DUF1694 domain-containing protein', 'phosphoglycerol transferase I', 'biotin carboxylase', 'RNA polymerase-binding ATPase and RNAP recycling factor', 'putative methyltransferase YfdM', 'respiratory response protein SrrB', 'DUF903 domain-containing lipoprotein YgdR', 'protease 3', 'ATP-dependent Clp protease proteolytic subunit ClpP', 'inhibitor of FtsZ', 'putative fimbrial protein SfmA', 'outer membrane porin PhoE', 'putative transport protein YdcO', 'Ni(2(+))/Co(2(+)) exporter', 'uncharacterized protein YqeL', 'alpha-glucosidase C-terminal domain-containing protein', 'DNA-binding transcriptional repressor YebK', 'TIR-like protein FxsC', 'DUF5718 family protein', 'phosphoribosylformylglycinamidine synthase II', 'formaldehyde-responsive transcriptional repressor FrmR', 'HSP20 family small heat-shock protein', 'sugar phosphatase YidA', 'type III restriction-modification system endonuclease', 'NAD(P)H oxidoreductase', 'type III secretion system needle length determinant', 'hotdog fold domain-containing protein', 'chemotaxis protein CheB', 'putative papain-like amidase YebB', 'LEA type 2 family protein', 'DNA-binding transcriptional dual regulator YfeC', 'ectoine/hydroxyectoine ABC transporter permease subunit EhuD', '2-methylaconitate cis-trans isomerase PrpF', 'putative phage abortive infection protein', '3-aminobutyryl-CoA ammonia lyase', 'SpoIIIAH-like family protein', 'plasmid replication/partition related protein', 'fatty acid biosynthesis transcriptional regulator', 'multidrug efflux RND transporter periplasmic adaptor subunit SdeX', 'DUF883 family protein', 'putative DNA binding domain-containing protein', 'YARHG domain-containing protein', 'ornithine aminotransferase', 'putative RNA-binding protein YhgF', 'autotransporter YapF', 'sensor lipoprotein NlpE', 'relaxase/mobilization nuclease domain-containing protein', 'phenylacetyl-CoA 1,2-epoxidase, structural subunit', 'putative inner membrane protein YdcZ', 'cell wall elongation/penicillin-binding protein regulator TseB', 'DNA-binding transcriptional activator YqhC', 'protein YhgO', 'glucans biosynthesis protein D', 'polyhydroxyalkanoate synthesis repressor PhaR', 'multidrug efflux pump membrane fusion lipoprotein AcrE', 'Helicase associated domain protein', 'glucosylglycerate phosphorylase', 'glutathione ABC transporter membrane subunit GsiC', 'putative oxidoreductase YnfG', 'glutathione/L-cysteine ABC exporter subunit CydC', 'tRNA m(7)G46 methyltransferase', 'UDP-glucuronic acid decarboxylase family protein', 'O-acetyltransferase WecH', 'autotransporter adhesin RadD', 'DUF1116 domain-containing protein YlbE', 'DUF6384 family protein', 'ribonucleoside-diphosphate reductase 2 subunit alpha', 'colanic acid biosynthesis protein WcaM', 'YycC family protein', 'protein YicU', 'glycerate 2-kinase 2', 'inner membrane protein YcdZ', 'small, acid-soluble spore protein K', 'DUF1287 domain-containing protein', 'penicillin-binding protein 1A', 'glutamyl-Q tRNA(Asp) synthetase', 'ComZ family protein', 'DUF2058 family protein', 'cysteine desulfurase DndA', 'protein YmgL', 'putative fimbrial chaperone SfmC', 'L-ascorbate specific PTS enzyme IIA component', 'NAD(P)H-dependent nitroreductase NfsB', 'biotin/lipoate A/B protein ligase family protein', 'TerB N-terminal domain-containing protein', 'DUF2141 domain-containing protein', 'ferrioxamine B receptor FoxA', 'acetyl-CoA:acetoacetyl-CoA transferase subunit beta', 'Na(+):H(+) antiporter NhaA', 'putative transcriptional regulator PerR', 'DUF438 domain-containing protein', 'L-serine deaminase II', 'type III secretion system stator protein SctL', 'uncharacterized protein YdfC', 'S28 family serine protease', 'putative sporulation protein YtxC', 'putative electron transport protein YsaA', 'zinc-binding protein', 'DUF2238 domain-containing inner membrane protein YjdF', 'protein-PII uridylyltransferase/uridylyl-removing enzyme', 'putative membrane fusion protein YhiI', 'trehalose operon repressor', 'putative Fe-S cluster assembly protein SufT', 'periplasmic trehalase', 'carbamate kinase-like protein YahI', 'peptidoglycan pentaglycine interpeptide biosynthesis protein FmhB', 'DUF2523 family protein', 'protein YdfU', 'L-glutamine ABC transporter periplasmic binding protein', 'Sec translocon subunit SecY', 'erythritol/L-threitol dehydrogenase', 'KPN_01571 family protein', 'glycerate 3-kinase', 'type IV toxin-antitoxin system AbiEi family antitoxin', 'ribosome rescue factor HflX', 'phage protein NinX family protein', 'histidinol-phosphatase HisJ family protein', 'universal stress protein G', 'lipoprotein SirB', 'Tex-like N-terminal domain-containing protein', 'putative menaquinol-cytochrome c reductase subunit NrfD', 'saccharopine dehydrogenase family protein', 'sorbitol-specific PTS enzyme IIC2 component', 'DUF2812 domain-containing protein', 'chromosome partitioning protein MukE', 'sugar phosphate antiporter', 'OXA-12 family class D beta-lactamase', 'molybdopterin synthase sulfurtransferase', 'DNA topology modulation protein', 'RNA decapping hydrolase', 'phosphogluconate dehydrogenase (NAD(+)-dependent, decarboxylating)', 'YybS family protein', 'cupric reductase RclA', 'YbbR-like domain-containing protein', 'uncharacterized protein YffR', 'cytosol aminopeptidase', 'uncharacterized protein YfjW', 'capsular polysaccharide biosynthesis protein Cps4B', 'type 1 fimbriae D-mannose specific adhesin', 'nocobactin polyketide synthase NbtC', 'colanic acid biosynthesis phosphomannomutase CpsG', 'DnaJ family molecular chaperone', 'cobalamin-independent homocysteine transmethylase', 'DUF4298 domain-containing protein', 'DUF2058 domain-containing protein YaiL', 'glycerol facilitator', 'fructose-bisphosphate aldolase class I', 'autolysin', 'glycine decarboxylase', 'GHKL domain-containing protein', 'PDGLE domain-containing protein', 'bifunctional homocysteine S-methyltransferase/methylenetetrahydrofolate reductase', 'citrate synthase/methylcitrate synthase', 'methylglyoxal reductase DkgB', 'TIGR01621 family pseudouridine synthase', 'putative LysR-type DNA-binding transcriptional regulator AbgR', 'methyl-accepting chemotaxis protein IV', 'HTH-type transcriptional activator AllS', 'SOS-response repressor and protease LexA', 'dTDP-glucose 4,6-dehydratase 2', 'AppA family phytase/histidine-type acid phosphatase', 'putative 2-dehydro-3-deoxygluconate aldolase', 'DUF4245 family protein', '2-amino-3-ketobutyrate coenzyme A ligase', 'putative acetyltransferase TopAI antitoxin YjhQ', 'DUF2799 domain-containing lipoprotein YfiL', 'acetyl-CoA hydrolase/transferase C-terminal domain-containing protein', 'dipeptide/tripeptide:H(+) symporter DtpA', 'methylphosphonate degradation complex subunit PhnL', 'tRNA m(1)G37 methyltransferase', 'toxin of the TopAI-YjhQ toxin-antitoxin system, TopA inhibitor', 'putative transport protein YhgE', 'protein YfdF', 'protein YggI', 'lysine decarboxylase DesA', 'DUF6670 family protein', 'OAM dimerization domain-containing protein', 'peptidoglycan endopeptidase/peptidoglycan L,D-carboxypeptidase MepH', 'peptidase M42 family protein FrvX', 'phosphoglycerate mutase family protein', 'T2SSE family protein', 'TRAP transporter small permease subunit', 'SCO2522 family protein', 'glycosyltransferase family protein', 'MSMEG_3727 family PQQ-associated protein', 'RseA family anti-sigma factor', 'alanine-phosphoribitol ligase', 'PTS system glucose-specific transporter subunit IIABC', 'disulfide bond oxidoreductase YfcG', 'diguanylate cyclase DgcP', 'isopentenyl pyrophosphate isomerase', 'phi PVL orf 32-like protein', 'SmvA family efflux MFS transporter', 'multidrug efflux RND transporter permease subunit SdeY', 'fused indole-3-glycerol phosphate synthase/phosphoribosylanthranilate isomerase', 'iron-sulfur cluster biosynthesis family protein', 'aldehyde dehydrogenase A', 'putative electron transfer flavoprotein subunit YdiQ', 'LamG domain-containing protein', 'cobinamide/cobalamin adenosyltransferase', 'sulfate adenylyltransferase', 'protein YldA', 'apo-citrate lyase phosphoribosyl-dephospho-CoA transferase', 'glutamyl-tRNA synthetase', 'lipoprotein release complex - ATP binding subunit', 'TIGR00300 family protein', 'general secretion pathway protein', 'ferritin', 'pyrroline-5-carboxylate reductase ProG', 'pyridoxal biosynthesis lyase PdxS', 'DUF1441 family protein', 'putative ethanolamine utilization chaperonin EutJ', 'alpha-D-xyloside xylohydrolase', 'glycoside hydrolase family 30 beta sandwich domain-containing protein', 'conjugative transfer protein MobI(A/C)', 'STM4012 family radical SAM protein', 'lasso RiPP family leader peptide-containing protein', 'RPnTP hydrolase', 'D-isomer specific 2-hydroxyacid dehydrogenase family protein', 'PF00078 domain-containing protein YkfC', 'DUF1837 domain-containing protein', 'prophage CPS-53 integrase', 'putative type II secretion system L-type protein YghE', 'DUF922 domain-containing protein', 'lipopolysaccharide transport system ATP binding protein LptB', 'YscQ/HrcQ family type III secretion apparatus protein', 'bifunctional 5,10-methylene-tetrahydrofolate dehydrogenase/ 5,10-methylene-tetrahydrofolate cyclohydrolase', 'DNA-binding transcriptional dual regulator MarA', 'colanic acid biosynthesis glycosyltransferase WcaC', 'flagellin B', 'RNA-binding virulence regulatory protein CvfB', 'hydrogenase 1 small subunit', 'DUF5063 domain-containing protein', 'putative selenoprotein YdfZ', 'DUF2960 domain-containing protein', 'stage VI sporulation protein F', 'stress-induced alternate pyruvate formate-lyase subunit', 'DNA-binding transcriptional activator MalT', 'Mn(2(+)) exporter', 'DNA-specific endonuclease I', 'cation transporter dimerization domain-containing protein', 'small, acid-soluble spore protein, alpha/beta type', 'interstrand DNA crosslink repair glycosylase', 'DUF2620 domain-containing protein YhfU', 'D-xylose:H(+) symporter', 'recombination mediator protein RecR', 'DUF5130 domain-containing protein', 'DUF1152 domain-containing protein', 'periplasmic chaperone Skp', 'Sel1 repeat-containing protein YbeQ', 'succinate:quinone oxidoreductase, membrane protein SdhD', 'YfhJ family protein', 'putrescine ABC transporter periplasmic binding protein', 'PTS system transporter subunit IIC domain-containing protein', 'gluconate permease', 'class II fructose-bisphosphate aldolase family protein', 'N-acetyltransferase YhhY', 'ABC transporter permease/substrate binding protein', 'lipopolysaccharide signal transducer LapC', 'transcriptional regulator SpxA', 'ubiquinone biosynthesis accessory factor UbiJ', 'putative transporter YqeG', 'putative bactoprenol-linked glucose translocase', 'type I Zorya anti-phage system protein ZorD', 'thiamine ABC transporter substrate-binding protein', 'assimilatory nitrite reductase [NAD(P)H] small subunit', 'inner membrane protein YegH', 'membrane toxin DinQ', 'DsrE/F sulfur relay family protein YchN', 'two component system response regulator', 'murein tripeptide amidase A', 'acid stress protein IbaG', 'type 1 fimbriae major subunit', 'L-methionine/branched chain amino acid exporter', 'uncharacterized protein YthA', 'ubiquinone biosynthesis protein UbiV', 'polyprenol phosphomannose-dependent alpha 1,6 mannosyltransferase MptB', 'type II toxin-antitoxin system MqsA family antitoxin', 'putative methyltransferase YfiF', 'uncharacterized protein YniB', 'DUF444 domain-containing protein YeaH', 'DUF2145 domain-containing protein', 'multiple monosaccharide ABC transporter permease', 'phage lysis regulatory protein LysB', 'short-chain fatty acid transporter', 'FMN dependent NADH:quinone oxidoreductase', 'F-type conjugal transfer protein TrbF', 'hydroxymethylglutaryl-CoA reductase, degradative', 'ferric enterobactin esterase', 'phenylacetaldehyde dehydrogenase', 'DUF3880 domain-containing protein', 'virulence master transcriptional regulator RovA', 'hydrogenase 4 component F', 'sporulation killing factor system integral membrane protein', 'GTPase-associated system all-helical protein GASH', '23S rRNA m(1)G745 methyltransferase', 'Type II secretion system protein GspK', 'putative tail fiber assembly protein YfdK', 'tryptophan 2,3-dioxygenase', 'protein YhiY', 'intermembrane phospholipid transport system, substrate binding protein MlaD', 'putative glycosyl hydrolase YegX', 'LPP20 family lipoprotein', 'lactose permease', 'putative uncharacterized protein YchS', 'antitoxin of the YkfI-YafW toxin-antitoxin pair', 'PBECR4 domain-containing protein', 'vanadium-dependent haloperoxidase', 'DNA-binding transcriptional repressor EbgR', 'TIGR04086 family membrane protein', 'galactofuranose ABC transporter putative membrane subunit YjtF', 'lipoprotein YjbF', 'lysophospholipid transporter', 'lysine decarboxylase 1', 'L-lysine 6-transaminase', 'UFP0181 family protein YoaH', 'protein YdaT', 'putative membrane-bound redox modulator Alx', 'putative exporter YdiM', 'protein TfaX', 'Tc toxin subunit A', 'tellurium resistance cAMP binding protein TerE', 'DUF6122 family protein', 'DNA-binding transcriptional repressor YgiV', 'ancillary SecYEG translocon subunit', 'iron(III) hydroxamate ABC transporter ATP binding subunit', 'dipeptide ABC transporter membrane subunit DppB', 'peptide chain release factor RF2', 'LEN family class A beta-lactamase', 'Ars family arsenical pump', 'DNA-binding transcriptional regulator DgoR', 'dihydrolipoyltranssuccinylase', 'putative lipid-binding hydrolase YiiX', 'DNA polymerase III subunit gamma', 'phosphorylcholine transferase LicD', 'inhibitor of g-type lysozyme', 'DsbC family protein', 'tnaAB operon leader peptide', 'DUF2544 domain-containing protein YfcO', 'putative fimbrial usher protein YraJ', 'protein YadW', 'DUF4156 domain-containing lipoprotein YjeI', 'accessory Sec system translocase SecA2', 'DUF5997 family protein', 'genome maintenance protein', 'type II toxin-antitoxin system MqsR family toxin', 'DUF350 domain-containing inner membrane protein YjfL', 'N-acetyl-D-glucosamine kinase', 'stage V sporulation protein SpoVM', 'putative D,D-dipeptide ABC transporter membrane subunit DdpC', 'YesL family protein', 'putative D-xylonate transporter YagG', 'K(+) transporter TrkG', 'DUF1722 domain-containing protein YbgA', '3-hydroxyadipyl-CoA dehydrogenase', 'SUF system Fe-S cluster assembly regulator', 'FcoT family thioesterase', 'putative transporter YhhS', 'beta-ketoadipyl-CoA thiolase', 'YfhE family protein', 'uncharacterized protein YmfE', 'DUF448 domain-containing protein', 'aspartyl-tRNA synthetase', 'Bcr/CflA family efflux MFS transporter', 'acylglycerol kinase family protein', 'lateral flagellar basal-body rod protein LfgG', 'flavin prenyltransferase UbiX', 'C80 family cysteine peptidase', 'division plane positioning ATPase MipZ', 'tellurium resistance system protein TerA', 'putative chloride:H(+) antiporter ClcB', '3-hydroxyacyl-CoA dehydrogenase FadJ', '2-oxoisovalerate dehydrogenase, E1 component subunit beta', 'transcriptional regulator SinR', 'TenA family transcriptional regulator', 'anti-sigma F factor antagonist', 'TIM-barrel domain-containing protein', 'mycofactocin system transcriptional regulator', 'YDG/SRA domain-containing protein', 'GerW family sporulation protein', 'polynucleotide kinase-phosphatase', 'ATP-dependent protease ATP-binding subunit HslU', 'aminopeptidase C', 'PTS transporter subunit EIIB', 'DUF6691 family protein', 'CDF family manganese efflux transporter MntE', 'sporulation protein YqfD', 'Fur-regulated basic protein FbpC', 'putative DNA-binding transcriptional regulator YidL', 'osmoregulated periplasmic glucans biosynthesis protein G', 'DUF2617 family protein', 'mobilome CxxCx(11)CxxC protein', 'mini-ribonuclease 3-like protein', 'HupE/UreJ family protein', 'H-NS histone family protein', 'heme oxygenase', '50S ribosomal subunit protein L19', 'acetyl-CoA carboxyltransferase subunit beta', 'integrase core domain-containing protein YagA', 'sporulation-specific diadenylate cyclase CdaS', 'DUF2987 domain-containing protein', 'protein YljB', 'L-glutamine--D-fructose-6-phosphate aminotransferase', 'carbon starvation CstA 5TM domain-containing protein', 'stringent response modulator YtfK', '2TM domain-containing protein', 'putative flavodoxin YqcA', 'protein YdfV', 'type II toxin-antitoxin system CcdA family antitoxin', 'putative phosphotransferase YcbJ', 'DUF2314 domain-containing protein YegJ', "3' terminal RNA ribose 2'-O-methyltransferase Hen1", 'phosphoethanolamine transferase EptB', 'lipoprotein CseA', 'NAD(P)HX epimerase/NAD(P)HX dehydratase', 'putative porin', 'putative ribosome biogenesis factor YjgA', 'inner membrane protein sensing glucose-6-phosphate', 'colicin immunity domain-containing protein', 'citrate lyase alpha subunit', 'NADH dehydrogenase (quinone) subunit D', 'exporter of polyketide antibiotics-like protein', 'DUF6529 family protein', 'protein YsdD', '3-aminoacrylate deaminase', 'LysM peptidoglycan-binding domain-containing M23 family metallopeptidase', 'cyclic dehypoxanthinyl futalosine synthase', 'DUF6616 family protein', 'D-allose ABC transporter membrane subunit', 'tartronate semialdehyde reductase 2', 'isoaspartyl dipeptidase', 'leucyl aminopeptidase family protein', 'DUF465 domain-containing protein YdcH', 'sn-glycerol 3-phosphate ABC transporter ATP binding subunit', 'ferric uptake regulator-like protein', 'serine/threonine-protein kinase toxin HipA', 'tagatose-1,6-bisphosphate aldolase 1', 'SemiSWEET transporter', 'Zn(2(+)) ABC transporter membrane subunit', 'elongation factor Ts', 'Y4yA family PLP-dependent enzyme', 'flagellar filament structural protein', 'UvrABC excision nuclease subunit C', 'replication protein B', 'Sec translocon accessory complex subunit SecD', 'DUF1803 domain-containing protein', 'leucine exporter', 'DNA-binding transcriptional repressor LexA', 'DUF616 domain-containing protein', 'sn-glycerol 3-phosphate ABC transporter membrane subunit UgpA', 'fused 3-oxo-5,6-dehydrosuberyl-CoA semialdehyde dehydrogenase and oxepin-CoA hydrolase', 'DedA family protein YqjA', 'DNA-binding transcriptional dual regulator GlcC', 'sorbitol-specific PTS enzyme IIA component', 'EscV/YscV/HrcV family type III secretion system export apparatus protein', 'quinol oxidase AA3 subunit II', 'CpaE family protein', 'tRNA-dihydrouridine(16) synthase', 'membrane-depolarizing toxin TisB', 'putative adhesin-related protein YdhQ', 'polysaccharide biosynthesis/export family protein', 'UDP-N-acetylmuramoylalanyl-D-glutamyl-2,6-diaminopimelate--D-alanyl-D-alanyl ligase', 'DNA-binding transcriptional dual regulator CsqR', 'putative lateral flagellar export/assembly protein LafU', 'DctP family TRAP transporter solute-binding subunit', 'holo-[acyl-carrier-protein] synthase', 'osmoprotectant ABC transporter substrate-binding protein', 'DUF86 domain-containing protein', 'small acid-soluble spore protein P', 'aminotransferase A', 'DUF2511 domain-containing protein', 'YwdI family protein', 'SCO3374 family protein', 'putative fimbrial usher protein ElfC', 'small toxic polypeptide LdrC', 'DUF4128 domain-containing protein', 'stress response protein AzuC', 'glycine cleavage T C-terminal barrel domain-containing protein', 'nitrate reductase cytochrome c-type subunit', 'spore coat protein YsxE', 'c-di-GMP-binding biofilm dispersal mediator protein', 'DNA polymerase ligase N-terminal domain-containing protein', 'C39 family peptidase', "N,N'-diacetylchitobiose-specific PTS enzyme IIB component", 'YjgN family protein', 'prealbumin-like fold domain-containing protein', 'YbgA family protein', 'CRISPR-associated protein Cas5', 'ribosome-associated inhibitor A', 'DUF440 domain-containing protein YciU', 'DNA-binding transcriptional repressor FrmR', 'PPE domain-containing protein', 'class A sortase', 'gamma subclass chorismate mutase AroQ', 'DUF1176 domain-containing protein YpfG', 'nucleoid-associated protein RdgC', 'DUF3750 domain-containing protein', 'Cys-Gln thioester bond-forming surface protein', 'lysine 5,6-aminomutase subunit beta', 'anti-phage protein Ppl', 'DUF3300 domain-containing protein YacH', '20S proteasome subunit A/B', 'N-ethylmaleimide reductase', 'DUF3267 domain-containing protein', 'proline:Na(+) symporter', 'exodeoxyribonuclease V subunit RecB', 'exopolysaccharide secretion system outer membrane protein NfrA', 'cardiolipin synthase A', 'DUF6879 family protein', 'DUF6082 family protein', 'protein YjeJ', 'chemotaxis protein CheZ', 'polyketide synthase Pks13', 'ribosome-recycling factor', 'DUF2000 family protein', 'DUF2810 domain-containing protein YibL', 'superinfection exclusion B family protein', 'VRR-NUC domain-containing protein', 'spore cortex-lytic enzyme', 'protein YtiB', 'DUF5508 domain-containing protein YpjB', '23S rRNA m(6)A1618 methyltransferase', 'DUF2891 domain-containing protein', 'zinc-ribbon domain-containing transport protein', 'CpsD/CapB family tyrosine-protein kinase', 'putative multidrug efflux pump subunit MdtO', 'His-Xaa-Ser system protein HxsD', 'anti-sigma-D factor RsdA', 'D-arabinose 5-phosphate isomerase KdsD', 'putative NUDIX hydrolase with low 3-phosphohydroxypyruvate phosphatase activity', 'NrtA/SsuA/CpmA family ABC transporter substrate-binding protein', 'regulator of diguanylate cyclase RdcA', 'phosphopantothenate--cysteine ligase', 'integration host factor', 'phosphoribosylaminoimidazole synthetase', 'two-component system response regulator AfsQ1', 'phi ETA orf 56-like protein', 'protein YzcX', 'sensor histidine kinase NarX', 'PTS fructose transporter subunit IIB', 'putative flagella assembly protein', 'anaerobic glycerol-3-phosphate dehydrogenase subunit C', 'DEAD-box ATP dependent DNA helicase', 'colanic acid biosynthesis pyruvyl transferase WcaK', 'YwqJ-related putative deaminase', 'cysteine protease', 'DUF6297 family protein', 'DNA-binding transcriptional regulator GalR', '3-dehydroquinate synthase family protein', 'nodulation protein NfeD', 'accessory secretory protein Asp5', 'putative transporter YidK', 'putative lyase containing HEAT-repeat', 'anti-adaptor protein IraD', 'PF03966 family protein YcaR', 'succinylornithine transaminase', 'DUF370 domain-containing protein', 'DUF4136 domain-containing protein', 'N-acetylornithine aminotransferase/N-succinyldiaminopimelate aminotransferase', 'DUF1435 domain-containing protein YjjZ', 'transcriptional regulator EbgR', 'polynucleotide phosphorylase/polyadenylase', 'hydrogenase 3 maturation protease', 'type VI secretion system accessory protein TagJ', 'DNA-binding transcriptional dual regulator NtrC', 'heme biosynthesis HemY N-terminal domain-containing protein', 'putative transport protein YqcE', 'cyd operon YbgE family protein', 'D,D-dipeptide ABC transporter permease', 'hydrogenase 2-specific chaperone', 'tRNA(Met) cytidine acetyltransferase', 'putative transporter YdiK', 'small toxic polypeptide LdrB', 'choline:H(+) symporter', 'NAD-dependent epimerase/dehydratase', 'HEPN/Toprim-associated domain-containing protein', 'putative multidrug efflux pump outer membrane channel', 'ureidoacrylate amidohydrolase', 'RimK/LysX family protein', 'PF03932 family protein CutC', 'Type II secretion system protein GspA', 'tyrosyl-tRNA synthetase', 'branched chain amino acid/phenylalanine ABC transporter ATP binding subunit LivG', 'putative ethanolamine catabolic microcompartment shell protein EutS', 'IS5-like element ISGeob1 family transposase', 'PTS system lactose-specific transporter subunit IIBC', 'lateral flagellar motor stator protein LafT', 'putative sigma(54)-dependent transcriptional regulator YgeV', 'methicillin resistance factor FemA', 'sulfoquinovose isomerase', 'DUF6518 family protein', 'peroxiredoxin family protein', 'putative PTS enzyme IIB component FrwB', 'tail-anchored inner membrane protein ElaB', 'putative transporter YdhP', 'DUF4019 domain-containing protein', 'transcriptional regulator Rok', 'O9 family O-antigen export ABC transporter permease subunit', 'tandem DUF2300 domain-containing protein YfaQ', '5-phospho-alpha-D-ribosyl 1,2-cyclic phosphate phosphodiesterase', 'DUF817 domain-containing protein', 'aminopeptidase P N-terminal domain-containing protein', 'hydrolase/methyltransferase domain-containing protein YnbC', 'oxalate decarboxylase family bicupin', 'His/Gly/Thr/Pro-type tRNA ligase C-terminal domain-containing protein', '50S ribosomal subunit protein L5', 'RimK family protein', 'YaaC family protein', 'PF06902 family protein YjdI', 'porphyrinogen peroxidase', 'two component system sensor kinase', 'lipophilic envelope spanning tunnel', 'reduced glutaredoxin 3', 'putative uncharacterized protein YeeW', 'fructokinase ScrK', 'N-acetylornithine carbamoyltransferase', 'protein YdfW', 'tRNA 2-selenouridine synthase', 'diglucosylglycerate octanoyltransferase', 'sulfoquinovosidase', 'peptidyl-tRNA hydrolase, ribosome rescue factor', '50S ribosomal subunit protein L1', 'YlzJ-like family protein', 'DUF6227 family protein', 'multidrug efflux pump Bcr', 'uncharacterized protein YnaE', 'DUF3987 domain-containing protein YfjI', 'carbon-phosphorus lyase subunit PhnK', 'RhuM family protein', 'carbon monoxide dehydrogenase subunit G', 'chaperone CsaA', 'N-glycosylase/DNA lyase', 'uncharacterized protein YjbE', 'putative fimbrial protein YadN', 'putative DNA-binding transcriptional regulator YgaV', 'sensor histidine kinase EvgS', 'DNA-binding transcriptional dual regulator MntR', 'PIN/TRAM domain-containing protein', 'ferredoxin-type protein', "bis(5'-nucleosyl)-tetraphosphatase PrpE", 'DUF6882 domain-containing protein', 'YlaN family protein', 'TlrC/CarA/OleB/SrmB family ABC-F type ribosomal protection protein', 'ATPase', 'DNA-binding transcriptional dual regulator McbR', 'catalase/hydroperoxidase HPI', 'peptidase M16 family protein YhjJ', 'type I Zorya anti-phage system protein ZorB1', 'protein N-acetyltransferase RimI', 'Imm74 family immunity protein', 'DNA-binding transcriptional activator RcsA', 'peptide-N4-asparagine amidase A', 'putative oxidoreductase YdfI', 'sigma(X)-activator ComW', 'ATP-dependent nuclease subunit B', 'methyl-accepting chemotaxis protein Tap', 'benzoate transporter', 'stationary phase-inducible protein CsiE', 'putative fimbrial protein FimI', 'toxin of the YkfI-YafW toxin-antitoxin system', 'nucleoside phosphorylase PpnP', 'toxic peptide IbsE', 'sporulation transcriptional regulator SpoIIID', 'anti-FlhDC factor RflP', 'putative glutamine amidotransferase YafJ', 'Z-ring positioning protein MinC', 'Pro-rich N-terminal domain-containing protein', 'NPP1 family protein', 'uncharacterized protein YdiH', 'siphovirus ReqiPepy6 Gp37-like family protein', 'bacterial transcriptional activator domain-containing protein', 'Sec translocon accessory complex subunit YajC', 'CshA/CshB family fibrillar adhesin-related protein', 'arginine/ornithine antiporter', 'K(+) transporting P-type ATPase subunit KdpB', '5,10-methylenetetrahydrofolate reductase', 'imidazoleglycerol-phosphate dehydratase/histidinol-phosphatase', 'multidrug efflux pump membrane fusion protein EmrA', 'NADP(+)-dependent aldehyde reductase', 'protein SseB', 'NlpC/P60 family protein YafL', 'mycolate reductase', 'superoxide dismutase, Ni', 'DcaP family trimeric outer membrane transporter', 'toxin-antitoxin system HicB family antitoxin', 'lipopolysaccharide transport system protein LptA', 'ilvBN operon leader peptide', 'TIGR02536 family ethanolamine utilization protein', 'ATP synthase Fo complex subunit c', 'zinc-dependent peptidase', 'CadD family cadmium resistance transporter', 'DUF4880 domain-containing protein', 'ATP-binding protein Uup', 'PTS mannitol transporter subunit IICB', 'L-arabinonate dehydratase', 'O-succinylhomoserine sulfhydrylase', 'lateral flagellar response regulator transcription factor LafK', 'periplasmic chaperone Asr', 'lipid A-core phosphotransferase', 'zinc ABC transporter substrate-binding protein AztC', 'putative nuclease YbcO', 'expression modulating protein YmoA', 'type VI secretion system membrane subunit', 'uncharacterized protein YffM', 'glutamate--pyruvate aminotransferase AlaC', 'putative transporter YgjI', 'flagellar filament capping protein', 'DUF1054 domain-containing protein', 'DUF2207 domain-containing protein YciQ', 'DUF1033 family protein', 'DUF1109 domain-containing protein', 'protein YagM', 'darobactin maturation radical SAM/SPASM protein DarE', 'isoamylase early set domain-containing protein', 'allantoate deiminase', 'glycosyl transferase family 2', 'ATPase RavA domain-containing protein', 'putative oxidoreductase, Fe-S subunit', "3' -> 5' ssDNA/RNA exonuclease TatD", 'prepilin-type N-terminal cleavage/methylation domain-containing protein PpdA', 'dipicolinic acid synthetase subunit A', 'protein A', 'ssDNA-specific exonuclease RecJ', 'fibrinogen-binding protein', 'peptidoglycan DD-endopeptidase DacB', 'DUF3829 domain-containing lipoprotein YiiG', 'riboflavin biosynthesis protein RibF', 'amino acid deaminase/aldolase', 'YisL family protein', 'lysine/arginine/ornithine ABC transporter/histidine ABC transporter, ATP binding subunit', 'Zn(2(+)) ABC transporter periplasmic binding protein', 'DUF1283 domain-containing protein YnfB', 'NrdR transcriptional repressor', 'protein YjgL', 'RadC-like JAB domain-containing protein YkfG', 'putative polysaccharide deacetylase lipoprotein YadE', 'DUF748 domain-containing protein', 'DUF2824 family protein', 'DUF3325 domain-containing protein', 'dethiobiotin synthetase BidA', 'stage II sporulation protein R', 'DUF1471 domain-containing stress-induced protein YhcN', 'fumarate reductase', 'DUF2554 domain-containing protein YncJ', 'myo-inositol-1-phosphate synthase', 'putative glucosyltransferase', 'putative pseudouridine kinase', '2,3-diketo-L-gulonate reductase', 'DNA polymerase III PolC', 'glucose-6-phosphate dehydrogenase (coenzyme-F420)', '3-deoxy-D-manno-octulosonate 8-phosphate synthase', 'DUF2956 domain-containing protein', 'putative DNA-binding transcriptional regulator YiaU', 'epimerase/dehydratase', '16S rRNA methyltransferase GidB', 'nitrate reductase 1 molybdenum cofactor assembly chaperone', 'glycoside hydrolase family 97 protein', 'DUF2513 domain-containing protein', 'NEL-type E3 ubiquitin ligase domain-containing protein', 'filamentous haemagglutinin family protein', "2',3'-cyclic-nucleotide 2'-phosphodiesterase/3'-nucleotidase", 'thiol:disulfide oxidoreductase CcmG', 'galactofuranose ABC transporter putative membrane subunit YtfT'] in column 1 during transform

In [None]:

sca = StandardScaler()
sca.fit_transform(X_train[num])
X_test[num] = sca.transform(X_test[num])

In [34]:
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
from keras.layers import Dense, BatchNormalization, Dropout, LSTM
from keras.models import Sequential
from keras.utils import to_categorical
from keras.optimizers import Adam
from tensorflow.keras import regularizers
from sklearn.metrics import precision_score, recall_score, confusion_matrix, classification_report, accuracy_score, f1_score
from keras import callbacks

np.random.seed(0)

2024-06-03 18:53:12.440023: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-03 18:53:12.440199: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-03 18:53:12.650502: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [85]:
#Early stopping
early_stopping = callbacks.EarlyStopping(
    min_delta=0.001, # minimium amount of change to count as an improvement
    patience=20, # how many epochs to wait before stopping
    restore_best_weights=True,
)

# Initialising the NN
model = Sequential()

# layers

model.add(Dense(units = 32, kernel_initializer = 'uniform', activation = 'relu', input_dim = 26))
model.add(Dense(units = 32, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dense(units = 16, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dropout(0.25))
model.add(Dense(units = 8, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

# Compiling the ANN
opt = Adam(learning_rate=0.00009)
model.compile(optimizer = opt, loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train the ANN
history = model.fit(X_train, y_train, batch_size = 32, epochs = 150, callbacks=[early_stopping], validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: could not convert string to float: 'plus'

In [None]:
history_df = pd.DataFrame(history.history)

plt.plot(history_df.loc[:, ['loss']], "#BDE2E2", label='Training loss')
plt.plot(history_df.loc[:, ['val_loss']],"#C2C4E2", label='Validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc="best")

plt.show()

In [None]:
history_df = pd.DataFrame(history.history)

plt.plot(history_df.loc[:, ['accuracy']], "#BDE2E2", label='Training accuracy')
plt.plot(history_df.loc[:, ['val_accuracy']], "#C2C4E2", label='Validation accuracy')

plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()