Title: Named Entity Recognition

Description:
In this NER-focused project, you will design and develop a custom Named Entity Recognition (NER) system for text analysis. Named Entity Recognition involves identifying and classifying specific entities, such as names, dates, locations, and more, within unstructured text data. Your project will offer a versatile NER solution that will work well on the provided dataset.

Dataset: Named Entity Recognition (NER) Corpus (kaggle.com)

https://www.kaggle.com/datasets/naseralqaydeh/named-entity-recognition-ner-corpus

In [1]:
!date

Tue Apr  8 04:21:53 UTC 2025


In [85]:
import warnings,sys, ast
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import tensorflow as tf
from IPython.display import display, HTML
# import matplotlib.pyplot as plt
from pprint import pprint

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.layers import Dense, Input, Bidirectional, LSTM, Embedding, Dropout
from keras.models import Model
from keras.losses import SparseCategoricalCrossentropy
from keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
# from keras.random import SeedGenerator 

# seed_gen = SeedGenerator(seed=42)
tf.random.set_seed(42)
np.random.seed(42)

import datetime as dt 

In [3]:
# !pip install kagglehub

In [4]:
# while True:
try:
    NER_df = pd.read_csv("ner.csv")
except FileNotFoundError:
    import kagglehub
    from kagglehub import KaggleDatasetAdapter

    # Set the path to the file you'd like to load
    file_path = "ner.csv"

    # Load the latest version
    NER_df = kagglehub.load_dataset(
    KaggleDatasetAdapter.PANDAS,
    "naseralqaydeh/named-entity-recognition-ner-corpus",
    file_path,

    )

In [5]:
display(NER_df.head(2).T)

Unnamed: 0,0,1
Sentence #,Sentence: 1,Sentence: 2
Sentence,Thousands of demonstrators have marched throug...,Families of soldiers killed in the conflict jo...
POS,"['NNS', 'IN', 'NNS', 'VBP', 'VBN', 'IN', 'NNP'...","['NNS', 'IN', 'NNS', 'VBN', 'IN', 'DT', 'NN', ..."
Tag,"['O', 'O', 'O', 'O', 'O', 'O', 'B-geo', 'O', '...","['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', ..."


Essential info about entities:

- geo = Geographical Entity
- org = Organization
- per = Person
- gpe = Geopolitical Entity
- tim = Time indicator
- art = Artifact
- eve = Event
- nat = Natural Phenomenon

### Class it

In [50]:
class The_Neural_Net:
    def __init__(self):
        self.max_len = 0

1. Read data

In [51]:
def read_data(self):
    try:
        NER_df = pd.read_csv("ner.csv")
    except FileNotFoundError:
        import kagglehub
        from kagglehub import KaggleDatasetAdapter

        file_path = "ner.csv"

        NER_df = kagglehub.load_dataset(
        KaggleDatasetAdapter.PANDAS,
        "naseralqaydeh/named-entity-recognition-ner-corpus",
        file_path,

        )
    print(NER_df.shape,"\n")
    print("\n",NER_df.info())
    return NER_df
The_Neural_Net.read_data = read_data

2. Preprocess

In [None]:
def PreProcess(self):
    NER_df = self.read_data()
    NER_df.dropna(inplace=True)
    NER_df.drop(columns=["Sentence #","POS"],inplace=True)
    NER_df["Tag"] = NER_df["Tag"].apply(lambda x: ast.literal_eval(x))
    
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(NER_df["Sentence"], NER_df["Tag"], shuffle=True,test_size=0.20, random_state=42)
    del NER_df

    self.X_tokenizer = Tokenizer(lower=False,oov_token="UNK")
    self.X_tokenizer.fit_on_texts(self.X_train)
    
    self.X_train = self.X_tokenizer.texts_to_sequences(self.X_train)
    self.X_test = self.X_tokenizer.texts_to_sequences(self.X_test)
 
    self.vocab_len = len(self.X_tokenizer.word_index)
    print(f"Number of unique tokens:\t{self.vocab_len}")

    self.y_tokenizer = Tokenizer(lower=False,oov_token="UNK")
    self.y_tokenizer.fit_on_texts(self.y_train)

    self.y_train = self.y_tokenizer.texts_to_sequences(self.y_train)
    self.y_test = self.y_tokenizer.texts_to_sequences(self.y_test)
    
    for dataset in [self.X_train,self.X_test]:
        for i in range(len(dataset)):
            self.max_len = max(self.max_len,len(dataset[i]))

    self.X_train = pad_sequences(self.X_train, maxlen=self.max_len, padding='post', value=0)
    self.X_test = pad_sequences(self.X_test, maxlen=self.max_len, padding='post', value=0)

    self.y_train = pad_sequences(self.y_train, maxlen=self.max_len, padding='post', value=0)
    self.y_test = pad_sequences(self.y_test, maxlen=self.max_len, padding='post', value=0)
 
    self.Number_of_classes_K = len(self.y_tokenizer.word_index) + 1

The_Neural_Net.PreProcess = PreProcess

In [None]:
def model_arch(self):
    vector_size = 64

    input_layer = Input(shape=(self.max_len,))
    embedding_layer = Embedding(input_dim=self.vocab_len + 1, output_dim=vector_size, mask_zero=True, trainable=True)(input_layer)
    dropout_layer_1 = Dropout(0.2)(embedding_layer)
    bidirectional_LSTM_Layer = Bidirectional(LSTM(vector_size * 2, return_sequences=True))(dropout_layer_1)
    output_layer = Dense(self.Number_of_classes_K)(bidirectional_LSTM_Layer)

    self.model = Model(input_layer, output_layer)
    print(self.model.summary())
    
    self.model.compile(optimizer="adam",loss=SparseCategoricalCrossentropy(from_logits=True),metrics=["accuracy"])    
The_Neural_Net.model_arch = model_arch

In [None]:
def model_fit(self,num_of_epochs):
    early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor (e.g., validation loss)
    patience=3,          # Number of epochs with no improvement after which training will stop
    restore_best_weights=True  # Restore the weights of the best epoch
    )
        
    self.model.fit(
            self.X_train,
            self.y_train,
            epochs=num_of_epochs,
            validation_data=(self.X_test, self.y_test),
            callbacks=[early_stopping]  # Include EarlyStopping in callbacks
        )
The_Neural_Net.model_fit = model_fit


In [71]:
def predict(self,model,sentence):
    # sentence = """Is this the real life? Is this just fantasy? Caught in a landslide, no escape from reality"""
    # unpadded_len = len(sentence.split())
    # sentence_tokenized = self.X_tokenizer.texts_to_sequences([sentence])

    sentence_tokens = self.X_tokenizer.texts_to_sequences([sentence])

    tokens_to_words = [word for word, index in self.X_tokenizer.word_index.items() if index in sentence_tokens[0]]

    predictions = model.predict(pad_sequences(sentence_tokens,
                                            maxlen=self.max_len,
                                            padding="post"))
    # print(predictions)
    prediction_ner = np.argmax(predictions,axis=-1)
    # print(prediction_ner)

    NER_tags = [self.y_tokenizer.index_word[num] for num in list(prediction_ner.flatten())]
    final_pred = {}
    for i in range(len(tokens_to_words)):
        final_pred[tokens_to_words[i]] = NER_tags[i]
    return final_pred
The_Neural_Net.predict = predict

In [62]:
NN_obj = The_Neural_Net()
NN_obj.PreProcess()
NN_obj.model_arch()

(47959, 4) 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47959 entries, 0 to 47958
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Sentence #  47959 non-null  object
 1   Sentence    47959 non-null  object
 2   POS         47959 non-null  object
 3   Tag         47959 non-null  object
dtypes: object(4)
memory usage: 1.5+ MB

 None
Number of unique tokens:	28761


None


In [63]:
num_of_epochs = 5
if tf.test.is_gpu_available():
    num_of_epochs = 100
    with tf.device('/device:GPU:0'):     
        NN_obj.model_fit(num_of_epochs)
else:
    NN_obj.model_fit(num_of_epochs)


Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Epoch 1/5
[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 88ms/step - accuracy: 0.2141 - loss: 0.7660 - val_accuracy: 0.2179 - val_loss: 0.3559
Epoch 2/5
[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 91ms/step - accuracy: 0.2208 - loss: 0.3337 - val_accuracy: 0.2214 - val_loss: 0.2974
Epoch 3/5
[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 86ms/step - accuracy: 0.2250 - loss: 0.2629 - val_accuracy: 0.2229 - val_loss: 0.2860
Epoch 4/5
[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 86ms/step - accuracy: 0.2276 - loss: 0.2241 - val_accuracy: 0.2235 - val_loss: 0.2872
Epoch 5/5
[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 87ms/step - accuracy: 0.2296 - loss: 0.1973 - val_accuracy: 0.2236 - val_loss: 0.2915


In [81]:
def predict(self,model,sentence):
    sentence_tokens = self.X_tokenizer.texts_to_sequences([sentence])
    # print(len(sentence.split()))
    # print(len(sentence_tokens[0]))
    # tokens_to_words = [word for word, index in self.X_tokenizer.word_index.items() if index in sentence_tokens[0]]

    predictions = model.predict(pad_sequences(sentence_tokens,
                                            maxlen=self.max_len,
                                            padding="post"))
    # print(predictions)
    prediction_ner = np.argmax(predictions,axis=-1)
    # print(prediction_ner)

    NER_tags = [self.y_tokenizer.index_word[num] for num in list(prediction_ner.flatten())]
    final_pred = {"Word":[],"Tag":[]}
    sentence_split = sentence.split()
    for Word,Tag in zip(sentence_split,NER_tags):
        # final_pred[tokens_to_words[i]] = NER_tags[i]
        final_pred["Word"].append(Word)
        final_pred["Tag"].append(Tag)
    return pd.DataFrame(final_pred)
The_Neural_Net.predict = predict

In [83]:
sentence = """Is this the real life? Is this just fantasy? Caught in a landslide, no escape from reality"""
model = NN_obj.model
prediction_df = NN_obj.predict(model=NN_obj.model,sentence=sentence)
display(prediction_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step


Unnamed: 0,Word,Tag
0,Is,O
1,this,O
2,the,O
3,real,O
4,life?,O
5,Is,O
6,this,O
7,just,O
8,fantasy?,O
9,Caught,O


In [70]:
# Save the trained model to a file
# dt_now = dt.datetime.now().strftime("%y%m%d_%H%M")
model_save_path = f"ner_model_{num_of_epochs}.keras"  # You can specify any file path
# print(model_save_path)
NN_obj.model.save(model_save_path)
print(f"Model saved to {model_save_path}")

Model saved to ner_model_5.keras


In [None]:
loaded_model = load_model(model_save_path)

sentence = """Is this the real life? Is this just fantasy? Caught in a landslide, no escape from reality"""

prediction_df = NN_obj.predict(model=loaded_model,sentence=sentence)
display(prediction_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 259ms/step


Unnamed: 0,Word,Tag
0,Is,O
1,this,O
2,the,O
3,real,O
4,life?,O
5,Is,O
6,this,O
7,just,O
8,fantasy?,O
9,Caught,O


In [None]:
sys.exit()

# Rough work below this

In [None]:
from tensorflow.keras.models import load_model

loaded_model = load_model(model_save_path)

In [None]:
sys.exit()

In [None]:
from keras.layers import Dense, Dropout, Input
from keras.models import Model

input_layer = Input(shape=(10,))
dense_layer = Dense(32, activation='relu')(input_layer)
dropout_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(1, activation='sigmoid')(dropout_layer)

model = Model(inputs=input_layer, outputs=output_layer)
model.summary()

3. Tokenize

In [None]:
def Tokenize(self):



The_Neural_Net.PreProcess = PreProcess

In [6]:
print(NER_df.shape)

(47959, 4)


In [7]:
print(NER_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47959 entries, 0 to 47958
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Sentence #  47959 non-null  object
 1   Sentence    47959 non-null  object
 2   POS         47959 non-null  object
 3   Tag         47959 non-null  object
dtypes: object(4)
memory usage: 1.5+ MB
None


In [8]:
NER_df.dropna(inplace=True)
NER_df.drop(columns=["Sentence #","POS"],inplace=True)
NER_df["Tag"] = NER_df["Tag"].apply(lambda x: ast.literal_eval(x))

In [9]:
print(NER_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47959 entries, 0 to 47958
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Sentence  47959 non-null  object
 1   Tag       47959 non-null  object
dtypes: object(2)
memory usage: 749.5+ KB
None


In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(NER_df["Sentence"], NER_df["Tag"], shuffle=True,test_size=0.20, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, shuffle=True,test_size=0.50, random_state=42)

In [21]:
from tensorflow.keras.preprocessing.text import Tokenizer
X_tokenizer = Tokenizer(lower=False,oov_token="UNK")
X_tokenizer.fit_on_texts(X_train)

In [22]:
X_train.reset_index(drop=True)

0        The 58-year-old former analyst says he provide...
1        But he said he will not accept any Pakistani p...
2        The Swiss star was upset Wednesday by German T...
3        After taking office in 2004 , the SPENCER gove...
4        Israeli soldiers have killed two Palestinians ...
                               ...                        
38362    However , poverty , illiteracy , and unemploym...
38363    The report was published two days after human ...
38364    In a separate incident , kidnappers released a...
38365    Those groups were shut down by U.S. officials ...
38366    A statement on the Web site of the Health Depa...
Name: Sentence, Length: 38367, dtype: object

In [23]:
X_train = X_tokenizer.texts_to_sequences(X_train)
X_test = X_tokenizer.texts_to_sequences(X_test)
X_val = X_tokenizer.texts_to_sequences(X_val)

In [24]:
vocab_len = len(X_tokenizer.word_index)
print(f"Number of unique tokens:\t{vocab_len}")

Number of unique tokens:	28761


In [25]:
train_tags = set([val for sublist in y_train for val in sublist])
test_tags = set([val for sublist in y_test for val in sublist])
val_tags = set([val for sublist in y_val for val in sublist])

print("Unique NER tags in train set: ",train_tags)
print("Unique NER tags in test set: ",test_tags)
print("Unique NER tags in test set: ",val_tags)

Unique NER tags in train set:  {'B-geo', 'B-org', 'B-eve', 'I-eve', 'B-gpe', 'I-nat', 'I-gpe', 'I-art', 'I-per', 'B-per', 'I-tim', 'I-geo', 'B-art', 'I-org', 'B-tim', 'O', 'B-nat'}
Unique NER tags in test set:  {'B-geo', 'B-org', 'B-eve', 'I-eve', 'B-gpe', 'I-art', 'I-gpe', 'I-nat', 'I-per', 'B-per', 'I-tim', 'I-geo', 'B-art', 'I-org', 'B-tim', 'O', 'B-nat'}
Unique NER tags in test set:  {'B-geo', 'B-org', 'B-eve', 'I-eve', 'B-gpe', 'I-gpe', 'I-art', 'I-nat', 'I-per', 'B-per', 'I-org', 'I-geo', 'B-art', 'I-tim', 'B-tim', 'O', 'B-nat'}


In [26]:
y_tokenizer = Tokenizer(lower=False,oov_token="UNK")
y_tokenizer.fit_on_texts(y_train)

In [27]:
y_train = y_tokenizer.texts_to_sequences(y_train)
y_test = y_tokenizer.texts_to_sequences(y_test)
y_val = y_tokenizer.texts_to_sequences(y_val)

In [28]:
max_len = 0
for dataset in [X_train,X_test,X_val]:
    for i in range(len(dataset)):
        max_len = max(max_len,len(dataset[i]))

In [29]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

X_train = pad_sequences(X_train, maxlen=max_len, padding='post', value=0)
X_test = pad_sequences(X_test, maxlen=max_len, padding='post', value=0)
X_val = pad_sequences(X_val, maxlen=max_len, padding='post', value=0)

y_train = pad_sequences(y_train, maxlen=max_len, padding='post', value=0)
y_test = pad_sequences(y_test, maxlen=max_len, padding='post', value=0)
y_val = pad_sequences(y_val, maxlen=max_len, padding='post', value=0)

In [30]:
for dataset in [X_train,X_test,X_val,y_train,y_test,y_val]:
    print(dataset.shape)

(38367, 89)
(4796, 89)
(4796, 89)
(38367, 89)
(4796, 89)
(4796, 89)


In [31]:
Number_of_classes_K = len(y_tokenizer.word_index) + 1

In [37]:
from keras.layers import Dense, Dropout, Input
from keras.models import Model

input_layer = Input(shape=(10,))
dense_layer = Dense(32, activation='relu')(input_layer)
dropout_layer = Dropout(0.5)(dense_layer)
output_layer = Dense(1, activation='sigmoid')(dropout_layer)

model = Model(inputs=input_layer, outputs=output_layer)
model.summary()

In [None]:
from keras.layers import Dense, Input, Bidirectional, LSTM, Embedding, Dropout
from keras.models import Model
from keras.losses import SparseCategoricalCrossentropy
from keras.callbacks import EarlyStopping

In [39]:
vector_size = 64

input_layer = Input(shape=(max_len,))
embedding_layer = Embedding(input_dim=vocab_len + 1, output_dim=vector_size, mask_zero=True, trainable=True)(input_layer)
dropout_layer_1 = Dropout(0.2)(embedding_layer)
bidirectional_LSTM_Layer = Bidirectional(LSTM(vector_size * 2, return_sequences=True))(dropout_layer_1)
output_layer = Dense(Number_of_classes_K)(bidirectional_LSTM_Layer)

model = Model(input_layer, output_layer)
print(model.summary())

None


In [42]:
pprint(model.get_config())

{'input_layers': [['input_layer_4', 0, 0]],
 'layers': [{'class_name': 'InputLayer',
             'config': {'batch_shape': (None, 89),
                        'dtype': 'float32',
                        'name': 'input_layer_4',
                        'ragged': False,
                        'sparse': False},
             'inbound_nodes': [],
             'module': 'keras.layers',
             'name': 'input_layer_4',
             'registered_name': None},
            {'build_config': {'input_shape': (None, 89)},
             'class_name': 'Embedding',
             'config': {'activity_regularizer': None,
                        'dtype': {'class_name': 'DTypePolicy',
                                  'config': {'name': 'float32'},
                                  'module': 'keras',
                                  'registered_name': None},
                        'embeddings_constraint': None,
                        'embeddings_initializer': {'class_name': 'RandomUniform',
        

In [36]:
early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor (e.g., validation loss)
    patience=3,          # Number of epochs with no improvement after which training will stop
    restore_best_weights=True  # Restore the weights of the best epoch
)
model.compile(optimizer="adam",loss=SparseCategoricalCrossentropy(from_logits=True),metrics=["accuracy"])
model.fit(
    X_train,
    y_train,
    epochs=6,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping]  # Include EarlyStopping in callbacks
)

Epoch 1/6


2025-04-08 04:30:30.712966: E tensorflow/core/util/util.cc:131] oneDNN supports DT_BOOL only on platforms with AVX-512. Falling back to the default Eigen-based implementation if present.


[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 101ms/step - accuracy: 0.2138 - loss: 0.7510 - val_accuracy: 0.2191 - val_loss: 0.3530
Epoch 2/6
[1m1199/1199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 105ms/step - accuracy: 0.2211 - loss: 0.3118 - val_accuracy: 0.2228 - val_loss: 0.2955
Epoch 3/6
[1m1139/1199[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m5s[0m 85ms/step - accuracy: 0.2269 - loss: 0.2362

KeyboardInterrupt: 

In [48]:
sentence = """Is this the real life? Is this just fantasy? Caught in a landslide, no escape from reality"""
unpadded_len = len(sentence.split(" "))
predictions = model.predict(pad_sequences(X_tokenizer.texts_to_sequences([sentence]),
                                          maxlen=max_len,
                                         padding="post"))
print(predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[[[ -5.304609    -4.743709     1.8781298  ...  -1.0697396   -1.0944893
    -2.5503826 ]
  [ -6.229029    -7.766292     8.0266     ...  -4.0541368   -2.6854007
    -4.7166605 ]
  [ -6.1748357  -10.034082    11.106297   ...  -5.695456    -2.8237836
    -6.305474  ]
  ...
  [ -0.05605015  -0.13771345   0.19956562 ...  -0.17713553  -0.14530168
    -0.12879112]
  [ -0.05605015  -0.13771345   0.19956562 ...  -0.17713553  -0.14530168
    -0.12879112]
  [ -0.05605015  -0.13771345   0.19956562 ...  -0.17713553  -0.14530168
    -0.12879112]]]


In [49]:
prediction_ner = np.argmax(predictions,axis=-1)
print(prediction_ner)

[[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
  7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
  7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]]


In [50]:
NER_tags = [y_tokenizer.index_word[num] for num in list(prediction_ner.flatten())][:unpadded_len]
print(NER_tags)

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']


In [47]:
sys.exit()

SystemExit: 

In [9]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

2025-04-07 13:12:49.924812: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [10]:
from tensorflow.python.client import device_lib

device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 18137355800593050408
 xla_global_id: -1]

In [11]:
tf.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


False

In [12]:
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [13]:
x_train, x_test = x_train / 255.0, x_test / 255.0

In [14]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

In [15]:
predictions = model(x_train[:1]).numpy()
predictions

array([[ 0.7193052 , -0.47969562,  0.32077536,  0.70289576, -0.04301793,
        -0.67874795, -0.23602428,  0.31352717, -0.5399128 ,  0.35995692]],
      dtype=float32)

In [16]:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [17]:
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

In [18]:
tf.device('/device:GPU:0')

<tensorflow.python.eager.context._EagerDeviceContext at 0x7fdf7c3d5c80>

In [19]:
with tf.device('/device:GPU:0'):
    model.fit(x_train, y_train, epochs=500, batch_size=32)

Epoch 1/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.8551 - loss: 0.4949
Epoch 2/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9535 - loss: 0.1564
Epoch 3/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9668 - loss: 0.1071
Epoch 4/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9737 - loss: 0.0829
Epoch 5/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9762 - loss: 0.0744
Epoch 6/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9804 - loss: 0.0638
Epoch 7/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9813 - loss: 0.0562
Epoch 8/500
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9837 - loss: 0.0493
Epoch 9/500
[1m

KeyboardInterrupt: 

In [None]:
model.evaluate(x_test,  y_test, verbose=2)

In [None]:
predictions = model(x_test[:1]).numpy()

313/313 - 1s - 4ms/step - accuracy: 0.9763 - loss: 0.0764


[0.07639684528112411, 0.9763000011444092]

In [None]:
predictions