##### Copyright 2019 The TensorFlow Authors.

In [1]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TensorFlow 2 quickstart for experts

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tutorials/quickstart/advanced"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/quickstart/advanced.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/quickstart/advanced.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/tutorials/quickstart/advanced.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

This is a [Google Colaboratory](https://colab.research.google.com/notebooks/welcome.ipynb) notebook file. Python programs are run directly in the browser—a great way to learn and use TensorFlow. To follow this tutorial, run the notebook in Google Colab by clicking the button at the top of this page.

1. In Colab, connect to a Python runtime: At the top-right of the menu bar, select *CONNECT*.
2. Run all the notebook code cells: Select *Runtime* > *Run all*.

Download and install TensorFlow 2.

Note: Upgrade `pip` to install the TensorFlow 2 package. See the [install guide](https://www.tensorflow.org/install) for details.

Import TensorFlow into your program:

In [2]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)

from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

TensorFlow version: 2.18.0


Load and prepare the [MNIST dataset](http://yann.lecun.com/exdb/mnist/).

In [3]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Divide the pixel values by 255.0, append a trailing channels axis, and
# store the result as float32.
x_train = (x_train / 255.0)[..., tf.newaxis].astype("float32")
x_test = (x_test / 255.0)[..., tf.newaxis].astype("float32")

Use `tf.data` to batch and shuffle the dataset:

In [4]:
# Training pipeline: shuffle with a 10,000-element buffer, then batch by 32.
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(32)
)

# Evaluation pipeline: batches of 32 with no shuffling step.
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(32)
)

Build the `tf.keras` model using the Keras [model subclassing API](https://www.tensorflow.org/guide/keras/custom_layers_and_models):

In [5]:
class MyModel(Model):
  """Small convolutional classifier written with the Keras subclassing API.

  `call` applies, in order: a 32-filter, kernel-size-3 ReLU convolution, a
  flatten, a 128-unit ReLU dense layer, and a 10-unit dense layer with no
  activation.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = Conv2D(filters=32, kernel_size=3, activation='relu')
    self.flatten = Flatten()
    self.d1 = Dense(units=128, activation='relu')
    self.d2 = Dense(units=10)

  def call(self, x):
    """Pass `x` through conv1 -> flatten -> d1 -> d2 and return the output."""
    for layer in (self.conv1, self.flatten, self.d1, self.d2):
      x = layer(x)
    return x


# Instantiate the model used by the training and evaluation steps
model = MyModel()

Choose an optimizer and loss function for training:

In [6]:
# Adam with its default hyperparameters drives the weight updates.
optimizer = tf.keras.optimizers.Adam()

# from_logits=True: the loss receives the raw output of the model's final
# Dense layer, which has no activation.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True)

Select metrics to measure the loss and the accuracy of the model. These metrics accumulate values over the batches of each epoch; the training loop prints the accumulated result at the end of the epoch and resets the metrics before the next one.

In [7]:
# Running means of the loss values fed in by train_step / test_step.
train_loss = tf.keras.metrics.Mean(name='train_loss')
test_loss = tf.keras.metrics.Mean(name='test_loss')

# Running accuracies computed from integer labels and model outputs.
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')

Use `tf.GradientTape` to train the model:

In [8]:
@tf.function
def train_step(images, labels):
  """Run one optimization step on a batch and update the training metrics.

  Args:
    images: batch of inputs passed to `model`.
    labels: targets passed to `loss_object` and `train_accuracy`.
  """
  with tf.GradientTape() as tape:
    # training=True only matters when the model contains layers that behave
    # differently during training and inference (e.g. Dropout).
    logits = model(images, training=True)
    batch_loss = loss_object(labels, logits)

  # Read the variable list after the forward pass, then differentiate the
  # batch loss with respect to it and let the optimizer apply the update.
  trainable_vars = model.trainable_variables
  grads = tape.gradient(batch_loss, trainable_vars)
  optimizer.apply_gradients(zip(grads, trainable_vars))

  # Fold this batch into the running training metrics.
  train_loss(batch_loss)
  train_accuracy(labels, logits)

Test the model:

In [9]:
@tf.function
def test_step(images, labels):
  """Evaluate one batch and update the test metrics (no weight update).

  Args:
    images: batch of inputs passed to `model`.
    labels: targets passed to `loss_object` and `test_accuracy`.
  """
  # training=False only matters when the model contains layers that behave
  # differently during training and inference (e.g. Dropout).
  logits = model(images, training=False)
  batch_loss = loss_object(labels, logits)

  # Fold this batch into the running test metrics.
  test_loss(batch_loss)
  test_accuracy(labels, logits)

In [None]:
EPOCHS = 5

for epoch in range(EPOCHS):
  # Clear all four metrics so each epoch's numbers cover that epoch only
  for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
    metric.reset_state()

  # One full pass over the training batches
  for images, labels in train_ds:
    train_step(images, labels)

  # One full pass over the test batches
  for test_images, test_labels in test_ds:
    test_step(test_images, test_labels)

  # Accuracies are scaled by 100 so they print as percentages
  epoch_report = (
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result():0.2f}, '
    f'Accuracy: {train_accuracy.result() * 100:0.2f}, '
    f'Test Loss: {test_loss.result():0.2f}, '
    f'Test Accuracy: {test_accuracy.result() * 100:0.2f}'
  )
  print(epoch_report)

Epoch 1, Loss: 0.14, Accuracy: 95.88, Test Loss: 0.07, Test Accuracy: 97.45
Epoch 2, Loss: 0.04, Accuracy: 98.69, Test Loss: 0.05, Test Accuracy: 98.23
Epoch 3, Loss: 0.02, Accuracy: 99.26, Test Loss: 0.06, Test Accuracy: 98.23


In [None]:
# 1. High-level summary: print Keras' built-in overview of `model`
model.summary()

In [None]:
# 2. Name and shape of the first three weight tensors tracked by the model
for weight in model.weights[:3]:
    print(weight.name, weight.shape)

In [None]:
# 3. Zoom in on one layer
# `model.layers[0]` is the Conv2D layer (`conv1`), not a Dense layer, so the
# first Dense layer is selected through its attribute on MyModel instead.
dense1 = model.d1
# A Dense layer with a bias exposes two arrays: the kernel, then the bias.
kernel, bias = dense1.get_weights()
print("Kernel stats", kernel.mean(), kernel.std())
print("Bias stats", bias.mean(), bias.std())

In [None]:
# 3. Zoom in on one layer
# NOTE(review): this cell repeats the preceding cell and could be removed.
# `model.layers[0]` is the Conv2D layer (`conv1`), not a Dense layer, so the
# first Dense layer is selected through its attribute on MyModel instead.
dense1 = model.d1
# A Dense layer with a bias exposes two arrays: the kernel, then the bias.
kernel, bias = dense1.get_weights()
print("Kernel stats", kernel.mean(), kernel.std())
print("Bias stats", bias.mean(), bias.std())

In [None]:
# 4. Visual sanity check (optional)
import matplotlib.pyplot as plt

# Histogram over every entry of whatever `kernel` currently holds from the
# layer-inspection cell, collapsed to one dimension.
weights_1d = kernel.flatten()
plt.hist(weights_1d, bins=50)
plt.title("Weight distribution for dense1")
plt.show()

The image classifier is now trained to ~98% accuracy on this dataset. To learn more, read the [TensorFlow tutorials](https://www.tensorflow.org/tutorials).

In [None]:
# Pull a single batch from the training pipeline to learn the batched input
# shape
images, labels = next(iter(train_ds.take(1)))
input_shape = images.shape

# All-zeros stand-in input with that same shape
dummy_input = tf.zeros(input_shape)

# Call the model's first layer directly on the stand-in input
layer = model.layers[0]
output_tensor = layer(dummy_input)

# Report which layer was called and the shape it produced
print(layer.name)
print(output_tensor.shape)

In [None]:
!pip install transformers

In [None]:
from transformers import pipeline

# Build the sentiment-analysis pipeline. No model is named here, so the
# choice of checkpoint is left to the library.
sentiment_analyzer = pipeline("sentiment-analysis")

# Classify one sample sentence and show the pipeline's raw output
text = "I love using Google Colab, it's so convenient!"
result = sentiment_analyzer(text)
print(result)

In [None]:
from transformers import pipeline

# Load the Named Entity Recognition pipeline.
# aggregation_strategy="simple" merges word pieces into whole entities; it
# replaces the deprecated `grouped_entities=True` flag and yields the same
# grouped output (dicts carrying 'entity_group' and 'word').
ner_recognizer = pipeline("ner", aggregation_strategy="simple")

# Analyze a sample text for named entities
text = "Google was founded by Larry Page and Sergey Brin in 1998."
results = ner_recognizer(text)

# Print the results
print(results)

In [None]:
# High-level helper from transformers for running a model end to end
from transformers import pipeline

# Text-generation pipeline backed by the "distilgpt2" checkpoint
pipe = pipeline(task="text-generation", model="distilgpt2")

In [None]:
# Prompt handed to the text-generation pipeline
prompt = "Write a short story about a robot learning to feel."

# Ask for a single sequence; adjust max_length to control how long the
# generated text may get
generated_text = pipe(
    prompt,
    max_length=200,
    num_return_sequences=1,
)

# The pipeline returns a list; show the text of its first (only) entry
first_generation = generated_text[0]
print(first_generation['generated_text'])

In [None]:
!pip install pymupdf

In [None]:
from collections import Counter

# `entities_2` is not defined by any cell above this one, so running the
# notebook top to bottom would raise NameError here. Guard the lookup and
# report the missing prerequisite the same way the graph-building cells do.
if 'entities_2' in globals():
    # Get the entity groups from the extracted entities
    entity_groups_2 = [entity['entity_group'] for entity in entities_2]

    # Count the occurrences of each entity group
    entity_group_counts_2 = Counter(entity_groups_2)

    # Print the counts, most frequent group first
    print("Entity group counts from second PDF:")
    for group, count in entity_group_counts_2.most_common():
        print(f"{group}: {count}")
else:
    print("Error: 'entities_2' variable not found. Please run cell 76eac752 first to extract entities from the second PDF.")

In [None]:
# @title
# Apply NER to the contents of `text`, one chunk at a time so long inputs are
# not sent to the pipeline in a single call.
# NOTE(review): no cell shown in this notebook assigns PDF text to `text`; the
# last visible assignment is the sample sentence in the NER demo cell. Confirm
# that a PDF-extraction cell runs before this one.
chunk_size = 500  # Maximum number of characters per chunk


def _split_on_whitespace(text, max_chars):
    """Yield consecutive pieces of `text`, each at most `max_chars` long.

    A piece is cut just after the last whitespace character inside its window
    whenever the window would otherwise end in the middle of a word, so that
    words (and therefore entity names) are not split across two chunks. A
    window containing no whitespace at all is cut at `max_chars`. Joining the
    yielded pieces reproduces `text` exactly.

    Raises:
        ValueError: if `max_chars` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")
    start = 0
    total = len(text)
    while start < total:
        end = min(start + max_chars, total)
        # Only back up when the window ends mid-word
        if end < total and not text[end].isspace():
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            if cut > start:
                end = cut
        yield text[start:end]
        start = end


entities = []
for chunk in _split_on_whitespace(text, chunk_size):
    entities.extend(ner_recognizer(chunk))

# Print the first 20 identified entities as an example
print("First 20 identified entities:")
for entity in entities[:20]:
    print(entity)

# You can explore the full 'entities' list to see all identified entities

In [None]:
!pip install networkx

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Start from an empty undirected graph
G = nx.Graph()

# Nodes, each tagged with a 'type' attribute
for node_name, node_type in [
    ("Python", "ProgrammingLanguage"),
    ("TensorFlow", "Library/Framework"),
    ("Keras", "Library/Framework"),
]:
    G.add_node(node_name, type=node_type)

# Edges (relationships), each tagged with a 'relation' attribute
for source, target, relation in [
    ("TensorFlow", "Python", "USES"),
    ("Keras", "TensorFlow", "PART_OF"),
]:
    G.add_edge(source, target, relation=relation)

# Show the graph contents; data=True includes the attribute dictionaries
print("Nodes:", G.nodes(data=True))
print("Edges:", G.edges(data=True))

# Optional: Visualize the graph (requires matplotlib)
# nx.draw(G, with_labels=True, node_color='lightblue', node_size=2000, font_size=10, font_weight='bold')
# plt.show()

In [None]:
import networkx as nx

# Fresh, empty graph that will hold one node per distinct entity word
entity_graph = nx.Graph()

# The 'entities' list comes from the entity-extraction cell (faa4d34a); bail
# out with a message when that cell has not been run yet.
if 'entities' not in locals():
    print("Error: 'entities' variable not found. Please run cell faa4d34a first to extract entities from the first PDF.")
else:
    for entity in entities:
        # Node ID is the entity word; its entity group is stored as 'type'
        entity_graph.add_node(entity['word'], type=entity['entity_group'])

    print(f"Added {entity_graph.number_of_nodes()} nodes to the graph from the first PDF entities.")
    # Show a few nodes as a quick check
    first_nodes = list(entity_graph.nodes(data=True))[:10]
    print("First 10 nodes:", first_nodes)

In [None]:
# Merge the second PDF's entities into the existing `entity_graph`.
# The 'entities_2' list comes from cell 76eac752; bail out with a message
# when that cell has not been run yet.
if 'entities_2' not in locals():
    print("Error: 'entities_2' variable not found. Please run cell 76eac752 first to extract entities from the second PDF.")
else:
    for entity in entities_2:
        # Node ID is the entity word; its entity group is stored as 'type'.
        # NetworkX handles adding existing nodes gracefully.
        entity_graph.add_node(entity['word'], type=entity['entity_group'])

    print("Added entities from the second PDF to the graph.")
    print(f"Total number of nodes in the graph: {entity_graph.number_of_nodes()}")
    # Show a few nodes again as a quick check
    first_nodes = list(entity_graph.nodes(data=True))[:10]
    print("First 10 nodes:", first_nodes)