

> ### TPU Initialization



In [22]:
# Check if a TPU is available and initialize it
import tensorflow as tf
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
    # print('Running on TPU {}'.format(tpu.cluster_spec().as_dict()['worker']))
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('Connected to TPU')
else:
    strategy = tf.distribute.get_strategy()
    print('Running on CPU/GPU')

print("REPLICAS: {}".format(strategy.num_replicas_in_sync))

Running on TPU  grpc://10.48.206.90:8470




Connected to TPU
REPLICAS: 8


### HyperParameters

In [23]:
vocab_size = 10000 # should replace
d_model = 128
nhead = 8
num_encoder_layers = 6
num_classes = 56  # Change this according to your problem
num_epochs = 10

### Dummy data

In [24]:
# max_sequence_length = 50  # Adjust according to your actual data
# num_samples = 1000

In [25]:
# import numpy as np
# from tensorflow.keras.preprocessing.sequence import pad_sequences
# # Dummy data for demonstration purposes


# # Generate random team descriptions and labels
# team_descriptions = [" ".join(np.random.choice(vocab_size, size=np.random.randint(5, 20), replace=True).astype(str)) for _ in range(num_samples)]
# labels = np.random.randint(num_classes, size=num_samples)

# # Tokenize team descriptions and pad sequences
# tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size)
# tokenizer.fit_on_texts(team_descriptions)
# sequences = tokenizer.texts_to_sequences(team_descriptions)
# padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# # Create a dummy train_dataset


In [26]:
import pickle
with open('/content/data.pkl', 'rb') as file:
    loaded_data = pickle.load(file)

padded_sequences = loaded_data['questions']
labels = loaded_data['answers']
vocab_size = loaded_data['VOCAB_SIZE']
num_classes = loaded_data['total_teams']


In [27]:
train_dataset = tf.data.Dataset.from_tensor_slices((padded_sequences, labels)).shuffle(num_samples).batch(batch_size=32)

In [None]:
labels[0]

In [None]:
padded_sequences[0]

### Transformer Model

In [30]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, MultiHeadAttention, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

def create_transformer_model(vocab_size, d_model, nhead, num_encoder_layers, num_classes):
    inputs = Input(shape=(None,))
    x = Embedding(vocab_size, d_model)(inputs)

    for _ in range(num_encoder_layers):
        # Self-attention layer (multi-head)
        x = MultiHeadAttention(num_heads=nhead, key_dim=d_model)(x, x)
        # Add and Norm
        x = tf.keras.layers.Add()([x, Embedding(vocab_size, d_model)(inputs)])
        x = tf.keras.layers.LayerNormalization(epsilon=1e-6)(x)

    x = GlobalAveragePooling1D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Example usage
# Define your dataset and DataLoader here

# Assuming you have a vocabulary size, embedding dimension, etc.


# Create model
with strategy.scope():
  model = create_transformer_model(vocab_size, d_model, nhead, num_encoder_layers, num_classes)

# Compile the model
  model.compile(optimizer=Adam(learning_rate=0.001),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
model.summary()
# Now you can use train_dataset in the training loop
# Training loop

# for epoch in range(num_epochs):
#     total_loss = 0
#     total_accuracy = 0
#     batches = 0

#     for inputs, labels in train_dataset:
#         loss, accuracy = model.train_on_batch(inputs, labels)
#         total_loss += loss
#         total_accuracy += accuracy
#         batches += 1

#     average_loss = total_loss / batches
#     average_accuracy = total_accuracy / batches

#     print(f"Epoch {epoch + 1}/{num_epochs} - Loss: {average_loss:.4f} - Accuracy: {average_accuracy:.4f}")



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 embedding_7 (Embedding)        (None, None, 128)    6015360     ['input_2[0][0]']                
                                                                                                  
 multi_head_attention_6 (MultiH  (None, None, 128)   527488      ['embedding_7[0][0]',            
 eadAttention)                                                    'embedding_7[0][0]']            
                                                                                                  
 embedding_8 (Embedding)        (None, None, 128)    6015360     ['input_2[0][0]']          

In [31]:
model.fit(train_dataset, epochs=num_epochs, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x79acac9d02e0>

In [32]:
# Assuming you have a new team description in the form of a sequence
new_team_description = "your new team description here"

# Tokenize and pad the new team description
new_sequence = tokenizer.texts_to_sequences([new_team_description])
padded_new_sequence = pad_sequences(new_sequence, maxlen=max_sequence_length)

# Make predictions
predictions = model.predict(padded_new_sequence)

# Print the predicted class probabilities
print("Predicted Probabilities:", predictions)

# Get the predicted class (index with the maximum probability)
predicted_class = np.argmax(predictions)
print("Predicted Class:", predicted_class)

Predicted Probabilities: [[2.8022507e-02 1.7055346e-01 1.2558098e-01 3.0308437e-02 8.2716621e-02
  1.2518027e-03 2.2175602e-04 2.4074170e-01 1.7088924e-01 1.2620963e-02
  1.3386293e-03 2.8831733e-03 2.3919649e-03 5.0834976e-03 7.6606398e-04
  3.0035194e-04 4.8170374e-03 3.9225840e-03 1.1030734e-03 1.3615083e-04
  1.2850510e-03 1.1460884e-03 3.1798388e-04 6.9174457e-05 9.8312208e-05
  3.4435291e-03 1.1493506e-02 2.6982187e-04 1.9310595e-02 1.8061996e-02
  7.5875776e-04 4.9647558e-03 5.4325154e-03 1.2273801e-02 8.0744579e-04
  1.5066161e-04 1.5797639e-04 1.0800840e-04 3.9705189e-04 1.2368596e-03
  1.0250230e-04 9.4714045e-04 7.3127019e-05 1.1730097e-02 4.2959619e-03
  7.7021662e-03 9.2421327e-04 6.8289874e-04 6.4318633e-04 4.1043584e-04
  7.6580465e-05 3.0787953e-03 1.5409822e-03 7.6849297e-05 2.0187182e-04
  7.9308702e-05]]
Predicted Class: 7
