In [1]:
import os

OUTPUT_DIR = "shakespeare_works"
combined_text_file = "combined_shakespeare.txt"

# Merge every work found in OUTPUT_DIR into one corpus string.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the corpus (and everything derived from it) identical from run to run.
work_texts = []
for filename in sorted(os.listdir(OUTPUT_DIR)):
    work_path = os.path.join(OUTPUT_DIR, filename)
    if not os.path.isfile(work_path):
        # Sub-directories cannot be opened as text files; skip them.
        continue
    with open(work_path, "r", encoding="utf-8") as file:
        # Trim each work and follow it with one blank line as a separator.
        work_texts.append(file.read().strip() + "\n\n")

# Join once instead of growing a string inside the loop.
combined_text = "".join(work_texts)

with open(combined_text_file, "w", encoding="utf-8") as file:
    file.write(combined_text)
print(f"Combined texts saved to {combined_text_file}")

#DataSetShakeSpeare.txt

Combined texts saved to combined_shakespeare.txt


In [2]:
import numpy as np

# Load the combined text
with open(combined_text_file, "r", encoding="utf-8") as file:
    text = file.read()

# Character vocabulary; sorting makes the index assignment deterministic.
chars = sorted(set(text))
char_to_idx = {char: idx for idx, char in enumerate(chars)}
idx_to_char = dict(enumerate(chars))

# Convert text to numerical representation
text_as_int = np.array([char_to_idx[char] for char in text])

# Sequence length and dataset preparation
seq_length = 100  # Number of characters to use as input
examples_per_epoch = len(text) - seq_length
if examples_per_epoch <= 0:
    # Every example needs seq_length input characters plus one target character.
    raise ValueError(
        f"Corpus has {len(text)} characters; more than {seq_length} are required "
        "to build at least one training example."
    )

# Example i is the window text_as_int[i:i + seq_length]; its target is the
# character immediately after that window.  sliding_window_view produces
# len(text) - seq_length + 1 windows, and the last one has no following
# character, so only the first examples_per_epoch windows are kept.
# .copy() turns the read-only strided views into ordinary, independent arrays.
inputs = np.lib.stride_tricks.sliding_window_view(text_as_int, seq_length)[:examples_per_epoch].copy()
targets = text_as_int[seq_length:].copy()


In [3]:
import torch
from torch.utils.data import Dataset, DataLoader

class ShakespeareDataset(Dataset):
    """Map-style dataset that pairs each input sequence with its target index.

    Args:
        inputs: Array-like of integer indices, one entry (row) per example.
        targets: Array-like of integer indices, one entry per example.

    Raises:
        ValueError: If ``inputs`` and ``targets`` hold a different number of
            examples.
    """

    def __init__(self, inputs, targets):
        # torch.tensor copies its argument, so the dataset does not alias the
        # arrays it was built from.
        self.inputs = torch.tensor(inputs, dtype=torch.long)
        self.targets = torch.tensor(targets, dtype=torch.long)
        # A mismatch would otherwise surface as an IndexError in the middle of
        # an epoch, or silently ignore the surplus targets.
        if len(self.inputs) != len(self.targets):
            raise ValueError(
                f"inputs and targets must have the same length, got "
                f"{len(self.inputs)} and {len(self.targets)}"
            )

    def __len__(self):
        """Return the number of examples."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.inputs[idx], self.targets[idx]

# Wrap the prepared arrays in a Dataset and serve them as shuffled mini-batches
batch_size = 64
dataset = ShakespeareDataset(inputs=inputs, targets=targets)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)




In [4]:
# TensorFlow is first imported by a later cell of this notebook; import it here
# so this cell does not depend on the order in which cells were executed.
import tensorflow as tf

# NOTE(review): `model` is not defined in any cell visible here — it has to be
# built and compiled before this cell can run.

# Split inputs and targets into training batches
batch_size = 64
# The examples are consecutive, overlapping windows of the text, so a shuffle
# buffer smaller than the dataset only mixes neighbouring windows.  Buffering
# the whole dataset gives a uniform shuffle.
buffer_size = len(inputs)
# Note: this rebinds `dataset`, which an earlier cell bound to the PyTorch dataset.
dataset = tf.data.Dataset.from_tensor_slices((inputs, targets))
dataset = dataset.shuffle(buffer_size).batch(batch_size, drop_remainder=True)

# Train the model
epochs = 20
history = model.fit(dataset, epochs=epochs)


Epoch 1/20
[1m 503/2350[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m16:09[0m 525ms/step - loss: 3.0407

KeyboardInterrupt: 

In [None]:
def generate_text(model, start_string, num_generate=500, temperature=1.0):
    """Generate text by repeatedly sampling the next character from ``model``.

    Args:
        model: Callable that receives an int32 tensor of shape (1, length) and
            returns logits.  Both per-position logits (1, length, vocab) and
            next-character-only logits (1, vocab) are accepted.
        start_string: Seed text; every character must be a key of the
            module-level ``char_to_idx`` mapping.
        num_generate: Number of characters to sample.
        temperature: Positive value the logits are divided by before sampling.
            The default of 1.0 leaves the logits unchanged.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        KeyError: If ``start_string`` contains a character that is not in
            ``char_to_idx``.
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
    """
    # Imported locally so the function also works when it is called before the
    # notebook cell that imports TensorFlow has been executed.
    import tensorflow as tf

    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Shape (1, len(start_string)); this is the sliding context window.
    input_eval = tf.constant(
        [[char_to_idx[char] for char in start_string]], dtype=tf.int32
    )

    text_generated = []

    for _ in range(num_generate):
        logits = model(input_eval)
        if len(logits.shape) == 3:
            # Per-position output: only the last position predicts the next character.
            logits = logits[:, -1, :]

        # logits is now (1, vocab), the 2-D shape tf.random.categorical requires.
        sampled = tf.random.categorical(logits / temperature, num_samples=1)
        predicted_id = int(sampled[0, 0].numpy())
        text_generated.append(idx_to_char[predicted_id])

        # Slide the window: drop the oldest character and append the new one,
        # keeping the window's dtype so tf.concat sees matching types.
        next_id = tf.constant([[predicted_id]], dtype=input_eval.dtype)
        input_eval = tf.concat([input_eval[:, 1:], next_id], axis=1)

    return start_string + ''.join(text_generated)

# Sample 1000 characters that continue the Hamlet seed line and show the result
seed_text = "To be, or not to be, that is the question:"
generated_text = generate_text(
    model,
    seed_text,
    num_generate=1000,
)
print(generated_text)


In [None]:
def generate_text_with_temperature(model, start_string, num_generate=500, temperature=1.0):
    """Generate text from ``model``, scaling the logits by ``1 / temperature``.

    Args:
        model: Callable that receives an int32 tensor of shape (1, length) and
            returns logits.  Both per-position logits (1, length, vocab) and
            next-character-only logits (1, vocab) are accepted.
        start_string: Seed text; every character must be a key of the
            module-level ``char_to_idx`` mapping.
        num_generate: Number of characters to sample.
        temperature: Positive value the logits are divided by before sampling.
            Values below 1 sharpen the distribution, values above 1 flatten it.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        KeyError: If ``start_string`` contains a character that is not in
            ``char_to_idx``.
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
    """
    # Imported locally so the function also works when it is called before the
    # notebook cell that imports TensorFlow has been executed.
    import tensorflow as tf

    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        # Zero would divide the logits by zero; a negative value would invert
        # the ranking of the characters.
        raise ValueError("temperature must be positive")

    # Shape (1, len(start_string)); this is the sliding context window.
    input_eval = tf.constant(
        [[char_to_idx[char] for char in start_string]], dtype=tf.int32
    )

    text_generated = []

    for _ in range(num_generate):
        logits = model(input_eval)
        if len(logits.shape) == 3:
            # Per-position output: only the last position predicts the next character.
            logits = logits[:, -1, :]

        # logits is now (1, vocab), the 2-D shape tf.random.categorical requires.
        sampled = tf.random.categorical(logits / temperature, num_samples=1)
        predicted_id = int(sampled[0, 0].numpy())
        text_generated.append(idx_to_char[predicted_id])

        # Slide the window: drop the oldest character and append the new one,
        # keeping the window's dtype so tf.concat sees matching types.
        next_id = tf.constant([[predicted_id]], dtype=input_eval.dtype)
        input_eval = tf.concat([input_eval[:, 1:], next_id], axis=1)

    return start_string + ''.join(text_generated)


In [None]:
import matplotlib.pyplot as plt

# Draw the per-epoch loss values stored on the History object returned by model.fit
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.set_title("Training Loss")
plt.show()


In [None]:
# Write the model to disk under a fixed file name in the working directory.
# NOTE(review): `model` is not defined in any cell visible here — confirm where it is built.
# NOTE(review): the ".h5" suffix presumably selects the legacy HDF5 format — confirm
# whether the native ".keras" format is preferred for the TensorFlow version in use.
model.save("shakespeare_generator.h5")


In [12]:
import tensorflow as tf
import platform

# Every physical device TensorFlow has registered, one line per device
devices = tf.config.list_physical_devices()
print("TensorFlow Devices:")
for device in devices:
    kind, name = device.device_type, device.name
    print(f"Device Type: {kind}, Name: {name}")

# Host CPU description as reported by the standard-library platform module
print("\nCPU Details:")
processor = platform.processor()
print("Processor:", processor)
architecture = platform.architecture()
print("Architecture:", architecture)
machine = platform.machine()
print("Machine:", machine)

TensorFlow Devices:
Device Type: CPU, Name: /physical_device:CPU:0

CPU Details:
Processor: AMD64 Family 25 Model 97 Stepping 2, AuthenticAMD
Architecture: ('64bit', 'WindowsPE')
Machine: AMD64


In [11]:
import tensorflow as tf

# Report the GPUs TensorFlow can use, or state that it falls back to the CPU
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU found. Running on CPU.")
else:
    print("Available GPUs:")
    for gpu in gpus:
        print(f"  - {gpu}")

# Re-import is a no-op here: the cell's first line already imported tensorflow.
import tensorflow as tf
print(tf.__version__)

No GPU found. Running on CPU.
2.18.0


In [10]:
# Print the list of local devices returned by device_lib.list_local_devices().
# NOTE(review): `tensorflow.python.*` looks like an internal namespace — consider the
# tf.config.list_physical_devices() call used in other cells of this notebook instead.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())



[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 8680732042859661693
xla_global_id: -1
]


In [9]:
# Print the list of local devices returned by device_lib.list_local_devices().
# This cell repeats the code of the preceding cell verbatim.
# NOTE(review): one of the two cells can probably be removed — confirm.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())




[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 147273582619563358
xla_global_id: -1
]


In [8]:
import torch
# Report PyTorch's view of CUDA.  torch.cuda.get_device_name(0) raises when no
# CUDA device is usable, so the name is only queried after the availability check.
cuda_available = torch.cuda.is_available()
print("Is CUDA available:", cuda_available)
print("CUDA Device Count:", torch.cuda.device_count())
if cuda_available:
    print("Current CUDA Device:", torch.cuda.get_device_name(0))
else:
    # Keep the third line of the report, but without touching the CUDA runtime.
    print("Current CUDA Device:", None)


Is CUDA available: True
CUDA Device Count: 1
Current CUDA Device: NVIDIA GeForce RTX 4070 Ti SUPER
