<a href="https://colab.research.google.com/github/bilmark0/Agile-Manufacturing-TDK-/blob/main/CV/Vision_Transformer_model_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import os
import shutil
import random
from google.colab import drive
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import tensorflow.keras.mixed_precision as mixed_precision
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt  # For plotting
import seaborn as sns  # For confusion matrix visualization
from sklearn.metrics import confusion_matrix
from google.colab import files  # For file handling in Google Colab

In [2]:
# Upload the kaggle.json file
uploaded = files.upload()

# Move the uploaded file to the .kaggle directory
kaggle_api_path = os.path.expanduser("~/.kaggle")
if not os.path.exists(kaggle_api_path):
    os.makedirs(kaggle_api_path)

# Ensure the file is set with proper permissions
kaggle_json_path = next(iter(uploaded))  # Get the uploaded filename
os.rename(kaggle_json_path, f"{kaggle_api_path}/kaggle.json")
os.chmod(f"{kaggle_api_path}/kaggle.json", 0o600)

# Download the dataset from Kaggle
!kaggle datasets download -d markbilszky/agile-manufacturing-tdk --unzip

print("Dataset downloaded successfully.")

Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/markbilszky/agile-manufacturing-tdk
License(s): unknown
Downloading agile-manufacturing-tdk.zip to /content
100% 1.89G/1.89G [00:25<00:00, 90.3MB/s]
100% 1.89G/1.89G [00:25<00:00, 81.1MB/s]
Dataset downloaded successfully.


In [3]:
# Set your paths here
base_path = './'  # Replace with the directory containing Reference, error_2, and error_3
new_folder_path = os.path.join(base_path, 'training_data')

# Create the new folder
os.makedirs(new_folder_path, exist_ok=True)

# Move the folders
folders_to_move = ['Reference', 'error_2', 'error_3']
for folder_name in folders_to_move:
    shutil.move(os.path.join(base_path, folder_name), new_folder_path)

print("Folders moved successfully!")

Folders moved successfully!


In [4]:
# Path to the training data directory
data_dir = './training_data'

# List of main folders to check for nested duplicates
main_folders = os.listdir(data_dir)

for folder in main_folders:
    folder_path = os.path.join(data_dir, folder)

    # Check if this path is indeed a folder
    if os.path.isdir(folder_path):
        # Path to the nested duplicate folder (if exists)
        nested_folder_path = os.path.join(folder_path, folder)

        # Check if a nested folder with the same name exists
        if os.path.isdir(nested_folder_path):
            # Move all files from nested folder to the main folder
            for filename in os.listdir(nested_folder_path):
                file_path = os.path.join(nested_folder_path, filename)
                target_path = os.path.join(folder_path, filename)

                # Move file to the main folder
                if os.path.isfile(file_path):
                    shutil.move(file_path, target_path)
                    #print(f"Moved {file_path} to {target_path}")

            # Remove the nested duplicate folder after moving its contents
            os.rmdir(nested_folder_path)

print("Duplicate folders cleaned up successfully.")

Duplicate folders cleaned up successfully.


In [5]:
# Load the TensorBoard notebook extension
%load_ext tensorboard
# Clear any logs from previous runs
!rm -rf ./logs/

In [16]:
# Define paths and parameters
data_dir = './training_data'  # Update this with your actual data directory
image_size = (224, 224)  # Resize images to 224x224 for ViT
batch_size = 16  # You can adjust the batch size as needed
num_classes = len(os.listdir(data_dir))  # Number of classes (folders in the data directory)

# Load dataset
train_ds = image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=image_size,
    batch_size=batch_size
)

val_ds = image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=image_size,
    batch_size=batch_size
)

# Configure dataset for performance
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Define Vision Transformer (ViT) architecture
def mlp(x, hidden_units, dropout_rate):
    """Applies MLP layer"""
    for units in hidden_units:
        x = layers.Dense(units, activation='gelu')(x)
        x = layers.Dropout(dropout_rate)(x)
    return x

class Patches(layers.Layer):
    def __init__(self, patch_size):
        super(Patches, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding='VALID'
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches

class PatchEncoder(layers.Layer):
    def __init__(self, num_patches, projection_dim):
        super(PatchEncoder, self).__init__()
        self.num_patches = num_patches
        self.projection = layers.Dense(projection_dim)
        self.position_embedding = layers.Embedding(input_dim=num_patches, output_dim=projection_dim)

    def call(self, patches):
        batch_size = tf.shape(patches)[0]
        # Adding positional encoding
        encoded = self.projection(patches) + self.position_embedding(tf.range(start=0, limit=self.num_patches, delta=1))
        return encoded

def create_vit_classifier(input_shape=(224, 224, 3), num_classes=3):
    """Builds the Vision Transformer (ViT) model."""
    # Patch creation
    patch_size = 16  # Size of each patch
    num_patches = (input_shape[0] // patch_size) ** 2

    # Input layer
    inputs = layers.Input(shape=input_shape)

    # Create patches
    patches = Patches(patch_size)(inputs)

    # Encode patches
    encoded_patches = PatchEncoder(num_patches, projection_dim)(patches)

    # Transformer blocks
    for _ in range(transformer_layers):
        # Layer normalization 1
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)

        # Multi-head self-attention layer
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(x1)

        # Skip connection 1
        x2 = layers.Add()([attention_output, x1])  # Use x1 instead of encoded_patches

        # Layer normalization 2
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)

        # MLP layer
        x3 = mlp(x3, hidden_units=mlp_head_units, dropout_rate=0.1)

        # Skip connection 2
        encoded_patches = layers.Add()([x3, x2])

    # Final layers
    representation = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
    representation = layers.Flatten()(representation)
    representation = layers.Dropout(0.5)(representation)
    features = mlp(representation, hidden_units=mlp_head_units, dropout_rate=0.5)
    logits = layers.Dense(num_classes)(features)

    # Define the Keras model
    model = keras.Model(inputs=inputs, outputs=logits)
    return model

# Model parameters
input_shape = (224, 224, 3)
transformer_layers = 8
num_heads = 8
projection_dim = 64  # Set to the same as the output dimension of the Dense layer
mlp_head_units = [2048, 1024]  # Example MLP units

# Instantiate the model
vit_model = create_vit_classifier(input_shape, num_classes)
vit_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)

# Print model summary
vit_model.summary()

# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
model_checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
    verbose=1
)

# Train the model with callbacks
history = vit_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,  # You can adjust the number of epochs as needed
    callbacks=[early_stopping, model_checkpoint]
)

print("Training complete. The best model is saved as 'best_model.keras'.")


Found 18903 files belonging to 3 classes.
Using 15123 files for training.
Found 18903 files belonging to 3 classes.
Using 3780 files for validation.


TypeError: missing a required argument: 'value'

In [None]:
model.summary()

In [None]:
print('Rétegek:\n')
for layer in model.layers:
    print("Réteg neve: ", layer.name, ', tanítható: ', layer.trainable)
    print(layer.get_config(),'\n')

In [None]:
!pip3 install keras-visualizer
from keras_visualizer import visualizer
import matplotlib.image as mpimg
visualizer(model, file_format='png', view=True)
img = mpimg.imread('graph.png')
fig = plt.figure(figsize=[20,9])
plt.imshow(img)
plt.axis('off')

In [None]:
# Logging images to TensorBoard
# Logging images to TensorBoard
file_writer = tf.summary.create_file_writer(logdir)

with tf.name_scope("Test") as scope:
    with file_writer.as_default():
        # Reshape the first three images from your test dataset
        img = np.reshape(test_images[0:3], (-1, img_height, img_width, 3))  # Adjust to (num_samples, height, width, channels)
        tf.summary.image("Test Images", img, step=0, max_outputs=3, description="Sample images from the test set")

# Remember to flush the writer to ensure the images are saved
file_writer.flush()

In [None]:
# Get predictions from the model
predictions = model.predict(test_generator, steps=test_steps)
predicted_classes = np.argmax(predictions, axis=1)  # Get the predicted class labels

# Get the true class labels from the test set
true_classes = []
for _, labels in test_generator:
    true_classes.extend(np.argmax(labels, axis=1))  # Assuming labels are one-hot encoded
true_classes = np.array(true_classes)

# Calculate the confusion matrix
cm = confusion_matrix(true_classes, predicted_classes)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Class 0', 'Class 1', 'Class 2'], yticklabels=['Class 0', 'Class 1', 'Class 2'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted Classes')
plt.ylabel('True Classes')
plt.show()

In [None]:
# Load TensorBoard
%load_ext tensorboard

# Start TensorBoard
logdir = "logs/fit/"  # Ensure this matches your logging path
%tensorboard --logdir {logdir}