<a href="https://colab.research.google.com/github/jasminejolly123/OpenGL_Game/blob/main/Another_copy_of_PYTHONASSIGNMENT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import random
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.preprocessing.image import img_to_array
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dropout

# Outer archive holding the dataset, and the folder it is unpacked into.
zip_file = 'dataset2.zip'
output_dir = 'dataset2/'

# Unpack the outer archive. Failures are reported on stdout and the script
# keeps going instead of aborting.
try:
    with zipfile.ZipFile(zip_file, mode='r') as main_archive:
        main_archive.extractall(path=output_dir)
    print(f"Main dataset extraction complete. Files extracted to {output_dir}")
except zipfile.BadZipFile:
    print("The file is not a valid zip file!")
except Exception as main_error:
    print(f"Error during main extraction: {main_error}")

# Location of the nested training archive and of the folder it unpacks into.
train_zip_file = os.path.join(output_dir, 'triple_mnist', 'train.zip')
train_dir = os.path.join(output_dir, 'triple_mnist', 'train')

# Unpack the nested archive only when it is actually present.
if not os.path.exists(train_zip_file):
    print(f"train.zip file not found at {train_zip_file}. Please ensure the correct path.")
else:
    try:
        with zipfile.ZipFile(train_zip_file, mode='r') as train_archive:
            train_archive.extractall(path=train_dir)
        print(f"Train dataset extraction complete. Files extracted to {train_dir}")
    except zipfile.BadZipFile:
        print(f"The train zip file {train_zip_file} is not a valid zip file!")
    except Exception as train_error:
        print(f"Error during train extraction: {train_error}")

def load_images_from_directory(directory, size=(84, 84)):
    """Load every PNG/JPEG under *directory* as a grayscale array.

    The tree is walked recursively in sorted order, so the returned arrays
    have the same ordering on every run and every filesystem (plain
    ``os.walk`` order is filesystem-dependent, which would make a seeded
    train/validation split non-reproducible).

    Args:
        directory: Root folder to scan. A missing or empty folder yields
            two empty arrays.
        size: ``(width, height)`` each image is resized to. Defaults to
            ``(84, 84)``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        ``img_to_array`` result for each readable image; ``labels`` holds
        the text before the first underscore of each file name.

    Files that cannot be opened or decoded are reported on stdout and
    skipped.
    """
    images = []
    labels = []
    for root, dirs, files in os.walk(directory):
        # Sorting ``dirs`` in place fixes the order os.walk descends in.
        dirs.sort()
        for img_file in sorted(files):
            if not img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            img_path = os.path.join(root, img_file)
            if not os.path.isfile(img_path):
                continue
            try:
                # The context manager closes the underlying file handle
                # deterministically instead of leaving it to the GC.
                with Image.open(img_path) as img:
                    img_array = img_to_array(img.convert('L').resize(size))
            except Exception as e:
                print(f"Error loading image {img_file}: {e}")
                continue

            # NOTE(review): the label is taken from the part of the file
            # name before the first '_'. Confirm against the dataset's
            # naming scheme that this is the class and not a running index.
            images.append(img_array)
            labels.append(img_file.split('_')[0])
    return np.array(images), np.array(labels)

if os.path.exists(train_dir):
    # Load the training images and scale pixel values by 1/255.
    train_images, train_labels = load_images_from_directory(train_dir)
    train_images = train_images / 255.0

    # NOTE(review): MultiLabelBinarizer iterates over each label, so a label
    # *string* is treated as a set of characters and encoded as a multi-hot
    # row over the distinct characters seen. Confirm this is intended.
    mlb = MultiLabelBinarizer()
    train_labels = mlb.fit_transform(train_labels)

    # argmax over a multi-hot row returns the index of the first active
    # column, giving one integer class id per image.
    y_train_int = np.argmax(train_labels, axis=1)

    # 80/20 train/validation split on the integer labels. (A former
    # `y_train = train_labels` assignment was removed: it was overwritten
    # here before ever being read.)
    X_train, X_val, y_train, y_val = train_test_split(train_images, y_train_int, test_size=0.2, random_state=42)

    def split_image(image):
        """Cut *image* into three strips along its second axis.

        Returns a 3-tuple holding columns 0-27, columns 28-55 and columns
        56 onwards, in that order. All rows (and any trailing axes) are kept.
        """
        column_ranges = (slice(None, 28), slice(28, 56), slice(56, None))
        left, middle, right = (image[:, cols] for cols in column_ranges)
        return left, middle, right

    # Split every training and validation image into its three strips.
    # Each entry is [strip 1, strip 2, strip 3], so the resulting arrays are
    # indexed as [sample, strip, ...].
    X_train_split = np.array([list(split_image(sample)) for sample in X_train])
    X_val_split = np.array([list(split_image(sample)) for sample in X_val])

    def build_cnn(input_shape):
        """Return an uncompiled CNN classifier with a 10-way softmax output.

        The network is two Conv2D + MaxPooling2D stages (32 then 64 filters,
        3x3 kernels, 2x2 pooling), followed by Flatten, a 128-unit ReLU
        dense layer and the softmax layer. *input_shape* is passed to the
        first convolution.
        """
        stack = [
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dense(10, activation='softmax'),
        ]
        network = models.Sequential()
        for layer in stack:
            network.add(layer)
        return network

    # One independent classifier per strip. split_image cuts along the
    # second axis, so each strip of an (84, 84, 1) image is 84 rows x 28
    # columns x 1 channel. The models must declare that shape; the previous
    # (28, 84, 1) did not match the data fed to fit().
    cnn_models = []
    for _ in range(3):
        cnn = build_cnn((84, 28, 1))
        cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        cnn_models.append(cnn)

    # Train model i on strip i of every image.
    # NOTE(review): all three models are fitted against the same y_train
    # vector; confirm whether each strip should have its own target.
    for i, cnn in enumerate(cnn_models):
        print(f"Training CNN model for piece {i+1}")
        cnn.fit(X_train_split[:, i], y_train, epochs=5, validation_data=(X_val_split[:, i], y_val))

    def get_final_prediction(model, image):
        """Predict one class per strip of a single image.

        Args:
            model: Indexable collection of three models; ``model[i]`` is
                used for strip ``i`` and must offer ``predict(batch)``.
            image: Array of shape ``(H, W)`` or ``(H, W, 1)``. It is cut
                along its second axis at columns 28 and 56.

        Returns:
            A list of three predicted class indices, one per strip.
        """
        pieces = [image[:, :28], image[:, 28:56], image[:, 56:]]
        predictions = []
        for i, piece in enumerate(pieces):
            piece = np.asarray(piece)
            # Add a channel axis only when it is missing. Adding one
            # unconditionally turned (H, W, 1) strips into rank-5 batches.
            if piece.ndim == 2:
                piece = piece[..., np.newaxis]
            batch = piece[np.newaxis, ...]
            # verbose=0 suppresses the per-call progress bar.
            prediction = model[i].predict(batch, verbose=0)
            predictions.append(np.argmax(prediction))
        return predictions

    # Predict every validation strip with one batched call per model.
    # Predicting image by image meant three predict() calls per validation
    # sample, which is extremely slow and floods the output with progress
    # bars. The result has one row per sample and one column per strip.
    final_predictions = np.stack(
        [
            np.argmax(cnn.predict(X_val_split[:, i], verbose=0), axis=1)
            for i, cnn in enumerate(cnn_models)
        ],
        axis=1,
    )

    # A sample counts as correct only when all three strip predictions
    # equal its label.
    accuracy = np.mean(np.all(final_predictions == y_val[:, None], axis=1))
    print(f"Final Model Accuracy: {accuracy * 100:.2f}%")

    # Flatten each 84x84 image into a 7056-element feature vector.
    X_train_flattened = X_train.reshape(-1, 84 * 84)
    X_val_flattened = X_val.reshape(-1, 84 * 84)

    # Baseline: logistic regression fitted on the training split and scored
    # on the validation split.
    logistic_model = LogisticRegression(max_iter=1000)
    logistic_model.fit(X_train_flattened, y_train)
    logistic_accuracy = logistic_model.score(X_val_flattened, y_val)
    print(f"Logistic Regression Validation Accuracy: {logistic_accuracy * 100:.2f}%")

    # 5-fold cross-validation over the training split. Each fold gets its
    # own estimator so that `logistic_model` keeps the fit on the full
    # training split instead of ending up as the last fold's model.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    for fold_no, (train_index, val_index) in enumerate(kf.split(X_train_flattened), start=1):
        print(f"Training fold {fold_no}...")
        X_train_fold, X_val_fold = X_train_flattened[train_index], X_train_flattened[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        fold_model = LogisticRegression(max_iter=1000)
        fold_model.fit(X_train_fold, y_train_fold)
        logistic_fold_accuracy = fold_model.score(X_val_fold, y_val_fold)
        print(f"Fold {fold_no} - Accuracy: {logistic_fold_accuracy * 100:.2f}%")

def build_improved_cnn(input_shape):
    """Build and compile a dropout-regularised CNN with a 10-way softmax.

    Layout: two Conv2D -> MaxPooling2D -> Dropout(0.25) stages (64 then 128
    filters), Flatten, Dense(256, relu), Dropout(0.5), Dense(10, softmax).
    The model is compiled with the Adam optimizer, sparse categorical
    cross-entropy loss and an accuracy metric.

    Args:
        input_shape: Shape passed to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    network = models.Sequential()

    # First convolutional stage; only this layer carries the input shape.
    network.add(layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape))
    network.add(layers.MaxPooling2D(pool_size=(2, 2)))
    network.add(layers.Dropout(0.25))

    # Second convolutional stage.
    network.add(layers.Conv2D(128, (3, 3), activation='relu'))
    network.add(layers.MaxPooling2D(pool_size=(2, 2)))
    network.add(layers.Dropout(0.25))

    # Classifier head.
    network.add(layers.Flatten())
    network.add(layers.Dense(256, activation='relu'))
    network.add(layers.Dropout(0.5))
    network.add(layers.Dense(10, activation='softmax'))

    network.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Callbacks are referenced through the already-imported `tf` module; the
# bare names EarlyStopping / ModelCheckpoint are never imported in this file
# and raised NameError.
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Save the model to best_model.h5 whenever val_loss improves.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')

# Whole-image classifier on the full 84x84 single-channel input.
cnn_model = build_improved_cnn((84, 84, 1))

history = cnn_model.fit(
    X_train, y_train,
    epochs=30,
    validation_data=(X_val, y_val),
    batch_size=64,
    callbacks=[early_stopping, model_checkpoint]
)

# Plot the training and validation loss curves per epoch.
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training vs Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


def build_generator():
    """Return the uncompiled GAN generator.

    A 100-dimensional input goes through dense layers of 256, 512 and 1024
    units, each followed by LeakyReLU(alpha=0.2). A tanh dense layer of
    84*84 units is then reshaped to (84, 84, 1).
    """
    generator_net = models.Sequential()

    # Input layer fixes the latent dimension.
    generator_net.add(layers.Dense(256, input_dim=100))
    generator_net.add(layers.LeakyReLU(alpha=0.2))

    # Progressively wider hidden layers.
    for units in (512, 1024):
        generator_net.add(layers.Dense(units))
        generator_net.add(layers.LeakyReLU(alpha=0.2))

    # tanh keeps the output in [-1, 1]; reshape to an image tensor.
    generator_net.add(layers.Dense(84 * 84, activation='tanh'))
    generator_net.add(layers.Reshape((84, 84, 1)))
    return generator_net

def build_discriminator():
    """Return the uncompiled GAN discriminator.

    An (84, 84, 1) input is flattened and passed through dense layers of
    1024, 512 and 256 units, each followed by LeakyReLU(alpha=0.2). The
    first dense layer has an L2(0.01) kernel regulariser. The output is a
    single sigmoid unit.
    """
    discriminator_net = models.Sequential()
    discriminator_net.add(layers.Flatten(input_shape=(84, 84, 1)))

    # Only the first dense layer is L2-regularised.
    discriminator_net.add(layers.Dense(1024, kernel_regularizer=regularizers.l2(0.01)))
    discriminator_net.add(layers.LeakyReLU(alpha=0.2))

    # Progressively narrower hidden layers.
    for units in (512, 256):
        discriminator_net.add(layers.Dense(units))
        discriminator_net.add(layers.LeakyReLU(alpha=0.2))

    discriminator_net.add(layers.Dense(1, activation='sigmoid'))
    return discriminator_net

def build_gan(generator, discriminator):
    """Compile the discriminator and return the compiled stacked GAN model.

    The discriminator is compiled first (binary cross-entropy, Adam,
    accuracy metric) and then has ``trainable`` set to False before being
    chained after the generator. The stacked model maps a 100-dimensional
    input to the discriminator's output and is compiled with binary
    cross-entropy and Adam.

    Args:
        generator: Model mapping a latent vector to an image.
        discriminator: Model mapping an image to a single score.

    Returns:
        The compiled stacked model.
    """
    discriminator.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    discriminator.trainable = False

    latent = layers.Input(shape=(100,))
    validity = discriminator(generator(latent))

    combined = models.Model(inputs=latent, outputs=validity)
    combined.compile(optimizer='adam', loss='binary_crossentropy')
    return combined

# Build the two networks (generator first), then chain them into the
# stacked model used to train the generator.
generator, discriminator = build_generator(), build_discriminator()
gan = build_gan(generator=generator, discriminator=discriminator)

def train_gan(generator, discriminator, gan, epochs=1000, batch_size=64, latent_dim=100, real_images=None):
    """Alternate discriminator and generator updates for *epochs* steps.

    Each step trains the discriminator on half a batch of real images
    (target 1) and half a batch of generated images (target 0). It then
    trains the generator through *gan* on a full batch of noise with
    target 1. Losses are printed after every step.

    Args:
        generator: Model with ``predict(noise)`` returning images.
        discriminator: Model with ``train_on_batch(images, targets)`` whose
            result is indexable, with the loss at position 0.
        gan: Stacked model with ``train_on_batch(noise, targets)``.
        epochs: Number of training steps.
        batch_size: Generator batch size; the discriminator sees half of
            it from each source.
        latent_dim: Length of each noise vector.
        real_images: Pool of real images with values in [0, 1], reshapeable
            to ``(-1, 84, 84, 1)``. Defaults to the module-level
            ``X_train``.

    Returns:
        None.
    """
    if real_images is None:
        real_images = X_train
    half_batch = batch_size // 2

    for epoch in range(epochs):
        idx = np.random.randint(0, real_images.shape[0], half_batch)
        real_batch = real_images[idx].reshape(-1, 84, 84, 1)
        # The generator ends in tanh, so its samples lie in [-1, 1]. Map the
        # [0, 1] real images to the same range; otherwise the discriminator
        # can tell them apart by value range alone.
        real_batch = real_batch * 2.0 - 1.0

        noise = np.random.normal(0, 1, (half_batch, latent_dim))
        # verbose=0 avoids one progress bar per training step.
        generated_images = generator.predict(noise, verbose=0)

        real_labels = np.ones((half_batch, 1))
        fake_labels = np.zeros((half_batch, 1))

        discriminator_loss_real = discriminator.train_on_batch(real_batch, real_labels)
        discriminator_loss_fake = discriminator.train_on_batch(generated_images, fake_labels)
        # Element-wise mean of the two discriminator results.
        discriminator_loss = 0.5 * np.add(discriminator_loss_real, discriminator_loss_fake)

        # Generator step: ask the stacked model to label fresh fakes as real.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        valid_y = np.ones((batch_size, 1))
        generator_loss = gan.train_on_batch(noise, valid_y)

        print(f"{epoch+1}/{epochs}, Discriminator Loss: {discriminator_loss[0]}, Generator Loss: {generator_loss}")

# Run 1000 GAN training steps with a generator batch size of 64; losses are
# printed after every step.
train_gan(generator, discriminator, gan, epochs=1000, batch_size=64)

def generate_images(generator, num_images=5, latent_dim=100):
    """Sample standard-normal noise and return the generator's output for it.

    Args:
        generator: Object exposing ``predict(noise)``.
        num_images: Number of noise vectors to draw.
        latent_dim: Length of each noise vector.

    Returns:
        Whatever ``generator.predict`` returns for the
        ``(num_images, latent_dim)`` noise batch.
    """
    latent_batch = np.random.normal(loc=0, scale=1, size=(num_images, latent_dim))
    return generator.predict(latent_batch)

# Show a handful of generated samples.
generated_images = generate_images(generator, num_images=5)
for i, img in enumerate(generated_images):
    plt.imshow(img.reshape(84, 84), cmap='gray')
    plt.title(f"Generated Image {i+1}")
    plt.show()

# Generate 500 synthetic images and map the generator's tanh output from
# [-1, 1] to [0, 1], the range of the real training images.
synthetic_images = generate_images(generator, num_images=500)
synthetic_images = (synthetic_images + 1) / 2

# The GAN is unconditional, so synthetic images have no ground-truth label.
# Pseudo-label them with the trained classifier instead of reusing the
# labels of the first 500 training images, which are unrelated to them.
synthetic_labels = np.argmax(cnn_model.predict(synthetic_images), axis=1)

X_train_augmented = np.concatenate((X_train, synthetic_images), axis=0)
y_train_augmented = np.concatenate((y_train, synthetic_labels), axis=0)

# Retrain a fresh classifier on real + synthetic data.
cnn_model_augmented = build_improved_cnn((84, 84, 1))
cnn_model_augmented.fit(
    X_train_augmented, y_train_augmented,
    epochs=30,
    validation_data=(X_val, y_val),
    batch_size=64
)

# NOTE(review): this evaluates on the validation split (X_val, y_val), not
# on a separate test set, although the message below says "Test".
test_loss, test_acc = cnn_model_augmented.evaluate(X_val, y_val)
print(f"Test accuracy after data augmentation: {test_acc * 100:.2f}%")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step

KeyboardInterrupt: 