<a href="https://colab.research.google.com/github/anishana/Text-Recognition-on-a-MNIST-dataset/blob/main/Pattern_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! [ ! -z "$COLAB_GPU" ] && pip install torch scikit-learn==0.20.* skorch
!pip install scikit-learn --upgrade
# Run this when opening the notebook for the first time

Collecting scikit-learn==0.20.*
  Downloading scikit_learn-0.20.4-cp37-cp37m-manylinux1_x86_64.whl (5.4 MB)
[K     |████████████████████████████████| 5.4 MB 727 kB/s 
[?25hCollecting skorch
  Downloading skorch-0.11.0-py3-none-any.whl (155 kB)
[K     |████████████████████████████████| 155 kB 50.0 MB/s 
Installing collected packages: scikit-learn, skorch
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.0.1
    Uninstalling scikit-learn-1.0.1:
      Successfully uninstalled scikit-learn-1.0.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
imbalanced-learn 0.8.1 requires scikit-learn>=0.24, but you have scikit-learn 0.20.4 which is incompatible.[0m
Successfully installed scikit-learn-0.20.4 skorch-0.11.0
Collecting scikit-learn
  Downloading scikit_learn-1.0.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (

In [None]:
# Run this to mount your Google Drive at /content/drive.
# Later cells open files for writing under /content/drive/My Drive/CSE555/
# (and a training log under /content/drive/MyDrive/CSE555/) without creating
# the folder first, so that folder has to exist in Drive before they are run.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Noise generation code
# This code generates the six types of dataset and stores the results in a dictionary for easy access

import os
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from numpy.testing import assert_array_almost_equal
from imblearn.over_sampling import SMOTE

# Set random seed
np.random.seed(123)


def other_class(n_classes, current_class):
    """
    Draw one class index at random from every class except ``current_class``.

    :param n_classes: number of classes in the task
    :param current_class: the class index that must not be returned
    :return: one random class index in [0, n_classes - 1] that != current_class
    :raises ValueError: if current_class is outside [0, n_classes - 1]
    """
    if current_class < 0 or current_class >= n_classes:
        error_str = "current_class must be within the range [0, n_classes - 1]"
        raise ValueError(error_str)

    # Candidates are all valid class indices other than the current one.
    candidates = [c for c in range(n_classes) if c != current_class]
    return np.random.choice(candidates)

def get_data(asym=False,balance = False,plain = False,determ = False, noise_ratio=40):
    """
    Load MNIST, optionally oversample the training set with SMOTE, and
    optionally corrupt the training labels with label noise.

    Images are reshaped to (N, 28, 28, 1), scaled to [0, 1] and cast to
    float32. Only the training labels are ever corrupted; the test labels
    are returned clean.

    :param asym: if True, apply asymmetric noise, relabelling a share of each
        source class as a fixed target class (7 -> 1, 2 -> 7, 3 -> 8,
        5 -> 6, 6 -> 5); if False, apply symmetric noise, relabelling the
        chosen samples as a random different class. Ignored when plain is True.
    :param balance: if True, oversample the training set with SMOTE before
        any noise is applied
    :param plain: if True, return the data without any label noise
    :param determ: if True, keep labels as integer class indices; if False,
        one-hot encode train and test labels into 10 columns
    :param noise_ratio: percentage (0-100) of training labels to corrupt. For
        asymmetric noise it is applied to each source class; for symmetric
        noise it is applied to the whole training set and the resulting count
        is split evenly across the ten classes. Defaults to 40.
    :return: tuple (X_train, y_train, X_test, y_test)
    :raises ValueError: if noise_ratio is outside [0, 100]
    """
    if not 0 <= noise_ratio <= 100:
        raise ValueError("noise_ratio must be a percentage between 0 and 100")

    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    if balance:
        smote = SMOTE(random_state=42)
        # SMOTE expects a 2-D feature matrix, so flatten every image first.
        X_train = X_train.reshape(len(X_train), -1)
        X_train, y_train = smote.fit_resample(X_train, y_train)

    X_train = X_train.reshape(-1, 28, 28, 1)
    X_test = X_test.reshape(-1, 28, 28, 1)

    X_train = X_train / 255.0
    X_test = X_test / 255.0

    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    if not plain:
        # Sample indices from the clean labels so an already flipped label is
        # never picked up again by a later source class (e.g. 5 <-> 6).
        y_train_clean = np.copy(y_train)

        if asym:
            # 1 <- 7, 2 -> 7, 3 -> 8, 5 <-> 6
            source_class = [7, 2, 3, 5, 6]
            target_class = [1, 7, 8, 6, 5]

            for s, t in zip(source_class, target_class):
                cls_idx = np.where(y_train_clean == s)[0]
                n_noisy = int(noise_ratio * cls_idx.shape[0] / 100)
                noisy_sample_index = np.random.choice(cls_idx, n_noisy, replace=False)
                y_train[noisy_sample_index] = t

        else:
            n_samples = y_train.shape[0]
            n_noisy = int(noise_ratio * n_samples / 100)
            class_index = [np.where(y_train_clean == i)[0] for i in range(10)]
            class_noisy = int(n_noisy / 10)

            noisy_idx = []
            for d in range(10):
                # A class can hold fewer samples than the even per-class share
                # when the ratio is high; never request more than it contains.
                n_pick = min(class_noisy, len(class_index[d]))
                noisy_class_index = np.random.choice(class_index[d], n_pick, replace=False)
                noisy_idx.extend(noisy_class_index)

            for i in noisy_idx:
                y_train[i] = other_class(n_classes=10, current_class=y_train[i])

            # print statistics
            print("Print noisy label generation statistics:")
            for i in range(10):
                n_noisy = np.sum(y_train == i)
                print("Noisy class %s, has %s samples." % (i, n_noisy))

    # one-hot-encode the labels unless integer labels were requested
    if not determ:
        y_train = np_utils.to_categorical(y_train, 10)
        y_test = np_utils.to_categorical(y_test, 10)

    print("X_train:", X_train.shape)
    print("y_train:", y_train.shape)
    print("X_test:", X_test.shape)
    print("y_test", y_test.shape)
    return X_train, y_train, X_test, y_test


if __name__ == "__main__":
    # One entry per dataset variant: (name used in output file names,
    # keyword arguments passed to get_data for that variant).
    dataset_specs = [
        ('imbalanced', dict(plain=True)),
        ('balanced', dict(balance=True, plain=True)),
        ('imbalanced_asym', dict(asym=True)),
        ('balanced_asym', dict(asym=True, balance=True)),
        ('imbalanced_sym', dict()),
        ('balanced_sym', dict(balance=True)),
    ]
    list_of_filenames = [name for name, _ in dataset_specs]

    # One-hot labelled variants first, then the integer-labelled
    # ("deterministic") variants, generated in the same order.
    list_of_noisedata = [get_data(**options) for _, options in dataset_specs]
    list_of_noisedata_deterministic = [
        get_data(determ=True, **options) for _, options in dataset_specs
    ]

    # Look-up tables keyed by variant name, used by the model cells.
    mapped_function_dict = dict(zip(list_of_filenames, list_of_noisedata))
    mapped_function_dict_deterministic = dict(
        zip(list_of_filenames, list_of_noisedata_deterministic)
    )


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
X_train: (60000, 28, 28, 1)
y_train: (60000, 10)
X_test: (10000, 28, 28, 1)
y_test (10000, 10)
X_train: (67420, 28, 28, 1)
y_train: (67420, 10)
X_test: (10000, 28, 28, 1)
y_test (10000, 10)
X_train: (60000, 28, 28, 1)
y_train: (60000, 10)
X_test: (10000, 28, 28, 1)
y_test (10000, 10)
X_train: (67420, 28, 28, 1)
y_train: (67420, 10)
X_test: (10000, 28, 28, 1)
y_test (10000, 10)
Print noisy label generation statistics:
Noisy class 0, has 5977 samples.
Noisy class 1, has 6698 samples.
Noisy class 2, has 5966 samples.
Noisy class 3, has 6122 samples.
Noisy class 4, has 5823 samples.
Noisy class 5, has 5367 samples.
Noisy class 6, has 5959 samples.
Noisy class 7, has 6265 samples.
Noisy class 8, has 5834 samples.
Noisy class 9, has 5989 samples.
X_train: (60000, 28, 28, 1)
y_train: (60000, 10)
X_test: (10000, 28, 28, 1)
y_test (10000, 10)
Print noisy label generation statistics:
Noisy class 0, has 66

In [None]:
# SVM

import math, time 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.decomposition import PCA
from google.colab import drive,files
drive.mount('/content/drive')

# Linear SVM on 40 whitened PCA components, trained once per dataset variant.
for noise_type in list_of_filenames:
  train_images, train_labels, test_images, test_labels = mapped_function_dict_deterministic[noise_type]

  # Flatten the images and learn the PCA projection on the training set only.
  train_flat = np.reshape(train_images, (len(train_images), 784))
  projector = PCA(n_components=40, svd_solver='randomized', whiten=True).fit(train_flat)
  train_features = projector.transform(train_flat)

  svm = SVC(kernel='linear')
  svm.fit(train_features, train_labels)

  # Project the test set with the PCA fitted on the training data.
  test_features = projector.transform(np.reshape(test_images, (10000, 784)))
  predicted = svm.predict(test_features)

  # Save the predicted labels to Drive.
  csv_text = pd.DataFrame(predicted, columns=['labels']).to_csv()
  with open('/content/drive/My Drive/CSE555/'+noise_type+'_svm.csv', 'w') as out_file:
    print("Wrote",noise_type)
    out_file.write(csv_text)

  # Report accuracy and the confusion matrix against the clean test labels.
  print("accuracy:", accuracy_score(y_true=test_labels, y_pred=predicted), "\n")
  print(confusion_matrix(y_true=test_labels, y_pred=predicted))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Wrote imbalanced
accuracy: 0.9337 

[[ 967    0    1    0    0    7    3    1    1    0]
 [   0 1119    4    2    0    2    0    1    7    0]
 [   8    4  950   14    9    4   13   11   19    0]
 [   1    2   18  934    3   25    0   11   11    5]
 [   2    0    6    1  937    2    6    3    2   23]
 [   8    6    8   51    5  783    8    1   16    6]
 [   8    3    9    2    7   14  914    0    1    0]
 [   1   11   26    8    8    0    0  957    3   14]
 [   6    5    9   27    7   29   11    5  872    3]
 [   5    8    4   12   34    6    1   25   10  904]]
Wrote balanced
accuracy: 0.9345 

[[ 966    0    1    0    0    7    4    1    1    0]
 [   0 1119    3    3    0    3    0    1    6    0]
 [   7    3  948   14    8    5   15   12   19    1]
 [   0    1   18  932    2   27    1    9   14    6]
 [   1    0    8    1  938    2    6    3    2   21]
 [   

In [None]:
# Logistic

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,accuracy_score
# Multinomial logistic regression on raw pixels, trained once per dataset variant.
for noise_type in list_of_filenames:
  X_train, y_train, X_test, y_test = mapped_function_dict_deterministic[noise_type]
  X_train = np.reshape(X_train,(len(X_train),784))

  # The default iteration cap stops the solver before it converges on this
  # data (the saved output shows "TOTAL NO. of ITERATIONS REACHED LIMIT" for
  # these runs), so give the solver a larger budget.
  logisticRegr = LogisticRegression(max_iter=1000)

  logisticRegr.fit(X_train, y_train)

  # Size the reshape from the data, matching the training reshape above.
  X_test = np.reshape(X_test,(len(X_test),784))

  y_pred = logisticRegr.predict(X_test)
  output_dataframe=pd.DataFrame(y_pred, columns=['labels'])
  csv_data = output_dataframe.to_csv()
  with open('/content/drive/My Drive/CSE555/'+noise_type+'_logistic.csv', 'w') as f:
    print("Wrote",noise_type)
    f.write(csv_data)
  # accuracy
  print("accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred), "\n")

  # cm
  print(confusion_matrix(y_true=y_test, y_pred=y_pred))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Wrote imbalanced
accuracy: 0.9256 

[[ 959    0    0    3    1    7    5    4    1    0]
 [   0 1112    4    2    0    2    3    2   10    0]
 [   6    9  928   16    8    4   15    7   35    4]
 [   4    1   17  921    0   23    4   11   23    6]
 [   1    1    7    4  914    0   10    4   10   31]
 [  10    2    3   37    8  779   14    5   29    5]
 [   9    3    7    3    8   15  910    2    1    0]
 [   1    9   23    6    7    1    0  950    2   29]
 [   9   10    8   26    8   26   12    7  857   11]
 [   9    8    0   11   23    6    0   19    7  926]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Wrote balanced
accuracy: 0.926 

[[ 961    0    1    3    0    6    4    4    1    0]
 [   0 1110    4    1    0    1    5    2   12    0]
 [   5   10  923   18    9    3   15    7   36    6]
 [   3    1   16  923    2   23    3   11   21    7]
 [   1    2    5    3  916    0   10    4   10   31]
 [   8    2    2   35    8  788   14    3   28    4]
 [  10    3    7    1    8   16  910    2    1    0]
 [   1    8   20    7    8    2    0  946    3   33]
 [   8    8    6   22    9   27   10    7  863   14]
 [  11    7    1    9   27    6    0   21    7  920]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Wrote imbalanced_asym
accuracy: 0.8038 

[[ 957    0    1    0    1    8    8    4    1    0]
 [   0 1100    1    2    1    4    3    3   21    0]
 [  11   12  659    8    9   11   14  208   93    7]
 [   3   18    9  655    1   22   12   27  259    4]
 [   1    7    1    1  904    2   13    8    4   41]
 [  11   14    1   17   15  591  134   18   84    7]
 [  13    7    6    0   18  119  772   11   12    0]
 [   4  298    5    3   12    1    2  625   13   65]
 [   7   22    1    3   15   28   19   10  862    7]
 [   8   20    0    1   25    5    2   11   24  913]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Wrote balanced_asym
accuracy: 0.8143 

[[ 960    0    1    0    1   11    4    2    1    0]
 [   0 1095    2    3    1    3    5    2   24    0]
 [  10   13  671    3   12   17   13  189   99    5]
 [   2   14    6  684    1   23    8   25  245    2]
 [   1    5    0    0  906    8    8    6    8   40]
 [  12   12    0   14   14  651   84   20   77    8]
 [  16    6    4    0   19  116  772   13   11    1]
 [   5  290    5    0   15    2    1  631    9   70]
 [  11   17    1    2   15   27   17   12  863    9]
 [   9   18    0    3   31    3    2   10   23  910]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Wrote imbalanced_sym
accuracy: 0.8765 

[[ 930    1    2    3    5   10   18    4    6    1]
 [   0 1105    2    2    1    2    4    2   17    0]
 [  15   44  831   25   15    3   22   23   44   10]
 [   3    9   22  876    4   31    7   24   23   11]
 [   0   12    9    2  890    2   10    1    9   47]
 [  10   13    4   46   16  718   19   22   25   19]
 [  11   12   11    3   20   25  870    0    6    0]
 [   3   27   12   10   23    4    2  895    2   50]
 [  12   25   11   26   21   32   16   15  796   20]
 [  11   11    2   13   55   11    3   41    8  854]]
Wrote balanced_sym
accuracy: 0.8872 

[[ 933    1    2    2    4   11   12    3   11    1]
 [   0 1099    2    5    1    2    4    1   21    0]
 [  11   28  855   21   20    5   18   17   48    9]
 [   5    5   19  885    4   36    7   11   25   13]
 [   0    8    6    2  900    2   14    2    7   41]
 [  13   10    1   24   14  762   18    9   29   12]
 [  10    8    6    0   14   25  893    0    2    0]
 [   5   26   17    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [None]:
# SL

from __future__ import absolute_import
from __future__ import print_function

import os
import numpy as np
import pandas as pd
import keras.backend as K
import argparse
from sklearn.decomposition import PCA
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.models import load_model
from tensorflow.keras.optimizers import SGD
from keras.layers import Input, Conv2D, Dense, MaxPooling2D, Dropout, Flatten, Activation, BatchNormalization
from keras.models import Model
import tensorflow as tf
from tensorflow import keras
from keras.callbacks import CSVLogger

def symmetric_cross_entropy(alpha, beta):
    """
    Build a loss that adds cross entropy and reverse cross entropy.

    :param alpha: weight of the cross-entropy term, -sum(y_true * log(y_pred))
    :param beta: weight of the reverse term, -sum(y_pred * log(y_true))
    :return: a function loss(y_true, y_pred) giving the weighted sum of both
        terms, each summed over the last axis and averaged over the rest
    """
    def loss(y_true, y_pred):
        # Clip whatever goes inside a log so log(0) never occurs:
        # predictions to [1e-7, 1], labels to [1e-4, 1].
        clipped_pred = tf.clip_by_value(y_pred, 1e-7, 1.0)
        clipped_true = tf.clip_by_value(y_true, 1e-4, 1.0)

        forward_ce = tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(clipped_pred), axis = -1))
        reverse_ce = tf.reduce_mean(-tf.reduce_sum(y_pred * tf.math.log(clipped_true), axis = -1))

        return alpha*forward_ce + beta*reverse_ce
    return loss

# Train a small CNN with the symmetric cross-entropy loss on every dataset
# variant, logging per-epoch metrics and saving the predicted test labels.
for noise_type in list_of_filenames: # Replace with whatever slice was run last
  X_train, y_train, X_test, y_test = mapped_function_dict[noise_type]
  image_shape = X_train.shape[1:]
  batch_size = 128
  # append=True: re-running the cell adds rows to an existing log file.
  csv_logger = CSVLogger('/content/drive/MyDrive/CSE555/log_SE_'+noise_type+'.csv', append=True, separator=';')
  img_input = Input(shape=image_shape)

  # Two conv -> batch-norm -> ReLU -> max-pool stages.
  x = Conv2D(32, (3, 3), padding='same', kernel_initializer="he_normal", name='conv1')(img_input)
  x = BatchNormalization()(x)
  x = Activation('relu')(x)
  x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)

  x = Conv2D(64, (3, 3), padding='same', kernel_initializer="he_normal", name='conv2')(x)
  x = BatchNormalization()(x)
  x = Activation('relu')(x)
  x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x)

  x = Flatten()(x)

  # Fully connected head ending in a 10-way softmax.
  x = Dense(128, kernel_initializer="he_normal", name='fc1')(x)
  x = BatchNormalization()(x)
  x = Activation('relu', name='lid')(x)

  x = Dense(10, kernel_initializer="he_normal")(x)
  x = Activation(tf.nn.softmax)(x)

  model = Model(img_input, x)
  # `learning_rate` replaces the deprecated `lr` keyword.
  optimizer = SGD(learning_rate=0.1, decay=1e-4, momentum=0.9)
  loss = symmetric_cross_entropy(1.0,1.0)
  model.compile(
  loss=loss,
  optimizer=optimizer,
  metrics=['accuracy']
  )

  # No augmentation is configured; the generator only batches the data.
  # datagen.fit() is not called because no feature-wise statistics are used.
  datagen = ImageDataGenerator()

  # steps_per_epoch must be a whole number; round up so the final partial
  # batch of every epoch is still consumed.
  steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

  # Model.fit accepts generators directly; fit_generator is deprecated.
  model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=steps_per_epoch, epochs=10,
            validation_data=(X_test, y_test),
            verbose=1,callbacks=[csv_logger]
            )

  # Predicted class = arg-max of the softmax output for each test image.
  output_test = model.predict_on_batch(X_test)
  labels = np.argmax(output_test, axis=1)
  output_dataframe = pd.DataFrame(labels)
  csv_data = output_dataframe.to_csv()
  with open('/content/drive/My Drive/CSE555/'+noise_type+'_SE-CNN_predictions.csv', 'w') as f:
    f.write(csv_data)

  super(SGD, self).__init__(name, **kwargs)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# LDAM
# NOTE: Drive must be mounted, and the LDAM-DRW-master folder must be located at the path used in the command below!
# Also, the outputs are only saved if the expected folder structure exists in Drive.
# Refer to the project report's LDAM section for instructions.

!python  /content/drive/My\ Drive/CSE555/LDAM-DRW-master/cifar_train.py --imb_type exp --imb_factor 0.01 --loss_type LDAM --train_rule DRW

In [None]:
# Proposed DL
# TODO: Find and implement a loss function



from keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from keras.models import Sequential,Model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, Input
from keras.preprocessing.image import ImageDataGenerator

import pandas as pd
# Train the proposed CNN (plain categorical cross entropy plus image
# augmentation) on every dataset variant and save the predicted test labels.
# NOTE(review): the saved output of this cell stops with a TypeError during
# the first epoch; its cause is not visible in the notebook text and is not
# addressed by this edit -- confirm the cell runs end to end.
for noise_type in list_of_filenames:
  x_train, y_train, x_test, y_test = mapped_function_dict[noise_type]

  # The network is built once with the functional API. Two earlier
  # constructions of the same architecture were overwritten before use
  # and have been removed.
  inputs = Input(shape=x_train.shape[1:])

  x = Conv2D(filters=32, kernel_size=(5,5), activation='relu')(inputs)
  x = Conv2D(filters=32, kernel_size=(5,5), activation='relu')(x)
  x = MaxPool2D(pool_size=(2, 2))(x)
  x = Dropout(rate=0.25)(x)

  x = Conv2D(filters=64, kernel_size=(3,3), activation='relu')(x)
  x = Conv2D(filters=64, kernel_size=(3,3), activation='relu')(x)
  x = MaxPool2D(pool_size=(2, 2))(x)
  x = Dropout(rate=0.25)(x)

  x = Flatten()(x)
  x = Dense(256, activation='relu')(x)
  x = Dropout(rate=0.5)(x)
  predictions = Dense(10, activation='softmax')(x)

  model = Model(inputs=inputs, outputs=predictions)
  print("Running for",noise_type,"dataset:")
  model.compile(
      loss='categorical_crossentropy', 
      optimizer='adam', 
      metrics=['accuracy']
  )

  # creating datagenerator for augmenting images
  datagen = ImageDataGenerator(
          rotation_range=10,
          zoom_range=0.1,
          width_shift_range=0.1,
          height_shift_range=0.1)

  epochs = 3
  batch_size = 32

  history = model.fit(datagen.flow(x_train, y_train, batch_size=batch_size), epochs=epochs,
                                validation_data=(x_test, y_test), steps_per_epoch=x_train.shape[0]//batch_size)

  # Predicted class = arg-max of the softmax output for each test image.
  output_test = model.predict_on_batch(x_test)
  labels = np.argmax(output_test, axis=1)
  output_dataframe = pd.DataFrame(labels)
  csv_data = output_dataframe.to_csv()
  with open('/content/drive/My Drive/CSE555/'+noise_type+'_proposed-CNN_predictions.csv', 'w') as f:
    f.write(csv_data)

Running for imbalanced dataset:
Epoch 1/3


TypeError: ignored

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
# Proposed ML

import math, time 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.decomposition import PCA
from google.colab import drive,files
drive.mount('/content/drive')

# Small MLP on 40 whitened PCA components, trained once per dataset variant.
for noise_type in list_of_filenames:
  train_images, train_labels, test_images, test_labels = mapped_function_dict_deterministic[noise_type]

  # Flatten the images and learn the PCA projection on the training set only.
  train_flat = np.reshape(train_images, (len(train_images), 784))
  projector = PCA(n_components=40, svd_solver='randomized', whiten=True).fit(train_flat)
  train_features = projector.transform(train_flat)

  # Two hidden layers (10 and 5 units), trained with L-BFGS.
  mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10, 5), random_state=1, max_iter=1000)
  mlp.fit(train_features, train_labels)

  # Project the test set with the PCA fitted on the training data.
  test_features = projector.transform(np.reshape(test_images, (10000, 784)))
  predicted = mlp.predict(test_features)

  # Save the predicted labels to Drive.
  csv_text = pd.DataFrame(predicted, columns=['labels']).to_csv()
  with open('/content/drive/My Drive/CSE555/'+noise_type+'_mlp.csv', 'w') as out_file:
    print("Wrote",noise_type)
    out_file.write(csv_text)

  # Report accuracy and the confusion matrix against the clean test labels.
  print("accuracy:", accuracy_score(y_true=test_labels, y_pred=predicted), "\n")
  print(confusion_matrix(y_true=test_labels, y_pred=predicted))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


accuracy: 0.9259 

[[ 951    0    9    0    2    5    9    2    2    0]
 [   0 1116    4    2    0    0    4    1    8    0]
 [  17    7  935   12   10    3   16   16   14    2]
 [   1    7   16  914    2   37    1   11   15    6]
 [   1    4    4    0  910    0   14    4    6   39]
 [  13    1    8   44    0  784   16    2   14   10]
 [   6    3   10    0   13   12  905    2    7    0]
 [   3    7   23    7    9    1    0  950    4   24]
 [  10   10    7   15    8   20   18    5  864   17]
 [   9    9    1    4   21    6    1    9   19  930]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


accuracy: 0.9305 

[[ 950    0    7    0    1    4   13    1    4    0]
 [   0 1111    1    6    1    1    1    1   12    1]
 [  15    4  953   10    5    5   11   14   13    2]
 [   0    3   19  928    1   25    0   12   15    7]
 [   1    7    1    0  923    1   13    1    3   32]
 [  10    0    6   31    2  804   13    1   15   10]
 [  12    2    2    0    8   14  913    0    6    1]
 [   5    8   17    6    3    0    0  946    3   40]
 [   4    8   11   14    9   28   12    6  873    9]
 [   4    7    0    7   44   12    1   21    9  904]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


accuracy: 0.8556 

[[ 960    0    0    1    1    6    5    2    3    2]
 [   0 1111    4    1    0    3    2    1   11    2]
 [  13    5  786    5   14    4    9  160   35    1]
 [   7    2    6  535    0   31    1   13  404   11]
 [   1    2    1    0  926    0   11    5    3   33]
 [  10    2    3   17    4  718   84    1   44    9]
 [  22    3    6    1   13   62  842    0    8    1]
 [   6   73    6    0    8    0    1  888   14   32]
 [   3    7    6   17   11   13   12   11  878   16]
 [   9   11    0    3   36    5    3    4   26  912]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


accuracy: 0.8374 

[[ 957    0    0    0    0   11    4    1    3    4]
 [   0 1113    3    1    2    7    0    0    8    1]
 [   9    3  676    7   11    6   11  270   38    1]
 [   0    0   11  566    0   22    0   23  380    8]
 [   1    5    2    0  919    0    8    5    3   39]
 [  20    2    5   17    3  685  106    4   45    5]
 [  10    3    5    0    7  147  781    3    2    0]
 [   5  101    2    0    7    1    1  866   14   31]
 [   4    2    3   10   19   21    9    6  892    8]
 [  10    6    0    3   46    0    0    4   21  919]]
accuracy: 0.8973 

[[ 954    2    4    2    0    6    8    2    2    0]
 [   0 1113    6    5    0    2    2    1    6    0]
 [  13    5  887   20   10    7   32   15   42    1]
 [   4    7   20  885    4   42    1   10   35    2]
 [   6    6    1    0  870    0   28    0    2   69]
 [  12    3    7   54    4  755   19    9   24    5]
 [  17    6   16    0   12   29  875    0    3    0]
 [   1   27   20    2    2    5    5  944    2   20]
 [   4 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
