<a href="https://colab.research.google.com/github/amotz1/backet_classification/blob/master/MNISTLoader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import urllib.request
import gzip 
import shutil

In [None]:
# Directory that holds the downloaded archives, the extracted files and the
# exported PNGs. The trailing slash is required: the other cells build paths
# with plain string concatenation (datapath + name).
datapath = '/content/MnistData/'

In [None]:
# Create the data directory (including any missing parents). exist_ok=True
# keeps the cell safe to re-run and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs(datapath, exist_ok=True)

In [None]:
 # Gzip archives to download: images and labels for the 'train' and 't10k'
 # sets, as named in the URLs. The last path component of each URL is used as
 # the local file name by the download loop.
 urls = ['https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz',
         'https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz',
         'https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz',
         'https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz']



In [None]:
# Download every archive in `urls` into `datapath`, skipping files that are
# already present.
for url in urls:
    filename = url.split('/')[-1]   # last path component of the URL is the file name
    target = datapath + filename

    if os.path.exists(target):
        print(filename, ' already exists')
        continue

    print('Downloading ', filename)
    # Fetch into a temporary ".part" file and rename it only after the transfer
    # has finished. Otherwise an interrupted download would leave a truncated
    # archive that the existence check above accepts as complete on the next run.
    partial = target + '.part'
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, target)
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure it is removed so it does not linger in the data directory.
        if os.path.exists(partial):
            os.remove(partial)

# Path of the last archive processed by the loop.
print(datapath+filename)

Downloading  train-images-idx3-ubyte.gz
Downloading  train-labels-idx1-ubyte.gz
Downloading  t10k-images-idx3-ubyte.gz
Downloading  t10k-labels-idx1-ubyte.gz
/content/MnistData/t10k-labels-idx1-ubyte.gz


In [None]:
# Snapshot of the data directory; the extraction cell iterates over this list.
files = os.listdir(datapath)

In [None]:
# Show the directory listing captured in `files`.
print(files)

['train-images-idx3-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz', 'train-labels-idx1-ubyte.gz']


In [None]:
# Unpack every .gz archive listed in `files` into `datapath`, then delete the
# archives that were unpacked.
archives = [name for name in files if name.endswith('.gz')]

for file in archives:
  print('extracting file ', file)
  # The output name is the archive name up to its first dot, e.g.
  # 'train-images-idx3-ubyte.gz' -> 'train-images-idx3-ubyte'.
  with gzip.open(datapath+file, 'rb') as f_in:
    with open(datapath+file.split('.')[0], 'wb') as f_out:
      shutil.copyfileobj(f_in, f_out)

print('extraction completed')
# Remove only the archives. Iterating over the full listing here would also
# delete every non-archive entry in the directory (for example files that were
# already extracted when the notebook is run a second time).
for file in archives:
  print('removing archives')
  os.remove(datapath+file)

print('completed to remove archives')



extracting file  train-images-idx3-ubyte.gz
extracting file  t10k-images-idx3-ubyte.gz
extracting file  t10k-labels-idx1-ubyte.gz
extracting file  train-labels-idx1-ubyte.gz
extraction completed
removing archives
removing archives
removing archives
removing archives
completed to remove archives


In [None]:
import os
import codecs
import numpy as np
import torch

In [None]:
def get_int(b):
  """Interpret the bytes `b` as a single unsigned big-endian integer.

  The bytes are rendered as a hexadecimal string which is then parsed in
  base 16, e.g. bytes([0, 0, 8, 3]) -> 2051.
  """
  hex_digits = b.hex()
  return int(hex_digits, 16)

In [None]:
# Parsed arrays, filled in by the parsing loop under keys of the form
# '<train|test>_<images|labels>'.
mnist_dict = {}

In [None]:
# Re-list the data directory; the parsing loop iterates over this list.
files = os.listdir(datapath)

In [None]:
# Display the current listing.
files

['train-labels-idx1-ubyte',
 't10k-labels-idx1-ubyte',
 't10k-images-idx3-ubyte',
 'train-images-idx3-ubyte']

In [None]:
# Parse every '*ubyte' file in `files` into a NumPy uint8 array and store it in
# mnist_dict under '<train|test>_<images|labels>'.
for file in files:
  if not file.endswith('ubyte'):
    continue
  print('reading ',file)

  with open(datapath+file, 'rb') as f:
    data = f.read()

  # Header layout as read here: bytes 0-3 identify the kind of file, bytes 4-7
  # give the number of items. The value is stored in `magic` rather than `type`
  # so the built-in type() is not shadowed for the rest of the notebook session.
  magic = get_int(data[:4])
  length = get_int(data[4:8])

  if magic == 2051:
    # Image file: two more header fields (rows, columns), pixel data from byte 16.
    category = 'images'
    num_rows = get_int(data[8:12])
    num_cols = get_int(data[12:16])

    parsed = np.frombuffer(data, dtype='uint8', offset=16)
    parsed = parsed.reshape(length, num_rows, num_cols)

  elif magic == 2049:
    # Label file: one byte per label, starting at byte 8.
    category = 'labels'
    parsed = np.frombuffer(data, dtype='uint8', offset=8)
    parsed = parsed.reshape(length)

  else:
    # Raised explicitly (not via assert) so the check survives `python -O`.
    raise ValueError('unspecified type %d in %s' % (magic, file))

  # The split is inferred from the item count stored in the header.
  if length == 60000:
    data_set = 'train'

  elif length == 10000:
    data_set = 'test'

  else:
    raise ValueError('unspecified length %d in %s' % (length, file))

  mnist_dict[data_set + '_' + category] = parsed



      


reading  train-labels-idx1-ubyte
reading  t10k-labels-idx1-ubyte
reading  t10k-images-idx3-ubyte
reading  train-images-idx3-ubyte


In [None]:
# Independent copy of the test images, so the diffuser can be applied without
# touching the array stored in mnist_dict (the two are compared afterwards).
test_images_copy = mnist_dict['test_images'].copy()


In [None]:
# Independent copy of the training images, so the diffuser can be applied
# without touching the array stored in mnist_dict (the two are compared afterwards).
train_images_copy = mnist_dict['train_images'].copy()

In [None]:
def create_diffuser(M, N, diffuser_mean = 0.5, diffuser_std = 0.5, dim = 1, rng = None): # Create Diffuser
    """Return an M x N random transmission matrix with entries limited to [0, 1].

    Entries are drawn from a normal distribution and then clipped, so values
    below 0 become 0 and values above 1 become 1.

    Args:
        M: number of rows.
        N: number of columns.
        diffuser_mean: mean of the normal distribution.
        diffuser_std: standard deviation of the normal distribution.
        dim: unused; kept so existing calls that pass it keep working.
        rng: optional random source exposing ``normal(loc, scale, size)``
            (e.g. ``np.random.default_rng(seed)``) for reproducible output.
            When None, NumPy's global random state is used.

    Returns:
        A float NumPy array of shape (M, N).
    """
    sampler = np.random if rng is None else rng
    diffuser_transmission = sampler.normal(diffuser_mean, diffuser_std, [M, N])
    # Vectorized clamp; replaces an element-by-element Python loop.
    return np.clip(diffuser_transmission, 0, 1)

In [None]:
# 28 x 28 transmission matrix; each image is right-multiplied by it, so the
# image width has to be 28 (assumed here — the loader reads the real size from
# the file header).
# NOTE(review): no random seed is set in the visible cells, so the diffuser
# presumably differs on every run — confirm whether that is intended.
diffuser = create_diffuser(28, 28)

In [None]:
# Apply the diffuser to every test image with one batched matrix product.
# Writing the float product straight back into the uint8 array corrupts every
# value above 255, so the product is computed in float64 and rescaled into
# 0..255 before it is converted back to uint8.
# The scale is the largest column sum of the diffuser: no output pixel can
# exceed 255 times that value. It depends only on the diffuser, so the train
# and test sets are scaled identically.
diffused_test = test_images_copy.astype(np.float64) @ diffuser
peak_gain = diffuser.sum(axis=0).max()
if peak_gain > 0:
  diffused_test /= peak_gain
test_images_copy = np.rint(diffused_test).astype(np.uint8)

In [None]:
# Apply the diffuser to every training image with one batched matrix product.
# Writing the float product straight back into the uint8 array corrupts every
# value above 255, so the product is computed in float64 and rescaled into
# 0..255 before it is converted back to uint8.
# The scale is the largest column sum of the diffuser: no output pixel can
# exceed 255 times that value. It depends only on the diffuser, so the train
# and test sets are scaled identically.
diffused_train = train_images_copy.astype(np.float64) @ diffuser
peak_gain = diffuser.sum(axis=0).max()
if peak_gain > 0:
  diffused_train /= peak_gain
train_images_copy = np.rint(diffused_train).astype(np.uint8)

In [None]:
# Sanity check: once the diffuser has been applied, the working copies should
# no longer be identical to the arrays held in mnist_dict (prints False twice).
print(np.array_equal(train_images_copy, mnist_dict['train_images']))
print(np.array_equal(test_images_copy, mnist_dict['test_images']))

False
False


In [None]:
# Replace the stored images with the diffused versions. After this cell the
# undiffused arrays are no longer reachable through mnist_dict.
mnist_dict['train_images'] = train_images_copy
mnist_dict['test_images'] = test_images_copy

In [None]:
# Spot check: label of the training sample at index 2.
mnist_dict['train_labels'][2]

4

In [None]:
from skimage.io import imsave

In [None]:
# Splits to export; each name is combined with '_images' / '_labels' to index
# mnist_dict and is also used as the output sub-directory name.
datasets = ['train','test']

In [None]:
# NOTE(review): this cell held unparseable leftover text
# ("categimport numpy as npkjljl") that raised a SyntaxError and stopped
# "Run All". It defined nothing that other cells use, so it is disabled here.

In [None]:
# Export every image as a PNG at <datapath>/<train|test>/<label>/NNNNN.png,
# where NNNNN is the number of files already in that label directory.
for dataset in datasets:   # for the train and the test set
    images = mnist_dict[dataset+'_images']
    labels = mnist_dict[dataset+'_labels']
    no_of_samples = images.shape[0]     # number of samples

    # Next file number per label directory. The directory is listed once, the
    # first time it is seen, and counted from there; listing it again for
    # every image would make the export quadratic in the number of images.
    next_index = {}

    for indx in range(no_of_samples):
        if indx % 10000 == 0:
            # Sparse progress output instead of one line per image.
            print(dataset, indx)
        image = images[indx]
        label = labels[indx]
        class_dir = datapath+dataset+'/'+str(label)+'/'
        if class_dir not in next_index:
            # Create the split directory and the class-specific sub-directory.
            os.makedirs(class_dir, exist_ok=True)
            next_index[class_dir] = len(os.listdir(class_dir))
        filenumber = next_index[class_dir]
        imsave(class_dir+'%05d.png'%(filenumber), image)  # save the image
        next_index[class_dir] = filenumber + 1


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
test 5000
test 5001
test 5002
test 5003
test 5004
test 5005
test 5006
test 5007
test 5008
test 5009
test 5010
test 5011
test 5012
test 5013
test 5014
test 5015
test 5016
test 5017
test 5018
test 5019
test 5020
test 5021
test 5022
test 5023
test 5024
test 5025
test 5026
test 5027
test 5028
test 5029
test 5030
test 5031
test 5032
test 5033
test 5034
test 5035
test 5036
test 5037
test 5038
test 5039
test 5040
test 5041
test 5042
test 5043
test 5044
test 5045
test 5046
test 5047
test 5048
test 5049
test 5050
test 5051
test 5052
test 5053
test 5054
test 5055
test 5056
test 5057
test 5058
test 5059
test 5060
test 5061
test 5062
test 5063
test 5064
test 5065
test 5066
test 5067
test 5068
test 5069
test 5070
test 5071
test 5072
test 5073
test 5074
test 5075
test 5076
test 5077
test 5078
test 5079
test 5080
test 5081
test 5082
test 5083
test 5084
test 5085
test 5086
test 5087
test 5088
test 5089
test 5090
test 5091
test 5092
test 

In [None]:
# Clone the project repository into the current working directory.
!git clone https://github.com/amotz1/backet_classification.git

Cloning into 'backet_classification'...
remote: Enumerating objects: 179, done.[K
remote: Counting objects: 100% (179/179), done.[K
remote: Compressing objects: 100% (122/122), done.[K
remote: Total 179 (delta 120), reused 114 (delta 55), pack-reused 0[K
Receiving objects: 100% (179/179), 22.12 KiB | 11.06 MiB/s, done.
Resolving deltas: 100% (120/120), done.


In [None]:
# Show the current working directory.
pwd

'/content'

In [None]:
# Change into the cloned repository so main.py can be run by relative path.
cd backet_classification/

/content/backet_classification


In [None]:
pip install wandb --upgrade

Collecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/d4/f6/91c07f54c2162854f5028aaa13f576ca17a3bc0cf6da02c2ad5baddae128/wandb-0.10.33-py2.py3-none-any.whl (1.8MB)
[K     |▏                               | 10kB 24.3MB/s eta 0:00:01[K     |▍                               | 20kB 32.5MB/s eta 0:00:01[K     |▌                               | 30kB 25.0MB/s eta 0:00:01[K     |▊                               | 40kB 19.2MB/s eta 0:00:01[K     |█                               | 51kB 9.7MB/s eta 0:00:01[K     |█                               | 61kB 11.3MB/s eta 0:00:01[K     |█▎                              | 71kB 10.4MB/s eta 0:00:01[K     |█▍                              | 81kB 11.5MB/s eta 0:00:01[K     |█▋                              | 92kB 11.8MB/s eta 0:00:01[K     |█▉                              | 102kB 9.4MB/s eta 0:00:01[K     |██                              | 112kB 9.4MB/s eta 0:00:01[K     |██▏                             | 122kB 9.4MB

In [None]:
# Run main.py from the current working directory in a separate Python process.
!python3 main.py

[34m[1mwandb[0m: Currently logged in as: [33mamotz[0m (use `wandb login --relogin` to force relogin)
2021-07-01 16:52:13.150271: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
[34m[1mwandb[0m: Tracking run with wandb version 0.10.33
[34m[1mwandb[0m: Syncing run [33mdry-night-173[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/amotz/backet_classification[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/amotz/backet_classification/runs/1kaal5ed[0m
[34m[1mwandb[0m: Run data is saved locally in /content/backet_classification/wandb/run-20210701_165211-1kaal5ed
[34m[1mwandb[0m: Run `wandb offline` to turn off syncing.

model.fc.requires_grad  Linear(in_features=512, out_features=10, bias=True)
	 fc.weight
	 fc.bias
CNN(
  (cnn): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momen