In [1]:
import os
import urllib.request
import gzip 
import shutil
import scipy.misc

In [2]:
datapath = '/content/MnistData/'

In [3]:
if not os.path.exists(datapath):
  os.makedirs(datapath)

In [4]:
 urls = ['https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz',
         'https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz',
         'https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz',
         'https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz']



In [5]:
for url in urls:
    filename = url.split('/')[-1]   # GET FILENAME
    
    if os.path.exists(datapath+filename):
        print(filename, ' already exists')  # CHECK IF FILE EXISTS
    else:
        print('Downloading ',filename)
        urllib.request.urlretrieve (url, datapath+filename) # DOWNLOAD FILE
     

print(datapath+filename)

Downloading  train-images-idx3-ubyte.gz
Downloading  train-labels-idx1-ubyte.gz
Downloading  t10k-images-idx3-ubyte.gz
Downloading  t10k-labels-idx1-ubyte.gz
/content/MnistData/t10k-labels-idx1-ubyte.gz


In [6]:
files = os.listdir(datapath)

In [7]:
print(files)

['t10k-labels-idx1-ubyte.gz', 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', 't10k-images-idx3-ubyte.gz']


In [8]:
for file in files:
  if file.endswith('gz'):
    print('extracting file ',file) 
    with gzip.open(datapath+file,'rb') as f_in:
      with open(datapath+file.split('.')[0],'wb') as f_out:
        shutil.copyfileobj(f_in,f_out)

print('extraction completed')
for file in files:
  print('removing archives')
  os.remove(datapath+file)

print('completed to remove archives')



extracting file  t10k-labels-idx1-ubyte.gz
extracting file  train-labels-idx1-ubyte.gz
extracting file  train-images-idx3-ubyte.gz
extracting file  t10k-images-idx3-ubyte.gz
extraction completed
removing archives
removing archives
removing archives
removing archives
completed to remove archives


In [9]:
import os
import codecs
import numpy as np
import torch
import torchvision

In [10]:
def get_int(b):
  return int(codecs.encode(b,'hex'), 16)

In [11]:
mnist_dict = {}

In [12]:
files = os.listdir(datapath)

In [13]:
files

['train-images-idx3-ubyte',
 't10k-images-idx3-ubyte',
 'train-labels-idx1-ubyte',
 't10k-labels-idx1-ubyte']

In [14]:
for file in files:
  if file.endswith('ubyte'):
    print('reading ',file)

    with open(datapath+file, 'rb') as f:
      data = f.read()

      type = get_int(data[:4])
      length = get_int(data[4:8])

      if type == 2051:
        category = 'images'
        num_rows = get_int(data[8:12])
        num_cols = get_int(data[12:16])

        parsed = np.frombuffer(data,dtype='uint8',offset = 16)
        parsed = parsed.reshape(length, num_rows, num_cols)

      elif type == 2049:
        category = 'labels'
        parsed = np.frombuffer(data,dtype='uint8',offset = 8)
        parsed = parsed.reshape(length)
      
      else:
        assert False,'unspecified type'
      
      if length == 60000:
        data_set = 'train'

      elif length == 10000:
        data_set = 'test'

      else:
        assert False, 'unspecified length'

      mnist_dict[data_set + '_' + category] = parsed 



      


reading  train-images-idx3-ubyte
reading  t10k-images-idx3-ubyte
reading  train-labels-idx1-ubyte
reading  t10k-labels-idx1-ubyte


In [15]:
test_images_copy = mnist_dict['test_images'].copy()


In [16]:
train_images_copy = mnist_dict['train_images'].copy()

In [17]:
def create_diffuser(M, N, diffuser_mean = 0.5, diffuser_std = 0.5, dim = 1): # Create Diffuser
    diffuser_transmission = np.random.normal(diffuser_mean, diffuser_std, [M, N])
    for i in range(M):
        for j in range(N):
            diffuser_transmission[i, j] = max(diffuser_transmission[i, j], 0)
            diffuser_transmission[i, j] = min(diffuser_transmission[i, j], 1)

    # diffuser_transmission = torch.from_numpy(diffuser_transmission)
    return diffuser_transmission

In [18]:
cr = 10
N = 64

In [19]:
import math
from  PIL import Image
M = math.floor(N/cr)
diffuser = create_diffuser(M, N)
diffuser.shape
test_images_copy_new = np.zeros((10000,64,64))
test_images_copy_new1 = np.zeros((10000,6,64))


In [20]:
test_images_copy_new[0].shape

(64, 64)

In [21]:
for i in range(len(test_images_copy)):
   test_images_copy_new[i] = np.array(Image.fromarray(test_images_copy[i]).resize((64,64)))
   test_images_copy_new1[i] = diffuser@test_images_copy_new[i]

In [22]:
train_images_copy_new = np.zeros((60000,64,64))
train_images_copy_new1 = np.zeros((60000,6,64))

for i in range (len(train_images_copy)):
  train_images_copy_new[i] = np.array(Image.fromarray(train_images_copy[i]).resize((64,64)))
  train_images_copy_new1[i] = diffuser@train_images_copy_new[i]


In [23]:
train_images_copy_new1 = train_images_copy_new1.astype(np.uint8)
test_images_copy_new1 = test_images_copy_new1.astype(np.uint8)

In [24]:
mnist_dict['train_images'] = train_images_copy_new1
mnist_dict['test_images'] = test_images_copy_new1

In [25]:
mnist_dict['train_labels'][2]

4

In [26]:
from skimage.io import imsave

In [27]:
datasets = ['train','test']

In [28]:
import numpy as np

In [29]:
for dataset in datasets:   # FOR TRAIN AND TEST SET
    images = mnist_dict[dataset+'_images']   # IMAGES
    labels = mnist_dict[dataset+'_labels']   # LABELS
    no_of_samples = images.shape[0]     # NUBMER OF SAMPLES
    for indx in range (no_of_samples):  # FOR EVERY SAMPLE
        print(dataset, indx)
        image = images[indx]            # GET IMAGE
        label = labels[indx]            # GET LABEL
        if not os.path.exists(datapath+dataset+'/'+str(label)+'/'):    # IF DIRECTORIES DO NOT EXIST THEN 
            os.makedirs (datapath+dataset+'/'+str(label)+'/')       # CREATE TRAIN/TEST DIRECTORY AND CLASS SPECIFIC SUBDIRECTORY
        filenumber = len(os.listdir(datapath+dataset+'/'+str(label)+'/'))  # NUMBER OF FILES IN THE DIRECTORY FOR NAMING THE FILE
        imsave(datapath+dataset+'/'+str(label)+'/%05d.png'%(filenumber),image)  # SAVE T

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
test 5000
test 5001
test 5002
test 5003
test 5004
test 5005
test 5006
test 5007
test 5008
test 5009
test 5010
test 5011
test 5012
test 5013
test 5014
test 5015
test 5016
test 5017
test 5018
test 5019
test 5020
test 5021
test 5022
test 5023
test 5024
test 5025
test 5026
test 5027
test 5028
test 5029
test 5030
test 5031
test 5032
test 5033
test 5034
test 5035
test 5036
test 5037
test 5038
test 5039
test 5040
test 5041
test 5042
test 5043
test 5044
test 5045
test 5046
test 5047
test 5048
test 5049
test 5050
test 5051
test 5052
test 5053
test 5054
test 5055
test 5056
test 5057
test 5058
test 5059
test 5060
test 5061
test 5062
test 5063
test 5064
test 5065
test 5066
test 5067
test 5068
test 5069
test 5070
test 5071
test 5072
test 5073
test 5074
test 5075
test 5076
test 5077
test 5078
test 5079
test 5080
test 5081
test 5082
test 5083
test 5084
test 5085
test 5086
test 5087
test 5088
test 5089
test 5090
test 5091
test 5092
test 

In [30]:
!git clone https://github.com/amotz1/backet_classification.git

Cloning into 'backet_classification'...
remote: Enumerating objects: 182, done.[K
remote: Counting objects: 100% (182/182), done.[K
remote: Compressing objects: 100% (125/125), done.[K
remote: Total 182 (delta 121), reused 113 (delta 55), pack-reused 0[K
Receiving objects: 100% (182/182), 44.84 KiB | 14.95 MiB/s, done.
Resolving deltas: 100% (121/121), done.


In [31]:
pwd

'/content'

In [32]:
cd backet_classification/

/content/backet_classification


In [33]:
pip install wandb --upgrade

Collecting wandb
  Downloading wandb-0.11.0-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 33.9 MB/s 
[?25hCollecting urllib3>=1.26.5
  Downloading urllib3-1.26.6-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 66.4 MB/s 
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.18-py3-none-any.whl (170 kB)
[K     |████████████████████████████████| 170 kB 62.1 MB/s 
[?25hCollecting configparser>=3.8.1
  Downloading configparser-5.0.2-py3-none-any.whl (19 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting subprocess32>=3.5.3
  Downloading subprocess32-3.5.4.tar.gz (97 kB)
[K     |████████████████████████████████| 97 kB 8.3 MB/s 
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.1-py3-none-any.whl (7.5 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting sentry-sdk>=0.4.0
  Downloading sentry_sdk-1.3.0-py2.py3-none-any.whl (133

In [43]:
    train_params = {'num_workers': 0, 'batch_size': 256,'shuffle': True}
    valid_params = {'num_workers': 0, 'batch_size': 256, 'shuffle': True}


    train_transforms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Resize((224, 224)),
        ])

    train_generator = torchvision.datasets.ImageFolder('/content/MnistData' + '/' + 'train', train_transforms)
    train, val = torch.utils.data.random_split(train_generator, [48000, 12000])

    train_loader = torch.utils.data.DataLoader(train, pin_memory=True, **train_params)
    valid_loader = torch.utils.data.DataLoader(val, pin_memory=True, **valid_params)

    


In [44]:
len(train_generator)


60000

In [45]:
num_of_pixels = 196608000

total_sum = 0
for batch in train_loader:
  total_sum += batch[0].sum()

mean = total_sum / num_of_pixels

sum_of_squared_error = 0
for batch in train_loader:
  sum_of_squared_error += ((batch[0]- mean).pow(2)).sum()

std = torch.sqrt(sum_of_squared_error / num_of_pixels)

mean, std


(tensor(10.5045), tensor(61.9717))

In [50]:
def get_mean_std(loader):
  channels_sum, channels_squared_sum, num_batches = 0,0,0
  
  for data, _ in loader:
    channels_sum += torch.mean(data, dim=[0,2,3])
    channels_squared_sum += torch.mean(data**2, dim=[0,2,3])
    num_batches += 1

  mean = channels_sum/num_batches
  std = (channels_squared_sum/num_batches - mean**2)**0.5

  return mean, std


In [51]:
mean, std = get_mean_std(train_loader)
mean,std

(tensor([0.2858, 0.2858, 0.2858]), tensor([0.2869, 0.2869, 0.2869]))

In [53]:
 !python3 main.py

[34m[1mwandb[0m: Currently logged in as: [33mamotz[0m (use `wandb login --relogin` to force relogin)
2021-07-22 18:40:01.639825: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
[34m[1mwandb[0m: Tracking run with wandb version 0.11.0
[34m[1mwandb[0m: Syncing run [33mclear-yogurt-206[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/amotz/backet_classification[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/amotz/backet_classification/runs/2oo0my5o[0m
[34m[1mwandb[0m: Run data is saved locally in /content/backet_classification/wandb/run-20210722_183959-2oo0my5o
[34m[1mwandb[0m: Run `wandb offline` to turn off syncing.

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
epoch = 1  train_loss =  2.589768648147583  accuracy = 0.15078125
epoch = 1  train_loss =  2.3083041965961457  accuracy = 0.204296875
epoch = 1  train_loss =  2.199414