### Split the cifar-100 dataset
1. For each fine class, randomly select 100 of the 500 training images as the validation set; the remaining 400 form the training set.
2. For each fine class, randomly split the training set $S$ into two disjoint subsets: the first subset $s_{coarse}$ has only coarse labels, while the second subset $s_{fine}$ has fine labels as well. 

In [1]:
-- Load the CIFAR-100 archive (train and test splits, each carrying coarse and
-- fine integer labels) into the global `cifar100` and show its structure.
local dataset_path = './data/cifar100_whitened_coarse_fine_int.t7'
cifar100 = torch.load(dataset_path)
print(cifar100)

{
  trainData : 
    {
      data : DoubleTensor - size: 50000x3x32x32
      size : function: 0x40b7da80
      labelCoarse : IntTensor - size: 50000
      labelFine : IntTensor - size: 50000
    }
  testData : 
    {
      data : DoubleTensor - size: 10000x3x32x32
      size : function: 0x40074fb8
      labelCoarse : IntTensor - size: 10000
      labelFine : IntTensor - size: 10000
    }
}


In [16]:
-- Sanity check on the coarse labels: if each of the 20 coarse classes holds
-- 2500 training images, sum / 2500 equals 1 + 2 + ... + 20 = 210.
local coarse_label_sum = cifar100.trainData.labelCoarse:sum()
print(coarse_label_sum)
print(coarse_label_sum / 2500)
print(20 * (20 + 1) / 2)

525000	
210	
210	


In [6]:
-- Sanity check on the fine labels: if each of the 100 fine classes holds
-- 500 training images, sum / 500 equals 1 + 2 + ... + 100 = 5050.
local fine_label_sum = cifar100.trainData.labelFine:sum()
print(fine_label_sum)
print(fine_label_sum / 500)
print(100 * (100 + 1) / 2)

2525000	
5050	
5050	


In [9]:
-- Attach an `indicator` tensor to the training data that records which label
-- levels each image exposes:
--   {1,1} -> both fine and coarse labels, {0,1} -> coarse label only.
-- Images of a fine class are ranked in dataset order (1st, 2nd, ... occurrence
-- of that class) and the rank is looked up in one shared random permutation of
-- 1..500. Ranks whose permuted value is <= 250 lose their fine label, i.e.
-- half of a 500-image class becomes coarse-only.
torch.manualSeed(2)  -- fixed seed so the split is reproducible
fine_perm = torch.randperm(500)

local labels_fine = cifar100.trainData.labelFine
local num_train = labels_fine:size(1)
cifar100.trainData['indicator'] = torch.IntTensor(num_train, 2):fill(1)
local indicator = cifar100.trainData.indicator

-- One pass over the data with a running per-class counter. This yields the
-- same per-class ranks as scanning the whole dataset once per class, without
-- repeating the full scan 100 times.
local seen_per_class = {}
for j = 1, num_train do
    local class = labels_fine[j]
    local rank = (seen_per_class[class] or 0) + 1
    seen_per_class[class] = rank
    if fine_perm[rank] <= 250 then
        indicator[j][1] = 0  -- hide the fine label for this image
    end
end

In [10]:
-- Inspect the dataset table again; trainData now carries the `indicator` field.
cifar100

{
  trainData : 
    {
      data : DoubleTensor - size: 50000x3x32x32
      labelCoarse : IntTensor - size: 50000
      size : function: 0x40909a80
      indicator : IntTensor - size: 50000x2
      labelFine : IntTensor - size: 50000
    }
  testData : 
    {
      data : DoubleTensor - size: 10000x3x32x32
      size : function: 0x410e2fb8
      labelCoarse : IntTensor - size: 10000
      labelFine : IntTensor - size: 10000
    }
}


In [14]:
-- Spot-check indicator rows 10000..10010 (column 1: fine-label flag, column 2: coarse-label flag).
cifar100.trainData.indicator[{{10000,10010},{}}]

 0  1
 0  1
 0  1
 0  1
 0  1
 1  1
 1  1
 0  1
 1  1
 0  1
 0  1
[torch.IntTensor of size 11x2]



In [17]:
-- Count the training images whose fine-label flag (indicator column 1) is 0,
-- i.e. the images that expose only a coarse label.
c = cifar100.trainData.indicator:select(2, 1):eq(0):sum()
print(c)

25000	


In [21]:
-- Count how many images of fine class 6 kept their fine label
-- (indicator column 1 equal to 1).
local fine_labels = cifar100.trainData.labelFine
local flags = cifar100.trainData.indicator
c = 0
for idx = 1, 50000 do
    if fine_labels[idx] == 6 and flags[idx][1] == 1 then
        c = c + 1
    end
end
print(c)

250	


#### This split is used for tuning the hyperparameters of the model.

- Initialization

In [2]:
-- Pre-allocate the three splits used for hyperparameter tuning. Every split is
-- a table with zeroed image/label tensors, an `indicator` tensor
-- ({1,1}: fine and coarse labels, {0,1}: coarse label only) and a `size`
-- function returning the number of images it holds.

-- Fine-labelled training split: 20000 images, indicator {1,1}.
train_fine_tmp = {
    data        = torch.DoubleTensor(20000, 3, 32, 32):zero(),
    labelCoarse = torch.IntTensor(20000):zero(),
    labelFine   = torch.IntTensor(20000):zero(),
    indicator   = torch.IntTensor(20000, 2):fill(1),
    size        = function() return 20000 end
}

-- Coarse-only training split: 20000 images, indicator {0,1}.
train_coarse_tmp = {
    data        = torch.DoubleTensor(20000, 3, 32, 32):zero(),
    labelCoarse = torch.IntTensor(20000):zero(),
    labelFine   = torch.IntTensor(20000):zero(),
    indicator   = torch.IntTensor(20000, 2):fill(1),
    size        = function() return 20000 end
}
train_coarse_tmp.indicator:select(2, 1):zero()   -- fine label hidden
train_coarse_tmp.indicator:select(2, 2):fill(1)  -- coarse label available

-- Validation split: 10000 images, indicator {1,1}.
val_tmp = {
    data        = torch.DoubleTensor(10000, 3, 32, 32):zero(),
    labelCoarse = torch.IntTensor(10000):zero(),
    labelFine   = torch.IntTensor(10000):zero(),
    indicator   = torch.IntTensor(10000, 2):fill(1),
    size        = function() return 10000 end
}

- Assign values

In [9]:
-- Distribute the training images over the validation / coarse-only / fine
-- tuning splits. Images of each fine class are ranked in dataset order and the
-- rank is mapped through one shared random permutation of 1..500:
--   value <= 50 or > 450  -> validation split
--   value in 51..250      -> coarse-only split
--   value in 251..450     -> fine split
-- Classes are processed one after another, so every output split is ordered
-- by fine class.
torch.manualSeed(2)
fine_perm = torch.randperm(500)

local source = cifar100.trainData

-- Copy image j and both of its labels into row `slot` of split `dst`.
local function copy_sample(dst, slot, j)
    dst.data[slot] = source.data[j]
    dst.labelFine[slot] = source.labelFine[j]
    dst.labelCoarse[slot] = source.labelCoarse[j]
end

val_count, train_fine_count, train_coarse_count = 0, 0, 0
for i = 1, 100 do
    count = 0  -- rank of the current image within fine class i
    for j = 1, 50000 do
        if source.labelFine[j] == i then
            count = count + 1
            local draw = fine_perm[count]
            if draw <= 50 or draw > 450 then
                val_count = val_count + 1
                copy_sample(val_tmp, val_count, j)
            elseif draw <= 250 then
                -- draw > 50 is already implied by the failed test above
                train_coarse_count = train_coarse_count + 1
                copy_sample(train_coarse_tmp, train_coarse_count, j)
            else
                train_fine_count = train_fine_count + 1
                copy_sample(train_fine_tmp, train_fine_count, j)
            end
        end
    end
end

In [10]:
-- Show the field layout of the coarse-only tuning split.
train_coarse_tmp

{
  data : DoubleTensor - size: 20000x3x32x32
  labelCoarse : IntTensor - size: 20000
  size : function: 0x40d1f450
  indicator : IntTensor - size: 20000x2
  labelFine : IntTensor - size: 20000
}


In [11]:
-- Show the field layout of the fine-labelled tuning split.
train_fine_tmp

{
  data : DoubleTensor - size: 20000x3x32x32
  labelCoarse : IntTensor - size: 20000
  size : function: 0x417bff28
  indicator : IntTensor - size: 20000x2
  labelFine : IntTensor - size: 20000
}


In [12]:
-- Show the field layout of the validation split.
val_tmp

{
  data : DoubleTensor - size: 10000x3x32x32
  labelCoarse : IntTensor - size: 10000
  size : function: 0x40d1f258
  indicator : IntTensor - size: 10000x2
  labelFine : IntTensor - size: 10000
}


In [13]:
-- Coarse-label sum and length of the fine tuning split
-- (20000 images over 20 coarse classes -> expected sum 1000 * 210).
local fine_split_coarse = train_fine_tmp.labelCoarse
print(fine_split_coarse:sum())
print(fine_split_coarse:size())


210000	
 20000
[torch.LongStorage of size 1]



In [16]:
-- Coarse-label sum and length of the coarse-only tuning split
-- (20000 images over 20 coarse classes -> expected sum 1000 * 210).
local coarse_split_coarse = train_coarse_tmp.labelCoarse
print(coarse_split_coarse:sum())
print(coarse_split_coarse:size())

210000	
 20000
[torch.LongStorage of size 1]



In [26]:
-- Validation split: 10000 images over 20 coarse classes, so with 500 images
-- per coarse class sum / 500 equals 1 + 2 + ... + 20 = 210.
local val_coarse_sum = val_tmp.labelCoarse:sum()
print(val_coarse_sum)
print(val_coarse_sum / 500)
print(20 * (20 + 1) / 2)

105000	
210	
210	


In [19]:
-- Coarse-only tuning split: 20000 images over 100 fine classes, so with 200
-- images per fine class sum / 200 equals 1 + 2 + ... + 100 = 5050.
local coarse_split_fine_sum = train_coarse_tmp.labelFine:sum()
print(coarse_split_fine_sum)
print(coarse_split_fine_sum / 200)
print(100 * (100 + 1) / 2)

1010000	
5050	
5050	


In [20]:
-- Fine tuning split: 20000 images over 100 fine classes, so with 200 images
-- per fine class sum / 200 equals 1 + 2 + ... + 100 = 5050.
local fine_split_fine_sum = train_fine_tmp.labelFine:sum()
print(fine_split_fine_sum)
print(fine_split_fine_sum / 200)
print(100 * (100 + 1) / 2)

1010000	
5050	
5050	


In [24]:
-- Fine tuning split: 20000 images over 20 coarse classes, so with 1000 images
-- per coarse class sum / 1000 equals 1 + 2 + ... + 20 = 210.
local fine_split_coarse_sum = train_fine_tmp.labelCoarse:sum()
print(fine_split_coarse_sum)
print(fine_split_coarse_sum / 1000)
print(20 * (20 + 1) / 2)

210000	
210	
210	


#### Save *_tmp datasets

In [33]:
-- Persist the three tuning splits, written in the listed order.
local tmp_outputs = {
    { './data/val_tmp.t7',          val_tmp },
    { './data/train_fine_tmp.t7',   train_fine_tmp },
    { './data/train_coarse_tmp.t7', train_coarse_tmp },
}
for _, entry in ipairs(tmp_outputs) do
    torch.save(entry[1], entry[2])
end

### Final training sets
- Fix the tuned hyperparameters and train on the whole 50000 images

In [27]:
-- Build the final training splits from all 50000 training images (no
-- validation hold-out): half of every fine class goes to the coarse-only
-- split and the other half to the fine-labelled split.
-- Indicator convention: {1,1} both fine and coarse labels, {0,1} only coarse.

-- Initialize fine dataset (indicator {1,1})
train_fine = {
    data = torch.DoubleTensor(25000,3,32,32):fill(0),
    labelCoarse = torch.IntTensor(25000):fill(0),
    labelFine = torch.IntTensor(25000):fill(0),
    indicator = torch.IntTensor(25000,2):fill(1),
    size = function () return 25000 end
} --25000

-- Initialize coarse dataset
train_coarse = {
    data = torch.DoubleTensor(25000,3,32,32):fill(0),
    labelCoarse = torch.IntTensor(25000):fill(0),
    labelFine = torch.IntTensor(25000):fill(0),
    indicator = torch.IntTensor(25000,2):fill(1),
    size = function () return 25000 end
} --25000
-- Flag the final coarse split itself as coarse-only ({0,1}). This used to
-- write to train_coarse_tmp, which left train_coarse.indicator at {1,1} and
-- made this cell depend on the tuning cells having been run.
train_coarse.indicator[{{}, {1}}]:fill(0)
train_coarse.indicator[{{}, {2}}]:fill(1)

-- Same seed and permutation as the earlier split cells.
torch.manualSeed(2)
fine_perm = torch.randperm(500)
train_fine_count = 0
train_coarse_count = 0
for i=1,100 do
   count = 0  -- rank of the current image within fine class i
   for j=1,50000 do
       if cifar100.trainData.labelFine[j] == i then
          count = count + 1
          if fine_perm[count] <= 250 then -- coarse set
             train_coarse_count = train_coarse_count + 1
             train_coarse.data[train_coarse_count] = cifar100.trainData.data[j]
             train_coarse.labelFine[train_coarse_count] = cifar100.trainData.labelFine[j]
             train_coarse.labelCoarse[train_coarse_count] = cifar100.trainData.labelCoarse[j]
          else -- fine set
             train_fine_count = train_fine_count + 1
             train_fine.data[train_fine_count] = cifar100.trainData.data[j]
             train_fine.labelFine[train_fine_count] = cifar100.trainData.labelFine[j]
             train_fine.labelCoarse[train_fine_count] = cifar100.trainData.labelCoarse[j]
          end
       end
   end
end

-- Both pre-allocated splits must be filled completely; otherwise trailing
-- rows would silently keep their zero image and label 0.
assert(train_coarse_count == 25000 and train_fine_count == 25000,
       'final split sizes do not match the pre-allocated 25000 rows')

In [28]:
-- Show the field layout of the final fine-labelled training split.
train_fine

{
  data : DoubleTensor - size: 25000x3x32x32
  labelCoarse : IntTensor - size: 25000
  size : function: 0x40b881a8
  indicator : IntTensor - size: 25000x2
  labelFine : IntTensor - size: 25000
}


In [29]:
-- Coarse-label sum of the final fine split; 25000 images over 20 coarse classes -> expected 1250 * 210 = 262500.
train_fine.labelCoarse:sum()

262500	


In [30]:
-- Coarse-label sum of the final coarse-only split; expected 1250 * 210 = 262500.
train_coarse.labelCoarse:sum()

262500	


In [31]:
-- Fine-label sum of the final fine split; 25000 images over 100 fine classes -> expected 250 * 5050 = 1262500.
train_fine.labelFine:sum()

1262500	


In [32]:
-- Fine-label sum of the final coarse-only split; expected 250 * 5050 = 1262500.
train_coarse.labelFine:sum()

1262500	


#### Save the final train and test datasets

In [34]:
-- Give the test set an indicator too: every test image is flagged {1,1},
-- i.e. both fine and coarse labels are available.
local test_flags = torch.IntTensor(10000, 2):fill(1)
cifar100.testData.indicator = test_flags

In [35]:
-- Persist the test set and the two final training splits, written in the
-- listed order.
local final_outputs = {
    { './data/test.t7',         cifar100.testData },
    { './data/train_fine.t7',   train_fine },
    { './data/train_coarse.t7', train_coarse },
}
for _, entry in ipairs(final_outputs) do
    torch.save(entry[1], entry[2])
end