In [4]:
import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.nn.init as init
import torchvision
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [5]:
import os
import numpy as np
import pandas as pd
import random
from PIL import Image
from sklearn.metrics import average_precision_score
import pylab as pl
from IPython import display
from skimage.transform import resize
import datetime

In [6]:
class FineGrainedDataset(Dataset):
    
    def __init__(self, csv_path,picture_path, picturesize, transform=True):
        """
        Construct Fine Grained Dataset.
        
        Args
            csv_path: Path to CSV file.
            picturesize: Dimensions of sample images.
            transform: 
        """
        self.transform = transform
        self.picturesize = picturesize
        
        # construct dataframe with onehot notation
        self.df = pd.get_dummies(pd.read_csv(csv_path))
        # create ID column with image file names
        self.df['id'] = self.df['image_id'].apply(lambda x: str(x)) + "_" + self.df['tag_id'].apply(lambda x: str(x)) + ".png"
        # only use class columns and id column
        self.df = self.df.iloc[:, 10:]
        
        #replace -1 with 0
        self.df = self.df.replace(to_replace=-1, value=0)
        
        self.path = picture_path
            
        
    def __getitem__(self, idx):
        """
        Support integer indexing on dataset (range 0 to len(self)).
        """
        sample_id = self.df['id'].iloc[idx]
        
        # get target values for sample
        targets = self.df.loc[idx, self.df.columns != 'id'].values.astype('float32')
        
        # get sample image
        image = np.asarray(Image.open(self.path + sample_id))
        
        # transform sample
        if self.transform:
            img = image
            img_normalized = np.copy(img)
            img_normalized = img_normalized / 255.
            
            img_resized = resize(img_normalized, output_shape=(self.picturesize,self.picturesize), mode='reflect', anti_aliasing=True)[:,:,:3]
            
            sample = {'id': sample_id, 'targets': torch.from_numpy(targets), 'image': torch.from_numpy(img_resized)}
        else:
            sample = {'id': sample_id, 'targets': targets, 'image': image}
        
        return sample
    
    def __len__(self):
        """
        Get length of dataset.
        """
        return len(self.df)
    

In [7]:
dataset = FineGrainedDataset(csv_path='./dataset_v2/train.csv',picture_path='./dataset_v2/root/train/cropped/', picturesize=224, transform=True)
dataset.df.head()

Unnamed: 0,sunroof,luggage_carrier,open_cargo_area,enclosed_cab,spare_wheel,wrecked,flatbed,ladder,enclosed_box,soft_shell_box,...,sub_class_van,color_black,color_blue,color_green,color_other,color_red,color_silver/grey,color_white,color_yellow,id
0,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,16490_15036.png
1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,16490_31658.png
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,17122_26971.png
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,12193_19301.png
4,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,12193_35906.png


In [8]:
from collections import Counter

In [18]:
array = dataset.df.iloc[:,:-1].values

In [23]:
occurances = np.bincount(array)
#Counter(L)

ValueError: object too deep for desired array

In [21]:
L = array.tolist()

In [29]:

# Python3 program to find the most  
# frequent element in an array. 
  
def mostFrequent(arr, n): 
  
    # Sort the array 
    arr.sort() 
  
    # find the max frequency using 
    # linear traversal 
    max_count = 1; res = arr[0]; curr_count = 1
      
    for i in range(1, n):  
        if (arr[i] == arr[i - 1]).all(): 
            curr_count += 1
              
        else : 
            if (curr_count > max_count):  
                max_count = curr_count 
                res = arr[i - 1] 
              
            curr_count = 1
      
    # If last element is most frequent 
    if (curr_count > max_count): 
      
        max_count = curr_count 
        res = arr[n - 1] 
      
    return res 
  
# Driver Code 
arr = dataset.df.iloc[:,:-1].values
n = len(arr) 
print(mostFrequent(arr, n)) 
  
# This code is contributed by Smitha Dinesh Semwal. 


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]


In [45]:
A = dataset.df.iloc[:,:-1].values
#arr.sort(axis=0)

In [58]:
A = np.array([[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0],
     [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0],
     [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]])




SyntaxError: invalid syntax (<ipython-input-58-1cd117ad5060>, line 1)

In [52]:
A.shape

(11617, 37)

In [56]:
axis = 0
u, indices = np.unique(arr, return_inverse=True)
answer = u[np.argmax(np.apply_along_axis(np.bincount, axis, indices.reshape(arr.shape),
                                None, np.max(indices) + 1), axis=axis)]

In [47]:
(unique[maxpos],counts[maxpos])

(0, 391934)

In [57]:
pd.Series(answer)

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    1
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
35    0
36    0
dtype: int64