In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import gc
import cv2 as cv
import cv2
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))
from PIL import Image

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
# Root folder of the dataset.
path = './fruit_vegetable/'
# One entry per item found in the train/ folder. os.listdir() returns names in
# arbitrary order, so sort them: the position of a name in `cats` is used as the
# integer class label and has to be identical on every run and machine
# (for example when a checkpoint trained in another session is reloaded).
cats = sorted(os.listdir(path + 'train/'))

In [7]:
# Number of entries found in the train/ folder; shown as the cell output.
len(cats)

36

In [8]:
def extract_data(dataset_name,path_name):
    """Load every image of one dataset split into memory as RGB arrays.

    Parameters
    ----------
    dataset_name : str
        Name of the split folder below ``path_name`` (e.g. ``'train'``).
    path_name : str
        Root directory of the dataset.

    Returns
    -------
    list
        One ``[image, label]`` pair per loaded image. ``image`` is a
        224 x 224 x 3 array in RGB channel order and ``label`` is the index of
        the image's folder name in the module-level ``cats`` list.
    """
    im_w = 224
    im_h = 224
    # Files whose names end with one of these are deliberately left out.
    skipped_suffixes = ('Image_56.jpg', 'Image_96.jpg')
    data = []
    for label, category in enumerate(cats):
        sub_path = os.path.join(path_name, dataset_name, category)
        for file_name in os.listdir(sub_path):
            # Check the skip list before decoding so excluded files are never read.
            if file_name.endswith(skipped_suffixes):
                continue
            img_path = os.path.join(sub_path, file_name)
            imag = cv2.imread(img_path)
            if imag is None:
                # cv2.imread reports an unreadable or non-image file by returning
                # None instead of raising; report it and move on rather than
                # crashing inside PIL.
                print('Skipping unreadable image: {}'.format(img_path))
                continue
            # OpenCV decodes to BGR channel order. Convert to RGB so the array
            # matches what PIL and the RGB normalisation statistics expect.
            rgb = cv2.cvtColor(imag, cv2.COLOR_BGR2RGB)
            resized_image = Image.fromarray(rgb).resize((im_w, im_h))
            data.append([np.array(resized_image), label])
    return data

In [9]:
# Load the 'train' split into memory as a list of [image, label] pairs.
train = extract_data('train', path)

In [10]:
# Load the 'test' split into memory as a list of [image, label] pairs.
test = extract_data('test', path)

In [None]:
import torch
# Fetch AlexNet with pretrained weights from the torchvision hub repository.
net = torch.hub.load('pytorch/vision:v0.6.0', 'alexnet', pretrained=True)
# Freeze every parameter that exists at this point so the optimizer cannot
# change the pretrained weights.
for weight in net.parameters():
    weight.requires_grad_(False)
# Put the model in evaluation mode; the returned module is shown as the cell output.
net.eval()

In [None]:
# Replace two layers of the AlexNet classifier with freshly initialised Linear
# layers of the same sizes. New layers have requires_grad=True by default, so
# they stay trainable even though the pretrained parameters were frozen.
net.classifier[-3] = torch.nn.Linear(in_features=4096, out_features=4096, bias=True)
net.classifier[-1] = torch.nn.Linear(in_features=4096, out_features=1000, bias=True)
# Chain: AlexNet (1000 outputs) -> Linear 1000 -> 36 -> Softmax over dim 1.
# The combined model therefore returns per-class probabilities, not raw logits.
net = torch.nn.Sequential(net,torch.nn.Linear(in_features=1000,out_features=36,bias=True),torch.nn.Softmax(dim=1))

In [None]:
import torch
import torchvision.transforms as transforms

# Training pipeline: random augmentation followed by tensor conversion and
# per-channel normalisation.
# NOTE(review): neither transform_train nor transform_test is referenced by any
# other visible cell. RandomCrop(32, padding=4) yields 32x32 crops and the
# mean/std look like CIFAR statistics -- confirm before using them on this data.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
])

# Evaluation pipeline: same normalisation as above, without the random augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276])
])

# Mini-batch size passed to the DataLoaders.
batch_size=16

In [None]:
from torch.utils.data import Dataset, DataLoader

class ImageDataset(Dataset):
    """Map-style dataset exposing an in-memory sequence of samples unchanged.

    Items are returned exactly as they are stored in the wrapped sequence; no
    transformation is applied here.
    """

    def __init__(self, dataset):
        # Keep a reference to the caller's sequence (no copy is made).
        self.dataset = dataset

    def __len__(self):
        """Return the number of stored samples."""
        samples = self.dataset
        return len(samples)

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        sample = self.dataset[idx]
        return sample

# Wrap the in-memory sample lists so they can be handed to a DataLoader.
trainset = ImageDataset(train)
testset = ImageDataset(test)

In [None]:
# Per-image preprocessing applied when a batch is collated:
# convert to a PIL image, resize so the shorter side is 256 px, take the central
# 224x224 crop, convert to a float tensor and normalise each channel.
# The mean/std values are the ImageNet statistics that torchvision documents
# for its pretrained models.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def generate_batch(batch):
    """Collate a list of ``[image, label]`` samples into batch tensors.

    Parameters
    ----------
    batch : list
        Samples drawn from the dataset; ``sample[0]`` is the image array
        (H x W x C, as built by ``extract_data``) and ``sample[1]`` its integer
        class label.

    Returns
    -------
    tuple
        ``(images, label)``: the transformed images stacked along a new first
        dimension, and a 1-D ``torch.long`` tensor with the labels.
    """
    # Hand the H x W x C array to the pipeline as-is. ToPILImage reads ndarrays
    # as H x W x C but tensors as C x H x W, so wrapping the array with
    # torch.from_numpy() made it misinterpret the image layout.
    images = torch.stack([transform(sample[0]) for sample in batch])
    label = torch.tensor([sample[1] for sample in batch], dtype=torch.long)
    return images,label

# Batch loaders over the two datasets. The training loader reshuffles the
# samples, the test loader keeps their order; both use two worker processes and
# build batches with generate_batch.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2,collate_fn = generate_batch)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2,collate_fn = generate_batch)

In [None]:
import torch.optim as optim
import torch.nn as nn

class _ProbabilityNLLLoss(nn.Module):
    """Cross-entropy for a model whose outputs are already softmax probabilities.

    ``nn.CrossEntropyLoss`` applies log-softmax internally and therefore expects
    raw logits. ``net`` ends with a ``Softmax`` layer, so feeding its output to
    ``CrossEntropyLoss`` would apply softmax twice and flatten the gradients.
    This module takes the log of the probabilities and applies the negative
    log-likelihood loss, which is the cross-entropy of the predicted distribution.

    Parameters
    ----------
    eps : float
        Lower bound applied to the probabilities before taking the logarithm.
    """

    def __init__(self, eps=1e-12):
        super().__init__()
        self.eps = eps
        self.nll = nn.NLLLoss()

    def forward(self, probabilities, target):
        # Clamp first so a probability of exactly 0 cannot produce -inf.
        return self.nll(torch.log(probabilities.clamp(min=self.eps)), target)


criterion = _ProbabilityNLLLoss()
# Parameters with requires_grad=False never receive gradients, so SGD leaves them untouched.
optimizer = optim.SGD(net.parameters(),lr = 0.001,momentum = 0.9)
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

net.to(device)
criterion.to(device)

In [None]:
import matplotlib.pyplot as plt
# Mean loss per mini-batch, one entry per epoch.
train_losses = []
validation_losses = []
# NOTE(review): this loop does not switch the model between train and eval
# mode; it runs in whatever mode `net` was left in earlier.
for epoch in range(25):  # loop over the dataset multiple times
    running_loss = 0.0  # window accumulator for the periodic progress print
    epoch_loss = 0.0    # accumulator for the whole epoch; never reset mid-epoch
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a pair of (inputs, labels)
        inputs, labels = data[0].to(device), data[1].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        # accumulate statistics
        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
    # Average over the number of batches. The previous version reused the
    # window accumulator (which the progress print resets) and multiplied by a
    # constant unrelated to batch_size.
    train_losses.append(epoch_loss/len(trainloader))
    validation_loss = 0.0
    # No gradients are needed while measuring the loss on the test loader.
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images,labels = images.to(device),labels.to(device)
            outputs = net(images)
            loss = criterion(outputs,labels)
            validation_loss += loss.item()
    validation_losses.append(validation_loss/len(testloader))
    print("{} Epoch done".format(epoch))

In [None]:
# Plot the per-epoch training loss curve.
print('Finished Training')
plt.plot(train_losses,label = 'train')
plt.legend()

In [None]:
# Plot the per-epoch loss measured on the test loader.
plt.plot(validation_losses,label = 'validation')
plt.legend()

In [None]:
# Write the model's parameters (state_dict only, not the module itself) to the
# working directory.
PATH = './alexnet_fine_tuned.pth'
torch.save(net.state_dict(), PATH)
# Second name for the same model object (an alias, not a copy).
temp_net = net

In [None]:
# Restore parameters from a previously saved checkpoint. `net` must still have
# the structure it had when the checkpoint was written.
PATH = '../input/alexnet-fined-tuned/alexnet_fine_tuned.pth'
# map_location remaps the stored tensors onto the device in use, so a
# checkpoint written on a GPU also loads in a CPU-only session.
net.load_state_dict(torch.load(PATH, map_location=device))
# Second name for the same model object (an alias, not a copy).
temp_net = net

In [None]:
# Drop the last two layers (the 36-way Linear layer and the Softmax) so that
# `net` returns the 1000-dimensional output of the layer before them.
# The guard makes the cell safe to re-run: without it every additional
# execution would strip two more layers.
if isinstance(net[-1], torch.nn.Softmax):
    net = net[:-2]

In [None]:
import pandas as pd

In [None]:
def make_data(dataloader):
    """Run ``net`` over every batch of ``dataloader`` and collect the results.

    Uses the module-level ``net`` and ``device``. Gradients are disabled while
    the batches are processed.

    Returns
    -------
    tuple
        ``(outputs, labels)``: two lists with one tensor per sample, left on
        ``device``.
    """
    collected_outputs = []
    collected_labels = []
    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            # Iterating a tensor walks its first dimension, so each list gains
            # one entry per sample of the batch.
            collected_outputs += list(net(images))
            collected_labels += list(labels)
    return collected_outputs, collected_labels

In [None]:
# Explicit CPU device, used to move tensors to host memory before converting
# them to NumPy arrays.
device1 = torch.device('cpu')

In [None]:
# Run the network over the training loader and keep its per-sample outputs and labels.
train_features,train_labels = make_data(trainloader)

In [None]:
# Run the network over the test loader and keep its per-sample outputs and labels.
test_features,test_labels = make_data(testloader)

In [None]:
def _to_numpy(value):
    """Return ``value`` as a NumPy array, moving tensors to ``device1`` first.

    Values that are not tensors are passed through ``np.asarray`` so that
    re-running this cell after ``test_labels`` has been converted still works.
    """
    if torch.is_tensor(value):
        return value.to(device1).numpy()
    return np.asarray(value)


training_data = [_to_numpy(t) for t in train_features]
labels = [_to_numpy(t) for t in train_labels]
# The test arrays must come from the test loader's results; they were
# previously built from the training features and labels by mistake.
testing_data = [_to_numpy(t) for t in test_features]
test_labels = [_to_numpy(t) for t in test_labels]

In [None]:
# Turn the training features and labels into DataFrames (one row per sample).
training_data,labels = pd.DataFrame(training_data),pd.DataFrame(labels)

In [None]:
# Turn the test features and labels into DataFrames (one row per sample).
testing_data,testing_labels = pd.DataFrame(testing_data),pd.DataFrame(test_labels)

In [None]:
# Persist the four tables as CSV files in the working directory, without the
# index column.
training_data.to_csv('./training_data.csv',index=False)
labels.to_csv('./labels.csv',index=False)
testing_data.to_csv('./testing_data.csv',index=False)
testing_labels.to_csv('./test_labels.csv',index=False)

In [None]:
# Stack the training and test tables (features and labels) into one pool.
# NOTE(review): this rebinds `train`, which earlier held the raw image list.
# NOTE(review): the row indices of both source frames are kept, so index values
# repeat in the concatenated frames.
train = pd.concat([training_data,testing_data])
y = pd.concat([labels,testing_labels])

In [None]:
from sklearn.model_selection import train_test_split

# Shuffled 90/10 split of the pooled data; the fixed random_state makes the
# split repeatable.
X_train,X_test,y_train,y_test = train_test_split(train,y,shuffle=True,random_state=2021,test_size=0.1)

In [None]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier trained on the extracted features.
svc = SVC(kernel='linear')
# y_train is a single-column DataFrame. scikit-learn expects a 1-D target and
# warns about column vectors, so flatten it before fitting.
svc.fit(X_train,np.ravel(y_train))

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the SVC on the held-out part of the split.
y_pred = svc.predict(X_test)
print(accuracy_score(y_test,y_pred))

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier trained on the same features.
# NOTE(review): depending on the scikit-learn version, l1_ratio may only take
# effect together with penalty='elasticnet' -- confirm it is needed.
lr = LogisticRegression(max_iter = 1000,l1_ratio=0)
# y_train is a single-column DataFrame. scikit-learn expects a 1-D target and
# warns about column vectors, so flatten it before fitting.
lr.fit(X_train,np.ravel(y_train))

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the logistic-regression model on the held-out part of the split.
y_pred = lr.predict(X_test)
print(accuracy_score(y_test,y_pred))