### IMPORTS

In [34]:
import os
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.feature import hog

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

from sklearn.metrics import accuracy_score

import warnings
# Silences every warning for the rest of the session.
# NOTE(review): this presumably also hides scikit-learn convergence warnings
# from the models fitted below — confirm they converge before trusting the scores.
warnings.filterwarnings('ignore')

### LOADING CSV

In [3]:
# Load the image index; downstream cells use its `filename` and `label` columns.
DATA = pd.read_csv(filepath_or_buffer='train.csv')

In [4]:
# Preview the first five rows of the index.
DATA.head(n=5)

Unnamed: 0,filename,label
0,C50P11thinF_IMG_20150724_114951_cell_148.png,malaria
1,C112P73ThinF_IMG_20150930_131659_cell_94.png,healthy
2,C59P20thinF_IMG_20150803_111333_cell_144.png,malaria
3,C158P119ThinF_IMG_20151115_181136_cell_202.png,malaria
4,C81P42ThinF_IMG_20150817_120524_cell_190.png,malaria


### TRAIN / TEST SPLIT

In [8]:
# Encode the string class labels as integers in a new column "l".
# The labels are read from DATA, the only frame this notebook loads, so the
# cell also runs on a fresh kernel.
# LabelEncoder numbers classes in sorted order; with the two labels visible in
# the preview above that would be healthy -> 0, malaria -> 1 (assumes no other
# label values exist in the file).
DATA["l"] = LabelEncoder().fit_transform(DATA['label'])

In [10]:
# Hold out 20% of the rows for validation; the fixed seed keeps the shuffled
# split reproducible between runs.
train_files, val_files, train_labels, val_labels = train_test_split(
    DATA['filename'],
    DATA['l'],
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

### MODEL

In [25]:
def clas(ml_model, xtr, xvl, ytr=None, yvl=None):
    """Fit a classifier and print its training and validation accuracy.

    Parameters
    ----------
    ml_model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in place.
    xtr : array-like
        Training features, passed to ``fit`` and ``predict``.
    xvl : array-like
        Validation features, passed to ``predict``.
    ytr : array-like, optional
        Training labels. Defaults to the notebook-level ``train_labels``.
    yvl : array-like, optional
        Validation labels. Defaults to the notebook-level ``val_labels``.

    Returns
    -------
    None
        The two accuracies are printed, not returned.
    """
    # Fall back to the split made earlier in the notebook so existing
    # three-argument calls keep working unchanged.
    if ytr is None:
        ytr = train_labels
    if yvl is None:
        yvl = val_labels

    ml_model.fit(xtr, ytr)

    acc_train = accuracy_score(ytr, ml_model.predict(xtr))
    # Printed as "Acc Test", but it is measured on the validation split.
    acc_test = accuracy_score(yvl, ml_model.predict(xvl))

    print('Acc Train {} Acc Test {}'.format(acc_train, acc_test))

### 1. MODEL BUILDING USING PIXEL FEATURES

In [16]:
# Pixel features for the training files: each image is resized to 40x40 and
# flattened into one vector of 40 * 40 * 3 values (the reshape assumes the
# resized image has exactly three channels).
IMG_DIMS = (40, 40)
train_features_pixel = []

for fname in train_files:
    image = resize(imread(os.path.join("cell_images", fname)), IMG_DIMS)
    features = image.reshape(IMG_DIMS[0] * IMG_DIMS[1] * 3)
    train_features_pixel.append(features)

In [18]:
# Turn the list of per-image vectors into a single array, one row per image.
train_features_pixel = np.asarray(train_features_pixel)

In [20]:
# Pixel features for the validation files, built the same way as the training
# ones: resize to 40x40, then flatten to 40 * 40 * 3 values (assumes three
# channels).
IMG_DIMS = (40, 40)
val_features_pixel = []

for fname in val_files:
    image = resize(imread(os.path.join("cell_images", fname)), IMG_DIMS)
    features = image.reshape(IMG_DIMS[0] * IMG_DIMS[1] * 3)
    val_features_pixel.append(features)

In [21]:
# Turn the list of per-image vectors into a single array, one row per image.
val_features_pixel = np.asarray(val_features_pixel)

#### LOGISTIC REGRESSION

In [32]:
# Logistic regression on the raw pixel features.
clas(
    ml_model=LogisticRegression(max_iter=200),
    xtr=train_features_pixel,
    xvl=val_features_pixel,
)

Acc Train 0.7444919647485744 Acc Test 0.6788491446345256


#### LINEAR SVM

In [33]:
# Linear SVM on the raw pixel features. LinearSVC draws random numbers while
# fitting, so the seed is pinned (same value as the data split) to make the
# reported accuracy repeatable across runs.
clas(LinearSVC(random_state=42), train_features_pixel, val_features_pixel)

Acc Train 0.7700233281493002 Acc Test 0.6697770865733541


### 2. MODEL BUILDING USING HOG FEATURES

In [35]:
# HOG features for the training files: resize each image to 128x64, then
# describe it with histograms of oriented gradients (9 orientations,
# 8x8-pixel cells, 2x2-cell blocks).
IMG_DIMS = (128,64)
train_features_hog = []

for i in train_files:
    image = imread(os.path.join("cell_images",i))
    image = resize(image,IMG_DIMS)
    # Only the descriptor is needed, so the HOG visualisation image is not
    # requested; hog() then returns the flat feature vector directly.
    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` — switch if this call starts failing after an upgrade.
    hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), multichannel=True)
    train_features_hog.append(hog_features)

In [47]:
# Turn the list of per-image HOG vectors into a single array, one row per image.
train_features_hog = np.asarray(train_features_hog)

In [49]:
# HOG features for the validation files, with the same settings as the
# training ones: 128x64 resize, 9 orientations, 8x8-pixel cells, 2x2-cell blocks.
IMG_DIMS = (128,64)
val_features_hog = []

for i in val_files:
    image = imread(os.path.join("cell_images",i))
    image = resize(image,IMG_DIMS)
    # Only the descriptor is needed, so the HOG visualisation image is not
    # requested; hog() then returns the flat feature vector directly.
    # NOTE(review): newer scikit-image releases replace `multichannel=True`
    # with `channel_axis=-1` — switch if this call starts failing after an upgrade.
    hog_features = hog(image, orientations=9, pixels_per_cell=(8, 8),
                       cells_per_block=(2, 2), multichannel=True)
    val_features_hog.append(hog_features)

In [50]:
# Turn the list of per-image HOG vectors into a single array, one row per image.
val_features_hog = np.asarray(val_features_hog)

#### LOGISTIC REGRESSION

In [51]:
# Logistic regression on the HOG features.
clas(
    ml_model=LogisticRegression(max_iter=200),
    xtr=train_features_hog,
    xvl=val_features_hog,
)

Acc Train 0.8982633488854329 Acc Test 0.8208916537065837


#### LINEAR SVM

In [52]:
# Linear SVM on the HOG features. LinearSVC draws random numbers while
# fitting, so the seed is pinned (same value as the data split) to make the
# reported accuracy repeatable across runs.
clas(LinearSVC(random_state=42), train_features_hog, val_features_hog)

Acc Train 0.9261275272161742 Acc Test 0.8113011923276309
