In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.svm import SVC
import os
from scipy import misc
from sklearn.model_selection import KFold
from tqdm import tqdm
import pickle

In [2]:
model_file = './saved_models/hindi_models.sav'
train_val_dir = os.getcwd() + "/Train_val/"
test = os.getcwd() + "/Test/"

In [3]:
def get_data(directory):
    X = []
    y = []
    l = 0
    labels = {}
    for label in os.listdir(directory):
        labels[label] = l
        for sample in os.listdir(directory + label):
            img = misc.imread(directory + label + "/" + sample)
            X.append(img)
            y.append(l)
        l += 1
        
    X = np.asarray(X)
    y = np.asarray(y)
    X = np.vstack([img.reshape(-1, ) for img in X])
    
    p = np.random.permutation(len(X))
    return X[p], y[p], labels

In [4]:
def svm(X_train, y_train):
    try:
        loaded_model = pickle.load(open(model_file, 'rb'))
        print('local model returned')
        return loaded_model

    except FileNotFoundError:
        models = []
        k = 5
        kf = KFold(n_splits=k)
        for train, val in tqdm(kf.split(X_train)):
            X_fold = X_train[train]
            y_fold = y_train[train]
            clf = SVC(kernel='rbf')
            clf.fit(X_fold, y_fold)

            models.append([[train, val], clf])
        pickle.dump(models, open(model_file, 'wb'))
        print('Fresh models returned')
        return models

In [5]:
X_train, y_train, labels = get_data(train_val_dir)
print(labels)

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  if __name__ == '__main__':


{'character_4_gha': 0, 'character_1_ka': 1, 'character_2_kha': 2, 'character_5_kna': 3, 'character_3_ga': 4}


In [6]:
models = svm(X_train, y_train)

local model returned


In [7]:
train_accuracy_collection = []
val_accuracy_collection = []
test_accuracy_collection = []

X_test, y_test, _ = get_data(test)

for model in tqdm(models):
    fold, cv_model = model

    train = fold[0]
    val = fold[1]
    X_fold = X_train[train]
    y_fold = y_train[train]

    clf = cv_model

    train_accuracy_collection.append(clf.score(X_fold, y_fold))

    X_val = X_train[val]
    y_val = y_train[val]
    val_accuracy_collection.append(clf.score(X_val, y_val))
    
    test_accuracy_collection.append(clf.score(X_test, y_test))

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  if __name__ == '__main__':
100%|██████████| 5/5 [06:25<00:00, 77.03s/it]


In [11]:
print(train_accuracy_collection)
print(val_accuracy_collection)
print(test_accuracy_collection)

[0.8402941176470589, 0.8366176470588236, 0.8391176470588235, 0.8348529411764706, 0.8354411764705882]
[0.8288235294117647, 0.8423529411764706, 0.8311764705882353, 0.85, 0.8482352941176471]
[0.625, 0.661, 0.655, 0.671, 0.665]
