In [4]:
import emnist
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow_datasets as tfds
import sklearn
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
%matplotlib inline

In [3]:
!pip show tensorflow_datasets
!pip install emnist
!pip install tensorflow_datasets

Collecting tensorflow_datasets
  Downloading tensorflow_datasets-4.9.2-py3-none-any.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting etils[enp,epath]>=0.9.0
  Downloading etils-1.3.0-py3-none-any.whl (126 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.4/126.4 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
Collecting promise
  Downloading promise-2.3.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting tensorflow-metadata
  Downloading tensorflow_metadata-1.13.1-py3-none-any.whl (28 kB)
Collecting dm-tree
  Downloading dm_tree-0.1.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (152 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.8/152.8 kB[0m [31m24.6 MB/s[0m eta [36m0:00:00[0m
Collecting toml
  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting array-record
  Downloading arr

In [32]:
(ds_train, ds_test), info = tfds.load('omniglot', split=['train', 'test'], with_info=True)

In [33]:
df_train = tfds.as_dataframe(ds_train, info)
df_test  = tfds.as_dataframe(ds_test, info)

In [34]:
train_images = np.stack(df_train['image'])
train_images.shape

(19280, 105, 105, 3)

In [35]:
df_train.columns

Index(['alphabet', 'alphabet_char_id', 'image', 'label'], dtype='object')

In [None]:
df_test['']

In [36]:
df_train[['alphabet_char_id', 'label']].loc[np.where((df_train['alphabet'] == 27) & (df_train['alphabet_char_id'] == 23))]

Unnamed: 0,alphabet_char_id,label
1464,23,618
1619,23,618
1875,23,618
3247,23,618
4126,23,618
5234,23,618
6579,23,618
9827,23,618
11257,23,618
13097,23,618


In [37]:
''' Data handling general functions '''

def separate_fewshot(test_images, test_labels, n=1):
    oneshot_data = []
    classify_data = []
    for label in np.unique(test_labels):
        for num in np.random.choice(np.where(test_labels == label)[0], n, False):
            oneshot_data.append(num)
    temp = set(oneshot_data)
    for i in range(len(test_labels)):
        if not i in temp: classify_data.append(i)
    oneshot_images = test_images[oneshot_data]
    oneshot_labels = test_labels[oneshot_data]
    classify_images = test_images[classify_data]
    classify_labels = test_labels[classify_data]
    return oneshot_images, oneshot_labels, classify_images, classify_labels

In [73]:
''' Functions with linear methods '''

def test_PCA(train_images, train_labels, oneshot_images, oneshot_labels, classify_images, classify_labels, 
             n, n_components = 32, verbose=False, train=1):
    
    if verbose: print("======= PCA method: Training and evaluating ... =======")
    if verbose: print("Learning background ...")
    pca = PCA(n_components=n_components)
    pca.fit(X=train_images)

    if verbose: print("Vectorizing ...")
    oneshot_images = pca.transform(oneshot_images)
    classify_images = pca.transform(classify_images)

    if verbose: print("Learning oneshot ...")
    nn = min(train, 5)
    neigh = KNeighborsClassifier(n_neighbors = nn)
    neigh.fit(classify_images, classify_labels)

    if verbose: print("Predicting ...")
    pred = neigh.predict(oneshot_images)

    if verbose:
        print("Accuracy: ", np.sum(pred == oneshot_labels)/len(oneshot_labels))
        print("======= PCA method: Finished =======")

    return np.sum(pred == classify_labels)/len(classify_labels)

def test_LDA(train_images, train_labels, oneshot_images, oneshot_labels, classify_images, classify_labels, 
             n, n_components = 20, verbose=True, train=1):
    
    if verbose: print("======= LDA method: Training and evaluating ... =======")
    if verbose: print("Learning background ...")
    lda = LDA(n_components=n_components)
    lda.fit(X=train_images, y = train_labels)

    if verbose: print("Vectorizing ...")
    oneshot_images = lda.transform(oneshot_images)
    classify_images = lda.transform(classify_images)

    if verbose: print("Learning oneshot ...")
    nn = min(train, 5)
    neigh = KNeighborsClassifier(n_neighbors = nn)
    neigh.fit(classify_images, classify_labels)

    if verbose: print("Predicting ...")
    pred = neigh.predict(oneshot_images)

    if verbose:
        print("Accuracy: ", np.sum(pred == oneshot_labels)/len(oneshot_labels))
        print("======= LDA method: Finished =======")

    return np.sum(pred == classify_labels)/len(classify_labels)

In [39]:
for image in df_train['image']:
    assert(image.shape == (105, 105, 3))

In [66]:
N = 1
train_images, train_labels = np.stack(df_train['image']).reshape(-1, 105 * 105 * 3), df_train['label'].to_numpy()
test_images, test_labels  = np.stack(df_test['image']).reshape(-1, 105 * 105 * 3), df_test['label'].to_numpy()
oneshot_images, oneshot_labels, \
    classify_images, classify_labels = separate_fewshot(test_images, test_labels, n = N)

In [67]:
test_PCA(train_images, train_labels, oneshot_images, oneshot_labels, classify_images, classify_labels, n = N, verbose=True)

Learning background ...
Vectorizing ...
Learning oneshot ...
Predicting ...
Accuracy:  0.3171471927162367


  return np.sum(pred == classify_labels)/len(classify_labels)


0.0

In [81]:
unique_labels = df_train['label'].unique()
subsample_index = []
c = 4
for label in unique_labels:
    for ind in np.random.choice(np.where(df_train['label'] == label)[0], c, False):
        subsample_index.append(ind)
subsample_index = np.array(subsample_index)
train_images_lda = train_images[subsample_index]
train_labels_lda = train_labels[subsample_index]

In [82]:
train_images_lda.shape

(3856, 33075)

In [83]:
test_LDA(train_images_lda, train_labels_lda, oneshot_images, oneshot_labels, classify_images, classify_labels, n = N)

Learning background ...
Vectorizing ...
Learning oneshot ...
Predicting ...
Accuracy:  0.01669195751138088


  return np.sum(pred == classify_labels)/len(classify_labels)


0.0