In [12]:
from hopfield_modern.hopfield_modern import Hopfield

In [13]:
import os
import random
import numpy as np
import pandas as pd
from typing import Callable
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook

In [14]:
def norm(prediction, data):
    t = prediction - data
    # print(np.linalg.norm(t))
    return  -np.linalg.norm(t)
    # return np.sum(np.abs(data))
    
    
def norm2(prediction, data):
    t = (prediction - data)
    return 1/(np.linalg.norm(t) + 1)


In [15]:
def softmax(data):
    t = np.exp(data - np.max(data))
    return t/np.sum(t)

In [16]:
def load_dir(dataset: list, path: str, label: bool, sort=True):
    dataset.sort(key=lambda e: e["id"])
    if dataset:
        index = dataset[-1]["id"]+1
    else:
        index = 0
    fnames = [(path+"/Red/"+i[:2]+"_Red.txt", path+"/Green/"+i[:2]+"_Green.txt",path+"/Blue/"+i[:2]+"_Blue.txt") for i in [i for i in os.walk(path)][1][2]]
    for fname in fnames:
        person = {"id": index}
        person["label"] = label
        with open(fname[0],"r") as rfile, open(fname[1],"r") as gfile, open(fname[2],"r") as bfile:
            person["r"] = [float(i) for i in rfile.readlines()[1:]]
            person["g"] = [float(i) for i in gfile.readlines()[1:]]
            person["b"] = [float(i) for i in bfile.readlines()[1:]]
        
        if sort:
            person["r"].sort()
            person["g"].sort()            
            person["b"].sort()
        
        index+=1
        dataset.append(person)
        
def load_dataset(dataset: list, path: str, pathpos: str, pathneg: str, sort=True):
    load_dir(dataset, path+"/"+pathpos, True, sort=sort)
    load_dir(dataset, path+"/"+pathneg, False, sort=sort)    
        
        
# def get_filter(dataset: list, filter: str):
#     if filter!="r" and filter!="g" and filter!="b":
#         raise ValueError('Wrong filter: must be "r", "g", "b"')
#     new_dataset = []
#     for person in dataset:
#         new_dataset.append({"id": person["id"], "label": person["label"], "data": person[filter]})
    
#     return new_dataset

def show_plot_by_filter(dataset: list, filter: str):
    plt.clf()
    if filter!="r" and filter!="g" and filter!="b":
        raise ValueError('Wrong filter: must be "r", "g", "b"')
    for i in np.arange(0,len(dataset), 1):
        color="green"
        if dataset[i]["label"]:
            color="red"
        for j in dataset[i][filter]:
            plt.plot(i,j, "o", markersize=0.5, color=color)
    plt.show()

    
def binarize_person(person: dict, precision=1e-3, up=1.8, down=0.2, radius=0, flatten=True, filters=3):
    person_data = np.zeros((3, int((up-down)//precision)+1), np.float32)[0:filters]
    
    dots  = (
        ((np.array(person["r"]).clip(down, up)-down)//precision).astype(np.uint32),
        ((np.array(person["g"]).clip(down, up)-down)//precision).astype(np.uint32),
        ((np.array(person["b"]).clip(down, up)-down)//precision).astype(np.uint32))[0:filters]

    
    for color_index in range(filters):
        for dot in dots[color_index]:
            person_data[color_index][max(0, dot-radius) : min(dot+radius+1, person_data.shape[1])].fill(1)
    
    if flatten:
        return {"id": person["id"], "label": person["label"], "data": person_data.flatten()}
    else:
        return {"id": person["id"], "label": person["label"], "data": person_data}

def binarize_person_by_filter(person: dict, precision=1e-3, up=1.8, down=0.2, radius=0, flatten=True, filter='g'):
    person_data = np.zeros((int((up-down)//precision)+1), np.float32)
    
    dots  = ((np.array(person[filter]).clip(down, up)-down)//precision).astype(np.uint32)

    
    for dot in dots:
        person_data[max(0, dot-radius) : min(dot+radius+1, person_data.shape[0])].fill(1)
    
    if flatten:
        return {"id": person["id"], "label": person["label"], "data": person_data.flatten()}
    else:
        return {"id": person["id"], "label": person["label"], "data": person_data}





def show_plot_avg_by_filter(dataset: list, filter: str):
    plt.clf()
    if filter!="r" and filter!="g" and filter!="b":
        raise ValueError('Wrong filter: must be "r", "g", "b"')
    for person in dataset:
        
        if person["label"]:
            color="red"
            x = 0
        else:
            color="green"
            x = 1
             
        for y in person[filter]:
            plt.plot(x,y, "o", markersize=0.5, color=color)
    plt.show()
    


In [17]:
def max_score_i(dataset: np.ndarray, X: np.ndarray, i: int, score_fn: Callable):
    # if i==0:
    #     max_i = 1
    # else:
    #     max_i = 0
    max_i = 0
    # print(f"len {len(dataset)}")
    # for j in (k for k in range(len(dataset)) if k!=i):
    for j in range(len(dataset)):
        if score_fn(X,dataset[max_i]) <= score_fn(X,dataset[j]):
            max_i = j
            
    return max_i

In [18]:
def get_stats(dataset, channel):
    mx = max([len(p[channel]) for p in dataset])
    out = np.zeros(mx+1, dtype=int)
    
    for p in dataset:
        out[:len(p[channel])] += 1
        
    return out
    

In [19]:
def get_mask_of_min_dims(dataset, channel, ndims):
    mask = np.zeros(len(dataset), dtype=bool)
    for index in range(len(dataset)):
        if len(dataset[index][channel])>=ndims:
            mask[index] = True
    
    return mask
    

In [20]:
dataset_full = []
load_dataset(dataset_full, "C:/Users/User/Desktop/Data/Data", "BC", "Control", sort=True)

In [21]:
# random.shuffle(dataset_full)


# train, test = dataset_full[:int(len(dataset_full)*train_ratio)], dataset_full[int(len(dataset_full)*train_ratio):]
# print(f"Train set: {len(train)}")
# print(f"Test set: {len(test)}")

In [23]:
#N = 40 # const in test 28
channel = 'b'
num_of_dims = 23 #55
# assert np.sum(get_mask_of_min_dims(train, channel, num_of_dims)) == len(train)
# assert np.sum(get_mask_of_min_dims(test, channel, num_of_dims)) == len(test)

In [24]:
random_dims = np.array([{'data': random.sample(person[channel], num_of_dims),
                                'label': person['label'],
                                'id': person['id']
                                } for person in dataset_full])



In [35]:

result = []


image_scaling_test = [10]
tests_per_generation = 100



for image_scaling in (image_scaling_test):
    print(image_scaling)
    one_scale = []
    
    
    for test_iteration in tqdm_notebook(range(tests_per_generation)):
        one_test = []
        
        dataset_dim = np.array([{'data': random.sample(person[channel], num_of_dims),
                                'label': person['label'],
                                'id': person['id']
                                } for person in dataset_full])

        _images = np.array([i['data'] for i in dataset_dim])
        _images = np.array([np.concatenate([i]*image_scaling) for i in _images])


        for p in _images:
            p.sort()
            
        _images -= _images.min()
        _images /= _images.max()
        # dataset_color = softmax(dataset_color) # not!!!

        _images = _images*2-1
        
        

        for i in range(len(dataset_full)):
            
            _mask = np.ones(len(_images), dtype=bool)
            _mask[i] = False

            _X = _images[i].copy()
            memory = _images[_mask].copy()
            
            model = Hopfield(memory)
            out = model.run(_X, 5)
            max_i = max_score_i(dataset=_images, X=out, i=i, score_fn=norm)
            
            
            if dataset_dim[max_i]["label"]==dataset_full[i]["label"]:
                one_test.append(1)
            else:
                one_test.append(0)
    
        one_scale.append(np.average(one_test))
    result.append((image_scaling, _images.shape[0]/_images.shape[1], np.average(one_scale)))


dataframe = pd.DataFrame(result)
dataframe.to_csv("results.csv", index=False)
dataframe

10


  0%|          | 0/100 [00:00<?, ?it/s]

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 230 is different from 96)