In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import generator

In [None]:
# --- Phantom generation settings (read as globals by generate_phantoms) ---

# Shape tuple forwarded as the first argument to the generator.
shape = (100,) * 3

# Parameter grid: every (porosity, blobiness, noise) combination is generated.
porosity = np.array([0.5])
blobiness = np.array([1, 2, 5])
noise = np.array([100.0, 1_000.0, 100_000.0])

# Forwarded to the generator as `noise_method=`.
noise_method = 'poisson'

# Forwarded positionally to the generator.
# NOTE(review): presumably the number of projection angles - confirm in generator.
num_of_angles = 180

# Data-set label passed to the generator; rebound to 'test' in a later cell.
tag = 'train'


def generate_phantoms(preview=True):
    """Create and process one phantom per parameter combination.

    Iterates over the module-level ``porosity``, ``blobiness`` and ``noise``
    arrays (porosity outermost, noise innermost) and hands every combination
    to ``generator.create_phantom_and_process`` together with the module-level
    ``shape``, ``num_of_angles``, ``tag`` and ``noise_method``.

    Parameters
    ----------
    preview : bool, default True
        Forwarded unchanged to the generator as ``preview=``.
    """
    parameter_grid = [
        (por, blob, noise_level)
        for por in porosity
        for blob in blobiness
        for noise_level in noise
    ]

    for por, blob, noise_level in parameter_grid:
        generator.create_phantom_and_process(
            shape,
            por,
            blob,
            noise_level,
            num_of_angles,
            tag,
            preview=preview,
            noise_method=noise_method,
        )

                
# Generate phantoms using the current module-level `tag` with the default preview=True.
generate_phantoms()

In [None]:
import data_manager as dm
# Catalogue of generated phantoms; later cells read its 'id_indx', 'porosity',
# 'blobiness', 'noise' columns and filter it on 'tag' with .query(...).
data_info = dm.show_data_info()
# Bare expression so the notebook renders the catalogue as the cell output.
data_info

In [None]:
import pixel_counter as pc

# Data-set label and dimension argument used by the pixel-counting calls below.
tag = 'train'
dim = 3

# Draw a random subsample of flat voxel positions for plotting: scatter-plotting
# every voxel would be too dense, so only 1/coeff of them are shown.
# The draw is unseeded and with replacement (duplicates are possible).
im_lenght = np.prod(shape)
coeff = 100
# `np.int` was removed in NumPy 1.24; the builtin `int` is the drop-in replacement.
sample_lenght = int(im_lenght // coeff)
# Uniform floats in [0, 1) scaled to [0, im_lenght) and truncated to integer positions.
indices = (np.random.rand(sample_lenght) * im_lenght).astype(int)


def scatter_plot(x, y, colors, title):
    """Draw a 10x10 inch scatter plot on a new figure with both axes fixed to [0, 1].

    Parameters
    ----------
    x, y : array-like
        Point coordinates.
    colors : sequence
        One colour per point, passed to matplotlib as ``c=``.
    title : str
        Figure title.
    """
    unit_interval = (0, 1)

    plt.figure(figsize=(10, 10))
    plt.xlim(*unit_interval)
    plt.ylim(*unit_interval)
    plt.scatter(x, y, c=colors, marker='.')
    plt.title(title)

    
def get_value_from_data_info(key, data_info, index):
    """Return the entry of column ``key`` at the row position given by ``index``.

    Parameters
    ----------
    key : str
        Column name to read from ``data_info``.
    data_info : pandas.DataFrame or mapping of column name -> sequence
        Table to read from; anything indexable by ``key`` works.
    index : tuple of int
        Index tuple as yielded by ``np.ndenumerate``. Only the first element
        is used, and it is a *position* (row number), not an index label.

    Returns
    -------
    The element stored at that position in the column.
    """
    column = data_info[key]
    position = index[0]

    # On a pandas Series `column[position]` is a label lookup. For a filtered
    # frame (e.g. the result of DataFrame.query) the labels no longer run from
    # 0, so that returns the wrong row or raises KeyError. `.iloc` is positional.
    if hasattr(column, 'iloc'):
        return column.iloc[position]
    return column[position]
    
    
def get_params_from_data_info(data_info, index):
    """Look up the generation parameters of one catalogue entry.

    Parameters
    ----------
    data_info
        Catalogue table, passed through to ``get_value_from_data_info``.
    index : tuple of int
        Index tuple identifying the entry, passed through unchanged.

    Returns
    -------
    tuple
        ``(porosity, blobiness, noise)`` read from the columns of the same names.
    """
    columns = ('porosity', 'blobiness', 'noise')
    return tuple(
        get_value_from_data_info(column, data_info, index) for column in columns
    )

    
def count_npa_vs_pp():
    """Plot neighbor pixel average against processed pixel value for every phantom.

    For each entry of the module-level ``data_info['id_indx']`` this calls
    ``pc.count_neighbor_average_array_and_save`` with the module-level ``dim``
    and ``tag``, subsamples the three returned arrays at the module-level
    ``indices`` and draws one scatter plot per entry. Points are red where the
    original-phantom sample is truthy and blue otherwise.
    """
    for position, phantom_id in np.ndenumerate(data_info['id_indx']):

        por, blob, noise_level = get_params_from_data_info(data_info, position)
        plot_title = f'porosity { por }, blobiness { blob }, noise { noise_level }'

        # Returned in this order: processed phantom, neighbor pixel average,
        # original phantom.
        proc_phantom, neighbor_avg, orig_phantom = pc.count_neighbor_average_array_and_save(
            dim, phantom_id, tag
        )

        # Same flat positions are taken from all three arrays so points stay aligned.
        sampled_proc, sampled_avg, sampled_orig = (
            np.take(volume, indices)
            for volume in (proc_phantom, neighbor_avg, orig_phantom)
        )

        point_colors = ['red' if is_set else 'blue' for is_set in sampled_orig]
        scatter_plot(sampled_proc, sampled_avg, point_colors, plot_title)
        
        
# Draw one scatter plot per catalogue entry, using the `tag`, `dim` and `indices` set above.
count_npa_vs_pp()

In [None]:
# generate_phantoms() reads the module-level `tag`, so rebinding it here makes
# the same parameter grid get generated again under the 'test' label.
tag = 'test'
# Previews are switched off for this run.
generate_phantoms(preview=False)

In [None]:
# Re-read the catalogue after the second generation run and display it.
data_info = dm.show_data_info()
data_info

In [None]:
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import jaccard_score


# Dimension argument forwarded to the pixel_counter and data_manager calls in train_and_test().
dim = 3

# Split the catalogue on its 'tag' column. DataFrame.query keeps the original
# index labels, so row labels in these subsets need not start at 0.
train_data = data_info.query('tag == "train"')
test_data = data_info.query('tag == "test"')


def _load_features_and_labels(data_id, data_tag):
    """Load the per-pixel table of one phantom and split it into features and labels.

    Returns
    -------
    (features, labels) : tuple of np.ndarray
        ``features`` has two columns, in this order: 'neighbor_average',
        'proc_phantom_pixel_values'. ``labels`` is the 'pixel_real_value' column.
    """
    pixels_df = dm.get_data(dimension=dim, id_indx=data_id, what_to_return='csv', tag=data_tag)
    features = np.asarray(pixels_df[['neighbor_average', 'proc_phantom_pixel_values']])
    labels = np.asarray(pixels_df['pixel_real_value'])
    return features, labels


def _print_feature_stats(features):
    """Print per-column mean, variance and sample count of ``features``."""
    # StandardScaler is used only as a convenient way to compute the statistics.
    scaler = preprocessing.StandardScaler().fit(features)
    print(f'mean: { scaler.mean_ }, var: { scaler.var_ }, samples_seen: { scaler.n_samples_seen_ }')


def train_and_test():
    """Fit a logistic regression per train phantom and evaluate it on the matching test phantom.

    For every id in the module-level ``train_data['id_indx']`` that also has a
    row in ``test_data`` this:

    1. loads the train pixel table and fits ``LogisticRegression`` on the raw
       (unscaled) features,
    2. prints feature statistics for the train and the test features,
    3. prints the fitted coefficients, the accuracy and the Jaccard score on
       the test pixels,
    4. scatter-plots a subsample of the test pixels (module-level ``indices``)
       coloured by their true label and overlays the decision boundary.

    Ids without both a train and a test row are reported and skipped.
    Reads the module-level ``dim``, ``train_data``, ``test_data`` and ``indices``.
    """
    for index, data_id in np.ndenumerate(train_data['id_indx']):

        # NOTE(review): the id is compared as a quoted string - confirm that
        # 'id_indx' is stored as text in the catalogue.
        train_datum = train_data.query(f'id_indx == "{ data_id }"')
        test_datum = test_data.query(f'id_indx == "{ data_id }"')

        if train_datum.empty or test_datum.empty:
            print(f'can\'t train&test model at index { data_id }')
            continue

        porosity, blobiness, noise = get_params_from_data_info(train_data, index)
        title = f'porosity { porosity }, blobiness { blobiness }, noise { noise }'

        X_train, Y_train = _load_features_and_labels(data_id, 'train')
        # The model is trained on raw features. The original code called
        # scaler.transform() but discarded the result, so scaling was never
        # applied; that no-op has been removed rather than silently changing
        # the model (and the [0, 1] plot range below).
        LR = LogisticRegression(C=1, solver='liblinear').fit(X_train, Y_train)

        print(f'train: { title }')
        _print_feature_stats(X_train)

        print('test:')
        # Called for its side effect only; the returned arrays are not needed here.
        # NOTE(review): presumably this writes the table loaded on the next line - confirm.
        pc.count_neighbor_average_array_and_save(dim, data_id, 'test')

        X_test, Y_test = _load_features_and_labels(data_id, 'test')
        # Statistics of the *test* features (previously computed from X_train by mistake).
        _print_feature_stats(X_test)

        Y_predict = LR.predict(X_test)
        print(LR.coef_, LR.intercept_, LR.classes_)
        print(f'prediction score: { LR.score(X_test, Y_test) }')
        # Argument order follows the documented signature (y_true, y_pred).
        print(f'jaccard score: { jaccard_score(Y_test, Y_predict) }')

        coef = LR.coef_
        intercept = LR.intercept_
        # Plot axes: x is 'proc_phantom_pixel_values' (column 1),
        # y is 'neighbor_average' (column 0).
        x = X_test[:, 1]
        y = X_test[:, 0]

        def line(x0):
            # Decision boundary coef[0, 0] * y + coef[0, 1] * x + intercept = 0, solved for y.
            return (-(x0 * coef[0, 1]) - intercept[0]) / coef[0, 0]

        xmin, xmax = x.min(), x.max()

        # Same random subsample of positions for coordinates and labels.
        x_part = np.take(x, indices)
        y_part = np.take(y, indices)
        Y_test_part = np.take(Y_test, indices)
        colors = ['red' if el else 'blue' for el in Y_test_part]
        scatter_plot(x_part, y_part, colors, title)
        plt.plot([xmin, xmax], [line(xmin), line(xmax)], color='black')


# Train and evaluate one model per phantom id present in both the train and test subsets.
train_and_test()