In [None]:
import CV_utils as cvu
import numpy as np
import matplotlib.pyplot as plt
import sklearn.decomposition as sd
from sklearn.feature_extraction.image import extract_patches_2d
from sklearn.feature_extraction.image import reconstruct_from_patches_2d
import time
import pandas as pd
import multiprocessing
%matplotlib inline

## READ FRAMES

In [None]:
# Load the two representative clips (Lightness and Fragility). Each call returns four frame
# collections; only the first one (`*_b_frames`) is used further down in this notebook.
# NOTE(review): judging by the names these are binary / binary-greyscale / colour / greyscale
# frames — confirm against cvu.read_frames.
l_b_frames, l_bgs_frames, l_c_frames, l_gs_frames = cvu.read_frames('Video/representative/Lightness/Video attached to the ICMI 2017 Paper - Lightness.mp4')
f_b_frames, f_bgs_frames, f_c_frames, f_gs_frames = cvu.read_frames('Video/representative/Fragility/Video attached to the ICMI 2017 Paper - Fragility.mp4')

## DENSITY FRAME CALCULATION

In [None]:
def normalised_binary_density(frames, frame_size, batch_size=10, n_overlap=0, n_skip=0):
    """Average frames into per-batch position-density images.

    Every sampled frame is divided by 255 and accumulated; once `batch_size`
    frames have been accumulated their mean is emitted as one density image.

    Parameters
    ----------
    frames : iterable of arrays broadcastable to `frame_size`
        presumably 0/255 binary masks, so that the result lies in [0, 1] — TODO confirm.
    frame_size : shape of the accumulators (passed to np.zeros).
    batch_size : number of frames averaged in one density image.
    n_overlap : number of trailing frames of a batch that are re-used as the
        beginning of the following batch.
    n_skip : number of frames discarded after every sampled frame.

    Returns
    -------
    list of arrays, one per completed batch. Frames left over at the end that
    do not complete a batch are not returned.

    Notes
    -----
    After a batch is emitted the frame counter restarts from `n_overlap`, so the
    carried-over frames are counted in the next mean.
    NOTE(review): when n_overlap > batch_size/2, from the second batch on fewer
    than n_overlap frames are actually carried over while the counter still
    restarts from n_overlap — confirm this is intended.
    """
    densities = []                      # completed density images
    accumulator = np.zeros(frame_size)  # running sum of the current batch
    carry_over = np.zeros(frame_size)   # running sum of the frames shared with the next batch

    since_last_pick = 0  # 0 only before the first frame, then 1 + number of frames skipped so far
    n_in_batch = 0       # frames accounted for in the current batch

    for frame in frames:

        # sub-sampling: drop n_skip frames after each sampled one
        if 0 < since_last_pick <= n_skip:
            since_last_pick += 1
            continue

        scaled = frame/255

        # tail of the batch: also remember the frame for the next batch
        if n_in_batch >= batch_size-n_overlap:
            carry_over += scaled

        accumulator += scaled
        n_in_batch += 1

        # batch complete: emit its mean and restart from the carried-over frames
        if n_in_batch%batch_size == 0:
            densities.append(accumulator/n_in_batch)
            accumulator = carry_over
            n_in_batch = n_overlap
            carry_over = np.zeros(frame_size)

        since_last_pick = 1

    return densities

In [None]:
def plot_density(batch_list, label):
    """Draw every density image of `batch_list` in its own row of the current figure.

    Parameters
    ----------
    batch_list : sequence of 2-D arrays (as returned by normalised_binary_density).
    label : str, prefix of each subplot title; the batch index is appended.

    The colour scale is fixed to [0, 1] so that the subplots are comparable.
    """
    n_batches = len(batch_list)

    for i, batch in enumerate(batch_list):

        # the subplot must be selected *before* setting the title, otherwise the
        # title is attached to the previously active axes
        plt.subplot(n_batches, 1, i+1)
        plt.title(label+' '+str(i))
        plt.imshow(batch, vmin = 0, vmax = 1)
        plt.colorbar()

    # one layout pass once all the subplots exist is enough
    if n_batches:
        plt.tight_layout()

In [None]:
# Lightness, whole video: batch_size equals the number of frames, so one single density image is produced.
plt.figure(figsize=(10,10))
batch_list = normalised_binary_density(l_b_frames, frame_size=l_b_frames[0].shape, batch_size=len(l_b_frames))
plot_density(batch_list, 'Lightness')

In [None]:
# Lightness, finer granularity: batches of 10 sampled frames, 5 of them shared with the previous
# batch, 3 frames skipped after each sampled one. plot_density stacks one subplot per batch.
plt.figure(figsize=(80,80))
batch_list = normalised_binary_density(l_b_frames, frame_size=l_b_frames[0].shape, batch_size=10, n_overlap=5, n_skip=3)
plot_density(batch_list, 'Lightness')

In [None]:
# Fragility, whole video: batch_size equals the number of frames, so one single density image is produced.
plt.figure(figsize=(10,10))
batch_list = normalised_binary_density(f_b_frames, frame_size=f_b_frames[0].shape, batch_size=len(f_b_frames))
plot_density(batch_list, 'Fragility')

In [None]:
# Fragility, finer granularity: batches of 10 sampled frames, 5 of them shared with the previous
# batch, 3 frames skipped after each sampled one. plot_density stacks one subplot per batch.
plt.figure(figsize=(80,80))
batch_list = normalised_binary_density(f_b_frames, frame_size=f_b_frames[0].shape, batch_size=10, n_overlap=5, n_skip=3)
plot_density(batch_list, 'Fragility')

## DENSITY MANIPULATION and DATA LOADING

In [None]:
def density_estimation_per_frame(df):
    """Mean energy of the non-empty rows of a density image.

    For each row the sum of its squared values is computed; rows whose energy
    is exactly 0 are ignored and the remaining energies are averaged.

    Parameters
    ----------
    df : iterable of rows (e.g. a 2-D numpy array).
        NOTE(review): iterating a pandas DataFrame yields column labels, not
        rows — callers are assumed to pass arrays; confirm.

    Returns
    -------
    float : the mean row energy, or 0.0 when there is no non-empty row
        (previously this case raised ZeroDivisionError).
    """
    # integral over the columns of the squared function, one value per row
    row_energy = [np.sum(np.square(r)) for r in df]
    non_empty = [e for e in row_energy if e != 0]

    # a completely empty image carries no energy; avoid dividing by zero
    if not non_empty:
        return 0.0

    return sum(non_empty)/len(non_empty)

In [None]:
def dataset_density_feature(df_list):
    """Collapse a list of density images into one scalar feature.

    The row-wise density of every image is computed with
    density_estimation_per_frame; the feature is the sum of the squared
    densities divided by the number of images.

    Only the row-wise approach is active: the column-wise variant (same
    computation on the transposed images) is currently disabled.

    An empty `df_list` yields NaN (0/0 on a numpy scalar).
    """
    per_image_density = [density_estimation_per_frame(image) for image in df_list]

    return np.sum(np.square(per_image_density))/len(df_list)

In [None]:
def density_dataset(videos_frames_list, tot_video_df_list, batch_size, n_overlap, n_skip, verbose = False):
    """Build the density feature matrix for a group of videos.

    Parameters
    ----------
    videos_frames_list : DataFrame with one video per row and one frame per
        cell; the None cells (padding added when the dump was saved) are dropped.
    tot_video_df_list : DataFrame whose first column holds, at the same row
        position, the input for the global (whole video) density estimate.
    batch_size, n_overlap, n_skip : equally long sequences; the i-th elements
        define the i-th granularity passed to normalised_binary_density.
    verbose : print the progress.

    Returns
    -------
    np.ndarray with one row per video: the global density followed by one
    feature per granularity.
    """
    n_videos = videos_frames_list.shape[0]
    rows = []

    for position, (_, frame_row) in enumerate(videos_frames_list.iterrows()):

        # remove the None padding of the shorter videos
        frames = [f for f in frame_row.tolist() if f is not None]

        whole_video_density = tot_video_df_list.iloc[position, 0]

        if verbose:
            print("Creating row of video {}/{}".format(position+1, n_videos))

        # first feature: density computed on the whole video (global approach)
        features = [density_estimation_per_frame(whole_video_density)]

        # remaining features: density at the different granularity levels
        for k, overlap in enumerate(n_overlap):

            if verbose:
                print(".... processing the approach {}/{}".format(k+1, len(n_overlap)))

            batches = normalised_binary_density(frames, frame_size=frames[0].shape, batch_size=batch_size[k], n_overlap=overlap, n_skip=n_skip[k])
            features.append(dataset_density_feature(batches))

        rows.append(features)

    return np.asarray(rows)

## REAL DATASET

### for simple regression

In [None]:
def create_density_dataset(batch_size, n_overlap, n_skip, verbose = False):
    """Create the density dataset from the 15 pickled groups of videos.

    For each group j in 1..15 the binary frames and the density frames are
    read from 'Video/binaryFrame_pkl/videos_frames_<j>.pkl' and
    'Video/densityFrame_pkl/videos_density_<j>.pkl', converted with
    density_dataset and stacked row-wise.

    Raises
    ------
    Exception : when batch_size, n_overlap and n_skip differ in length.

    Returns
    -------
    np.ndarray : one row per video, groups concatenated in order.
    """
    # the three parameter lists describe the same set of granularities
    if not (len(n_overlap) == len(n_skip) == len(batch_size)):
        raise Exception("batch_size, n_overlap and n_skip must have the same size. {}, {} and {} respectively instead".format(len(batch_size), len(n_overlap), len(n_skip)))

    X = None

    for group in range(1, 16):

        if verbose:
            print("Extracting data for the videos from {} to {}".format((group-1)*10+1, group*10))

        # data extraction
        df_video_frames = pd.read_pickle('Video/binaryFrame_pkl/videos_frames_'+str(group)+'.pkl')
        df_density_frames = pd.read_pickle('Video/densityFrame_pkl/videos_density_'+str(group)+'.pkl')

        # sub-dataset of this group, appended below the ones already computed
        sub_dataset = density_dataset(df_video_frames, df_density_frames, batch_size, n_overlap, n_skip, verbose = verbose)
        X = sub_dataset if X is None else np.concatenate((X, sub_dataset), axis = 0)

    return X

In [None]:
# dataset's creation parameters: the i-th elements of the three lists define one granularity
# (frames per batch, frames shared between consecutive batches, frames skipped after each sample).
# The lists must have the same length (checked by create_density_dataset).
batch_size = [100, 50, 50, 30, 30, 20, 20, 10, 10]
n_overlap = [30, 10, 7, 5, 15, 5, 10, 5, 3]
n_skip = [0, 0, 2, 3, 1, 3, 2, 4, 5]

In [None]:
# call for dataset creation: build the feature matrix and persist it as CSV (one row per video)
# NOTE: this rebinds the global X, which the regression cells below also use.
X = create_density_dataset(batch_size, n_overlap, n_skip, verbose = True)
df = pd.DataFrame(X)
df.to_csv("datasets/density_dataset.csv", index=False)

## REGRESSION

### DATA LOADING

In [None]:
# mark loading

names, Y = cvu.load_marks()

# DataFrame.as_matrix() has been removed from pandas; .values (already used elsewhere in this
# notebook) returns the same ndarray
Y = Y.values.astype(float)
Y_l = Y[:,0]  # first column: lightness marks (per the variable naming)
Y_f = Y[:,1]  # second column: fragility marks (per the variable naming)

# NOTE(review): names is sorted independently of the rows of Y — confirm that the rows
# returned by cvu.load_marks are already in this order.
names = sorted(names)

In [None]:
# Feature matrix for the regression.
# NOTE(review): this reads density_datasetV2.csv while the dataset-creation cell writes
# datasets/density_dataset.csv — confirm which file is the intended one.
X = pd.read_csv("datasets/density_datasetV2.csv").values

# drop every sample (row) that contains at least one NaN feature, from both X and Y
delIdx = np.where(np.isnan(X))[0]
X = np.delete(X, delIdx, axis=0)
Y = np.delete(Y, delIdx, axis=0)

# NOTE: Y is rebound here, so this cell must not be re-run without re-running the mark loading first.
Y_l = Y[:, 0]
Y_f = Y[:, 1]

In [None]:
# split features and both targets into training (_tr) and test (_ts) parts
# NOTE(review): no seed is passed here; whether the split is reproducible depends on cvu.random_sampling.
X_tr, Y_l_tr, Y_f_tr, X_ts, Y_l_ts, Y_f_ts = cvu.random_sampling(X, Y_l, Y_f)

### TESTING

RLS

In [None]:
# grid of regularisation values for RLS: 0.001, 0.008, ... (step 0.007, upper bound 1 excluded)
alphas = {'alpha': list(np.arange(0.001, 1, 0.007))}

In [None]:
# LIGHTNESS: RLS regression on the lightness marks, alpha taken from the grid above
cvu.print_results('RLS',cvu.rlsCV_regression(alphas, X_tr, X_ts, Y_l_tr, Y_l_ts))

In [None]:
# FRAGILITY: RLS regression on the fragility marks, alpha taken from the grid above
cvu.print_results('RLS', cvu.rlsCV_regression(alphas, X_tr, X_ts, Y_f_tr, Y_f_ts))

LASSO

In [None]:
# NOTE(review): this dict is immediately overwritten by the next cell, which rebinds `alphas`
# to a plain array — this cell looks like a leftover.
alphas = {'alpha': list(np.arange(0.001, 1, 0.007))}

In [None]:
# grid of regularisation values for LASSO, passed as a plain array (not as a parameter dict)
alphas = np.arange(0.001, 1, 0.007)

In [None]:
# LIGHTNESS: LASSO regression on the lightness marks
#cvu.print_results('LASSO', cvu.lassoCV_regression(alphas, X_tr, X_ts, Y_l_tr, Y_l_ts))
# the five returned values are unpacked and printed one by one (the selected alpha is not printed)
alpha, mean_err, var_err, coef, y_pred = cvu.lassoCV_regression(alphas, X_tr, X_ts, Y_l_tr, Y_l_ts)
print(mean_err)
print(var_err)
print(coef)
print(y_pred)

In [None]:
# FRAGILITY: LASSO regression on the fragility marks
#cvu.print_results('LASSO', cvu.lassoCV_regression(alphas, X_tr, X_ts, Y_f_tr, Y_f_ts))
# the five returned values are unpacked and printed one by one (the selected alpha is not printed);
# this overwrites the variables of the lightness cell above
alpha, mean_err, var_err, coef, y_pred = cvu.lassoCV_regression(alphas, X_tr, X_ts, Y_f_tr, Y_f_ts)
print(mean_err)
print(var_err)
print(coef)
print(y_pred)

SVM

In [None]:
# grid of C values for the SVM: 0.0001, 0.0101, ... (step 0.01, upper bound 0.1 excluded)
c = {'C':list(np.arange(0.0001, 0.1, 0.01))}

In [None]:
# LIGHTNESS: SVM regression on the lightness marks, C taken from the grid above
cvu.print_results('SVM', cvu.svmCV_regression(c, X_tr, X_ts, Y_l_tr, Y_l_ts))

In [None]:
# FRAGILITY: SVM regression on the fragility marks, C taken from the grid above
cvu.print_results('SVM', cvu.svmCV_regression(c, X_tr, X_ts, Y_f_tr, Y_f_ts))

KERNEL

In [None]:
# kernel ridge settings: regularisation grid (rebinds `alphas` as a parameter dict again) and kernel name
alphas = {'alpha': list(np.arange(0.001, 1, 0.007))}
kernel = "sigmoid"

In [None]:
# LIGHTNESS: kernel ridge regression on the lightness marks with the kernel chosen above
cvu.print_results('RIDGE KERNEL', cvu.ridgeKernelCV_regression(alphas, X_tr, X_ts, Y_l_tr, Y_l_ts, kernel = kernel))

In [None]:
# FRAGILITY: kernel ridge regression on the fragility marks.
# The kernel chosen above is passed explicitly, as in the lightness run; without it the helper
# would silently fall back to its own default and the two results would not be comparable.
cvu.print_results('RIDGE KERNEL', cvu.ridgeKernelCV_regression(alphas, X_tr, X_ts, Y_f_tr, Y_f_ts, kernel = kernel))

TREE

In [None]:
# tree regression on the lightness marks
# NOTE(review): `ris` is overwritten by the fragility cell below before being printed.
ris = cvu.tree_regression(X_tr, X_ts, Y_l_tr, Y_l_ts)

In [None]:
# tree regression on the fragility marks (rebinds `ris`, discarding the lightness result)
ris = cvu.tree_regression(X_tr, X_ts, Y_f_tr, Y_f_ts)

In [None]:
# show the first three values returned by the most recently executed tree regression
print(ris[0], ris[1])
print(ris[2])

### DICTIONARY LEARNING

In [None]:
# function to learn the dictionary from the reference image
def learn_reference_image(ref_image, patch_size = 7, patches = 0.02, n_atoms = 25, sparsity = 1, max_iteration = 500, verbose = False):
    """Learn a patch dictionary from a reference image.

    The image is cropped with cvu.cut_relevant_part, a subset of its patches is
    extracted and flattened, each patch component is standardised over the
    patches (zero mean, unit standard deviation) and a
    MiniBatchDictionaryLearning model is fitted on the result.

    Parameters
    ----------
    ref_image : image the dictionary is learnt from.
    patch_size : forwarded to extract_patches_2d as the patch size.
    patches : forwarded to extract_patches_2d as `max_patches`.
    n_atoms : number of dictionary components.
    sparsity : `alpha` of the dictionary learner.
    max_iteration : `n_iter` of the dictionary learner.
        NOTE(review): recent scikit-learn releases renamed this argument —
        confirm against the installed version.
    verbose : print the progress and the time spent in each step.

    Returns
    -------
    (dl, atoms, width) : the fitted learner, its components and the number of
    columns of the cropped reference image.

    NOTE(review): no random_state is given to the patch extraction nor to the
    learner, so two runs may produce different dictionaries.
    """
    if verbose:
        t0 = time.time()
        print("Extracting the patches")

    cropped, _ = cvu.cut_relevant_part(ref_image)
    D = extract_patches_2d(cropped, patch_size, max_patches = patches)
    # one flattened patch per row
    D = D.reshape(D.shape[0], -1)

    # standardise every patch component (in place)
    D -= np.mean(D, axis = 0)
    D /= np.std(D, axis = 0)

    if verbose:
        print("Complete. Time spent: %s" % (time.time()-t0))

    dl = sd.MiniBatchDictionaryLearning(n_components = n_atoms, alpha = sparsity, n_iter=max_iteration)

    if verbose:
        t0 = time.time()
        print("Learning atoms")

    atoms = dl.fit(D).components_

    if verbose:
        print("Complete. Time spent: %s" % (time.time()-t0))

    return dl, atoms, cropped.shape[1]

In [None]:
# function to clean (cutting) the image to reproduce and to compute its encoding
def reconstruct_image(image_to_analyze, dl, atoms, patch_size, width = None, non_zero_components = 6, verbose = False):
    """Crop an image and sparse-code all its patches with a fitted dictionary.

    Parameters
    ----------
    image_to_analyze : image to encode.
    dl : fitted dictionary learner; its transform parameters are set to OMP
        with `non_zero_components` non-zero coefficients (this mutates `dl`).
    atoms : not used by the current implementation.
    patch_size : forwarded to extract_patches_2d.
    width : forwarded to cvu.cut_relevant_part.
    verbose : print the progress and the time spent in each step.

    Returns
    -------
    (cropped image, None, code) : the reconstruction of the image from the
    code is currently disabled, hence the second element is always None.
    """
    if verbose:
        t0 = time.time()
        print("Extracting the patches")

    # image cleaning from useless information
    cropped, _ = cvu.cut_relevant_part(image_to_analyze, width)

    # one flattened patch per row, centred component-wise (in place)
    flat_patches = extract_patches_2d(cropped, patch_size)
    flat_patches = flat_patches.reshape(flat_patches.shape[0], -1)
    flat_patches -= np.mean(flat_patches, axis=0)

    if verbose:
        print("Complete. Time spent: %s" % (time.time()-t0))
        t0 = time.time()
        print("Transforming the image (could require some minutes...)")

    dl.set_params(transform_algorithm='omp',transform_n_nonzero_coefs=non_zero_components)
    code = dl.transform(flat_patches)

    if verbose:
        print("Complete. Time spent: %s" % (time.time()-t0))

    return cropped, None, code

In [None]:
# function to get the encoding of the image to reproduce
def get_encoding(dl, atoms, image_to_analyze, patch_size,  width = None, verbose = False):
    """Return the sparse code of `image_to_analyze` computed by reconstruct_image.

    The cropped image and the (disabled) reconstruction returned by
    reconstruct_image are discarded; the visual comparison between the input
    and its reconstruction is currently not performed.
    """
    _, _, encoding = reconstruct_image(image_to_analyze, dl, atoms, patch_size, width = width, verbose = verbose)
    return encoding

In [None]:
# child process to parallelize the density frames encoding
def child_process(return_dict, index, dl, atoms, image_to_analyze, patch_size, width, verbose = False):
    """Encode one image and store its flattened code in `return_dict[index]`.

    Meant to be used as the target of a multiprocessing.Process: the result is
    handed back through the shared dictionary instead of being returned.
    """
    code = get_encoding(dl, atoms, image_to_analyze, patch_size, width = width, verbose = verbose)
    # flattened so that the encoding fits in one row of the dataset
    return_dict[index] = code.reshape(-1)

In [None]:
# function to create the dataset matrix
def create_dataset_from_atoms(image_to_analyze_list, dl, atoms, patch_size, width, verbose = False):
    """Encode every image of a DataFrame in parallel and stack the codes.

    One process per image is started (target: child_process); each one writes
    its flattened encoding in a shared dictionary, which is copied row by row
    in the returned matrix once all the processes have finished.

    Parameters
    ----------
    image_to_analyze_list : DataFrame whose first column holds one image per row.
    dl, atoms : fitted dictionary learner and its components.
    patch_size : (rows, columns) of a patch.
    width : number of columns of the images after the cropping.
    verbose : forwarded to the children.

    Returns
    -------
    (dl, atoms, encoding_list) : encoding_list has one row per image and
    n_patches * n_atoms columns.

    NOTE(review): the row length assumes that the cropping keeps the height of
    the first image and produces `width` columns — confirm against
    cvu.cut_relevant_part.
    """
    # number of patches of a (height x width) image: the vertical count depends on the
    # patch height, the horizontal one on the patch width (they differ for non square patches)
    height = image_to_analyze_list.iloc[0,0].shape[0]
    n_patches = (height-patch_size[0]+1)*(width-patch_size[1]+1)

    # preparing the final matrix
    encoding_list = np.zeros((len(image_to_analyze_list), n_patches*atoms.shape[0]))

    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    jobs = []

    # one child per image; children are identified by their 1-based position
    for p_idx, (_, row) in enumerate(image_to_analyze_list.iterrows(), start=1):

        # first column by position, consistently with the .iloc[0,0] access above
        image_to_analyze = row.iloc[0]

        process = multiprocessing.Process(target=child_process, args=[return_dict, p_idx, dl, atoms, image_to_analyze, patch_size, width, verbose])
        jobs.append(process)
        process.start()

    for p in jobs:
        p.join()

    # updating the matrix with the flattened encodings, in the order of the images
    for i in range(len(jobs)):
        encoding_list[i, :] = return_dict[i+1]

    return dl, atoms, encoding_list

In [None]:
# function to create the sub-datasets
def create_sub_datasets(ref_image, patch_size, path_to_read, path_to_write, verbose = False):
    """Learn a dictionary on `ref_image` and encode the 15 groups of density frames.

    For each group j in 1..15 the density frames are read from
    `path_to_read + str(j) + '.pkl'`, encoded with create_dataset_from_atoms
    and saved to `path_to_write + str(j) + '.csv'`.
    """
    print("Learning the reference image\n")

    # dictionary learner, dictionary and width (needed to crop the density frames consistently)
    dl, atoms, width = learn_reference_image(ref_image, patch_size, verbose = verbose)

    print("\nLearning complete")

    for group in range(1, 16):

        first_video, last_video = (group-1)*10+1, group*10

        print("Extracting data for the videos from {} to {}".format(first_video, last_video))
        df_density_frames = pd.read_pickle(path_to_read+str(group)+'.pkl')

        # sub-dataset of this group of videos
        X = create_dataset_from_atoms(df_density_frames, dl, atoms, patch_size, width = width, verbose = verbose)[2]
        df = pd.DataFrame(X)

        if verbose:
            print("Saving the dataset for the videos from {} to {}".format(first_video, last_video))
            start = time.time()

        # saving the sub-dataset
        df.to_csv(path_to_write+str(group)+".csv", index=False)

        if verbose:
            print("Complete. Time spent: %s" % (time.time()-start))

In [None]:
# instructions to create the dictionary-learning dataset:
# the reference image is the density of the whole Fragility clip (a single batch, hence [0]),
# the density frames of the videos are encoded with 7x7 patches and saved as 15 CSV files
patch_size = (7,7)
ref_image = normalised_binary_density(f_b_frames, frame_size=f_b_frames[0].shape, batch_size=len(f_b_frames))[0]
create_sub_datasets(ref_image, patch_size, 'Video/densityFrame_pkl/videos_density_', "datasets/dictLearn_frag/dictionary_learning_fragility_dataset_", verbose = True)
#cvu.merge_dataset("dataset/dictionary_learning_lightness_dataset", "dataset/dictionary_learning_lightness_dataset.csv", verbose = True)

### TEMPORARY DATASET REARRANGEMENT

In [None]:
# function to get the (indexes of the) useless columns of a sub-dataset
def get_child(dic, index, path_to_read, range_videos, verbose = False):
    """Store in `dic[index]` the non relevant features of sub-dataset `index`.

    The sub-dataset is read from `path_to_read + str(index) + '.csv'` and passed
    to cvu.get_non_relevant_features. Meant to be used as the target of a
    multiprocessing.Process, hence the result is handed back through `dic`.

    `range_videos` is accepted for signature symmetry with the other children
    and is not used.
    """
    # the body used to refer to an undefined `j`; the sub-dataset number is `index`
    first_video, last_video = (index-1)*10+1, index*10

    if verbose:
        print("Extracting data for the videos from {} to {}".format(first_video, last_video))
        start = time.time()

    # DataFrame.as_matrix() has been removed from pandas; .values is equivalent
    X = pd.read_csv(path_to_read+str(index)+".csv").values

    if verbose:
        print("Complete for the videos from {} to {}".format(first_video, last_video))
        print("Time spent: %s" % (time.time()-start))
        print("Getting non relevant features from the videos from {} to {}".format(first_video, last_video))
        start = time.time()

    dic[index] = cvu.get_non_relevant_features(X)

    if verbose:
        print("Complete for the videos from {} to {}".format(first_video, last_video))
        print("Time spent: %s" % (time.time()-start))

In [None]:
# function to delete the useless columns of a sub-dataset
def cut_child(j, path_to_read, range_videos, indexes, verbose = False):
    """Remove the columns `indexes` from sub-dataset `j` and rewrite its CSV in place.

    The sub-dataset is read from (and written back to)
    `path_to_read + str(j) + '.csv'`. `range_videos` is not used.
    """
    csv_path = path_to_read+str(j)+".csv"
    first_video, last_video = (j-1)*10+1, j*10

    if verbose:
        print("Extracting data for the videos from {} to {}".format(first_video, last_video))
        start = time.time()

    # DataFrame.as_matrix() has been removed from pandas; .values is equivalent
    X = pd.read_csv(csv_path).values

    if verbose:
        print("Complete for the videos from {} to {}".format(first_video, last_video))
        print("Time spent: %s" % (time.time()-start))
        print("Cutting non relevant features from the videos from {} to {}".format(first_video, last_video))
        start = time.time()

    # NOTE(review): the return value is discarded; unless the helper modifies X in place the
    # file is rewritten unchanged — confirm against cvu.cut_non_relevant_features.
    cvu.cut_non_relevant_features(X, indexes)

    if verbose: print("Rewriting data for the videos from {} to {}".format(first_video, last_video))

    df = pd.DataFrame(X)
    df.to_csv(csv_path, index=False)

    if verbose:
        print("Complete for the videos from {} to {}".format(first_video, last_video))
        print("Time spent: %s" % (time.time()-start))

In [None]:
# function to rearrange the columns of each sub-dataset in order to group them atom by atom
def rearrange_child(j, path_to_read, range_videos, verbose = False):
    """Regroup the columns of sub-dataset `j` atom by atom and rewrite its CSV in place.

    In the input the columns are interleaved: column c*25 + r holds the
    coefficient of atom r for patch c. In the output all the coefficients of
    atom r are contiguous: column r*col_per_atom + c. Columns exceeding a
    multiple of 25 are written as zeros.

    The sub-dataset is read from (and written back to)
    `path_to_read + str(j) + '.csv'`. `range_videos` is not used.
    """
    n_atoms = 25  # size of the dictionary the encodings were produced with
    csv_path = path_to_read+str(j)+".csv"
    first_video, last_video = (j-1)*10+1, j*10

    if verbose:
        print("Extracting data for the videos from {} to {}".format(first_video, last_video))
        start = time.time()

    # .values is a property: calling it (as the previous version did) raises TypeError
    X = pd.read_csv(csv_path).values

    if verbose:
        print("Complete. Time spent: %s" % (time.time()-start))
        print("Rearranging the dataset for the videos from {} to {}".format(first_video, last_video))
        start = time.time()

    new_X = np.zeros(X.shape)

    n_rows = X.shape[0]
    col_per_atom = X.shape[1]//n_atoms
    n_used = col_per_atom*n_atoms

    # rearranging: view the used columns as (row, patch, atom), swap the last two axes and
    # flatten again, i.e. new_X[:, r*col_per_atom+c] = X[:, c*n_atoms+r]
    new_X[:, :n_used] = (X[:, :n_used]
                         .reshape(n_rows, col_per_atom, n_atoms)
                         .transpose(0, 2, 1)
                         .reshape(n_rows, n_used))

    if verbose: print("Rewriting data for the videos from {} to {}".format(first_video, last_video))

    df = pd.DataFrame(new_X)
    df.to_csv(csv_path, index=False)

    if verbose:
        print("Complete for the videos from {} to {}".format(first_video, last_video))
        print("Time spent: %s" % (time.time()-start))

In [None]:
# function to get the list of the (indexes of the) useless columns of the dataset
def get_non_relevant_features(proc, path_to_read, range_videos, verbose = False):
    """Run get_child on every sub-dataset and collect the results.

    A new child process is started for each element of `range_videos`; the
    running children are waited for (and their results collected) every time
    the sub-dataset number is a non-zero multiple of `proc`, and once more
    after the last one has been started.

    Returns
    -------
    list : one entry per sub-dataset, in the order of `range_videos`.

    Raises
    ------
    KeyError : when a child terminated without storing its result.
    """
    list_non_relevant_features = []
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    pending = []  # (sub-dataset number, process) of the children not collected yet

    def _collect_pending():
        # wait for all the running children, then read their results by sub-dataset number
        # (the key used by get_child), not by position in the list
        for _, p in pending:
            p.join()
        for key, _ in pending:
            list_non_relevant_features.append(return_dict[key])
        del pending[:]

    for j in range_videos:

        if j != 0 and j%proc == 0:
            _collect_pending()

        process = multiprocessing.Process(target=get_child, args=[return_dict, j, path_to_read, range_videos, verbose])
        pending.append((j, process))
        process.start()

    # children started after the last synchronisation point
    _collect_pending()

    return list_non_relevant_features

In [None]:
# function to delete the useless columns of the dataset
def cut_non_relevant_features(proc, path_to_read, range_videos, indexes, verbose = False):
    """Run cut_child on every sub-dataset listed in `range_videos`.

    A new child process is started for each sub-dataset; the running children
    are waited for every time the sub-dataset number is a non-zero multiple of
    `proc`, and after the one equal to the last element of `range_videos`.
    """
    def _wait_all(processes):
        # block until every process has finished; the caller restarts from an empty list
        for p in processes:
            p.join()
        return []

    running = []

    for j in range_videos:

        if j != 0 and j%proc == 0:
            running = _wait_all(running)

        worker = multiprocessing.Process(target=cut_child, args=[j, path_to_read, range_videos, indexes, verbose])
        running.append(worker)
        worker.start()

        if j == range_videos[-1]:
            running = _wait_all(running)


In [None]:
# function to rearrange the columns of the dataset in order to group them atom by atom
def rearrange_datasets(proc, path_to_read, range_videos, verbose = False):
    """Run rearrange_child on every sub-dataset listed in `range_videos`.

    A new child process is started for each sub-dataset; the running children
    are waited for every time the sub-dataset number is a non-zero multiple of
    `proc`, and after the one equal to the last element of `range_videos`.
    """
    def _wait_all(processes):
        # block until every process has finished; the caller restarts from an empty list
        for p in processes:
            p.join()
        return []

    running = []

    for j in range_videos:

        if j != 0 and j%proc == 0:
            running = _wait_all(running)

        worker = multiprocessing.Process(target=rearrange_child, args=[j, path_to_read, range_videos, verbose])
        running.append(worker)
        worker.start()

        if j == range_videos[-1]:
            running = _wait_all(running)

In [None]:
# regroup atom by atom the columns of the 15 fragility sub-datasets (files are rewritten in place)
rearrange_datasets(2, "datasets/dictLearn_frag/dictionary_learning_fragility_dataset_", range(1,16), verbose = True)

In [None]:
# regroup atom by atom the columns of the lightness sub-datasets (files are rewritten in place)
# NOTE(review): unlike the fragility call, the path has no trailing "_" and the range starts
# from 2 — confirm that both are intended.
rearrange_datasets(2, "datasets/dictLearn_light/dictionary_learning_lightness_dataset", range(2,16), verbose = True)

In [None]:
# non relevant columns of each fragility sub-dataset, one list per sub-dataset
list_non_relevant_features = get_non_relevant_features(2, "datasets/dictLearn_frag/dictionary_learning_fragility_dataset_", range(1,16), verbose = True)

# keep only the columns that are non relevant in *every* sub-dataset
intersection = set(list_non_relevant_features[-1])

for l in list_non_relevant_features[:-1]: intersection = set(l) & intersection
    
intersection = list(intersection)

# number of columns that can be removed from the whole dataset
print(len(intersection))

In [None]:
# remove the columns found above from the 15 fragility sub-datasets (files are rewritten in place)
cut_non_relevant_features(2, "datasets/dictLearn_frag/dictionary_learning_fragility_dataset_", range(1,16), intersection, verbose = True)

In [None]:
# merge the fragility sub-datasets (first argument: common path prefix) into the single CSV given as second argument
cvu.merge_dataset("datasets/dictLearn_frag/dictionary_learning_fragility_dataset_", "datasets/dictionary_learning_fragility_dataset.csv", verbose = True)

In [None]:
def rearrange_datasets_v2(path_to_read, range_videos, verbose = False):
    """Sequential, low-memory variant of rearrange_datasets.

    For every j in `range_videos` the CSV `path_to_read + str(j) + '.csv'` is
    read one row at a time, its columns are regrouped atom by atom (input
    column c*25 + r becomes output column r*col_per_atom + c; columns exceeding
    a multiple of 25 are written as zeros) and the file is rewritten in place.
    A file without data rows is left untouched.
    """
    n_atoms = 25   # size of the dictionary the encodings were produced with
    chunksize = 1  # rows read at once

    for j in range_videos:

        csv_path = path_to_read+str(j)+".csv"
        first_video, last_video = (j-1)*10+1, j*10
        rearranged_chunks = []

        if verbose:
            print("Extracting data for the videos from {} to {}".format(first_video, last_video))
            print("Rearranging the dataset for the videos from {} to {}".format(first_video, last_video))
            start = time.time()

        for chunk in pd.read_csv(csv_path, chunksize=chunksize):

            # work on the raw values: the DataFrame is labelled by the (string) CSV header
            X = chunk.values
            new_X = np.zeros(X.shape)

            n_rows = X.shape[0]
            col_per_atom = X.shape[1]//n_atoms
            n_used = col_per_atom*n_atoms

            # view the used columns as (row, patch, atom), swap the last two axes and flatten
            # again, i.e. new_X[:, r*col_per_atom+c] = X[:, c*n_atoms+r]
            new_X[:, :n_used] = (X[:, :n_used]
                                 .reshape(n_rows, col_per_atom, n_atoms)
                                 .transpose(0, 2, 1)
                                 .reshape(n_rows, n_used))

            rearranged_chunks.append(new_X)

        if verbose:
            print("Complete. Time spent: %s" % (time.time()-start))

        # nothing to rewrite for a file with no data rows
        if not rearranged_chunks:
            continue

        if verbose:
            print("Rewriting data for the videos from {} to {}".format(first_video, last_video))
            start = time.time()

        df = pd.DataFrame(np.vstack(rearranged_chunks))
        df.to_csv(csv_path, index=False)

        if verbose: print("Complete. Time spent: %s" % (time.time()-start))

In [None]:
# same rearrangement for the lightness sub-datasets, one row at a time
# NOTE(review): unlike the fragility paths, this one has no trailing "_" and the range starts
# from 2 — confirm that both are intended.
rearrange_datasets_v2("datasets/dictLearn_light/dictionary_learning_lightness_dataset", range(2,16), verbose = True)

## REGRESSION

### LOADING

In [None]:
# mark loading

names, Y = cvu.load_marks()

# DataFrame.as_matrix() has been removed from pandas; .values (already used elsewhere in this
# notebook) returns the same ndarray
Y = Y.values.astype(float)
Y_l = Y[:,0]  # first column: lightness marks (per the variable naming)
Y_f = Y[:,1]  # second column: fragility marks (per the variable naming)

# NOTE(review): names is sorted independently of the rows of Y — confirm that the rows
# returned by cvu.load_marks are already in this order.
names = sorted(names)

In [None]:
# Feature matrix for the regression.
# NOTE(review): this section follows the dictionary-learning dataset creation but still loads
# the density dataset (density_datasetV2.csv) — confirm which file is the intended one.
X = pd.read_csv("datasets/density_datasetV2.csv").values

# drop every sample (row) that contains at least one NaN feature, from both X and Y
delIdx = np.where(np.isnan(X))[0]
X = np.delete(X, delIdx, axis=0)
Y = np.delete(Y, delIdx, axis=0)

# NOTE: Y is rebound here, so this cell must not be re-run without re-running the mark loading first.
Y_l = Y[:, 0]
Y_f = Y[:, 1]

In [None]:
# split features and both targets into training (_tr) and test (_ts) parts
# NOTE(review): no seed is passed here; whether the split is reproducible depends on cvu.random_sampling.
X_tr, Y_l_tr, Y_f_tr, X_ts, Y_l_ts, Y_f_ts = cvu.random_sampling(X, Y_l, Y_f)

### TESTING

In [None]:
# grid of regularisation values for RLS: 0.001, 0.008, ... (step 0.007, upper bound 1 excluded)
alphas = {'alpha': list(np.arange(0.001, 1, 0.007))}

# LIGHTNESS: RLS regression on the lightness marks
cvu.print_results('RLS', cvu.rlsCV_regression(alphas, X_tr, X_ts, Y_l_tr, Y_l_ts))

# FRAGILITY: RLS regression on the fragility marks
cvu.print_results('RLS', cvu.rlsCV_regression(alphas, X_tr, X_ts, Y_f_tr, Y_f_ts))