# Divide the scan into different brain sections 

In [2]:
from itertools import chain
import os
import numpy as np
# The data file is looked up in the current working directory.
current_dir = os.getcwd()

# Path of the NumPy file 'ihb.npy' that holds the scans.
npy_file_path = os.path.join(current_dir, 'ihb.npy')

all_data = np.load(npy_file_path)

In [3]:
# Display the dimensions of the loaded array.
all_data.shape

(320, 10, 246)

In [4]:
import pandas as pd
# Count the NaN entries of every scan (one entry per element along the first
# axis of all_data), then tabulate how often each count occurs.
num_nans = [np.isnan(scan).sum() for scan in all_data]
nan_values = pd.Series(num_nans)
nan_values.value_counts()

0      160
460    160
Name: count, dtype: int64

In [5]:
# Split the scans by their total NaN count: NaN-free scans go to g1, scans
# holding exactly 460 NaNs go to g2. The count is computed once per scan.
nan_counts = [np.isnan(scan).sum() for scan in all_data]
g1 = [scan for scan, count in zip(all_data, nan_counts) if count == 0]
g2 = [scan for scan, count in zip(all_data, nan_counts) if count == 460]

# g2_nan_distribution[i, j] is 1.0 when scans i and j of g2 have the same
# number of NaNs in every row, and 0.0 otherwise. The per-row counts are
# computed once per scan rather than inside the quadratic loop.
g2_row_nans = [np.sum(np.isnan(scan), axis=1) for scan in g2]
g2_nan_distribution = np.zeros(shape=(len(g2), len(g2)))

for i1, rows1 in enumerate(g2_row_nans):
	for i2, rows2 in enumerate(g2_row_nans):
		g2_nan_distribution[i1][i2] = all(rows1 == rows2)

# Drop the last 46 columns of every g2 scan.
# NOTE(review): presumably these are the columns that hold the NaNs
# (460 NaNs over 10 rows = 46 per row) -- confirm with g2_nan_distribution.
g2 = [scan[:, :-46] for scan in g2]

# The first step: group together the scans that are the same underlying scan with different smoothing applied

In [6]:
# we know for a fact that we have 160 samples belonging to 20 subjects each having

# let's calculate the autocorrelation of each time sequence in each scan
from typing import Union, List
from scipy.signal import correlate2d, correlate

def autocorrelation_stats(scan: np.ndarray, aggregate: bool = True) -> Union[List, float]:
	"""Summarise how strongly a scan correlates with lagged copies of itself.

	For every lag from 1 to 5 the scan is split into its trailing part
	(``scan[lag:]``) and its leading part (``scan[:-lag]``). The lag's
	autocorrelation is the correlation of those two equally shaped pieces,
	which for real-valued data is the sum of their element-wise products.

	Args:
		scan: Array whose first axis has exactly 10 entries.
		aggregate: When True return the mean over the five lags, otherwise
			return the per-lag values.

	Returns:
		The mean autocorrelation when ``aggregate`` is True, else a list of
		five floats ordered by increasing lag.

	Raises:
		AssertionError: If the first axis of ``scan`` does not have length 10.
	"""
	assert len(scan) == 10
	# Both slices have the same shape, so "valid" mode yields exactly one
	# value per lag. The default "full" mode returns a whole array of shifts,
	# which cannot be converted to a single float.
	auto_correlations = [
		float(correlate(scan[lag:], scan[:-lag], mode="valid").item())
		for lag in range(1, 6)
	]
	if aggregate:
		return np.mean(auto_correlations)
	return auto_correlations

def build_ac_pairs(scans: List[np.ndarray]) -> set:
	"""Pair up scans that are each other's closest match by cross-correlation.

	Every scan is scored against every scan with ``correlate2d(..., "valid")``;
	the scans must share one 2-D shape so that each score is a single number.
	Two scans form a pair when each is the other's highest-scoring scan apart
	from itself.

	Args:
		scans: Equally shaped, real-valued 2-D arrays.

	Returns:
		A set of ``(i, j)`` index tuples with ``i < j``, one per mutual pair.
		Empty when fewer than two scans are given.

	Raises:
		AssertionError: If some scan scores higher against another scan than
			against itself, which breaks the assumption the ranking relies on.
	"""
	n_scans = len(scans)
	# With fewer than two scans there is nothing to pair, and the two-column
	# ranking below could not be built.
	if n_scans < 2:
		return set()

	auto_corrs = np.zeros(shape=(n_scans, n_scans))
	for i1, element1 in enumerate(scans):
		# The score of (a, b) equals the score of (b, a) for equally shaped
		# real arrays, so only the upper triangle is computed and mirrored.
		for i2 in range(i1, n_scans):
			score = correlate2d(element1, scans[i2], "valid").item()
			auto_corrs[i1, i2] = score
			auto_corrs[i2, i1] = score

	# Rows are sorted ascending and only the two best columns are kept:
	# column 1 is the best match (expected to be the scan itself) and
	# column 0 is the closest other scan.
	paired_scans_by_ac = np.argsort(auto_corrs, axis=-1)[:, -2:]

	pairs = set()
	for i in range(n_scans):
		assert paired_scans_by_ac[i, 1] == i, "check the code"
		closest_scan_index = int(paired_scans_by_ac[i, 0])
		# Keep mutual matches only, recorded once from the smaller index.
		if i < closest_scan_index and paired_scans_by_ac[closest_scan_index, 0] == i:
			pairs.add((i, closest_scan_index))

	return pairs

In [7]:
# Build the index pairs separately for each of the two groups of scans.
g1_pairs, g2_pairs = build_ac_pairs(g1), build_ac_pairs(g2)

In [8]:
# Display how many pairs were found in each group.
len(g1_pairs), len(g2_pairs)

(80, 80)

In [9]:
def unified_segment_rep(scans: List[np.ndarray], pairs_indices: set) -> List[np.ndarray]:
	"""Collapse each index pair into the element-wise mean of its two scans.

	Args:
		scans: The scans that the index pairs refer to.
		pairs_indices: Iterable of ``(i1, i2)`` positions into ``scans``.

	Returns:
		One averaged array per pair, in the iteration order of
		``pairs_indices``.

	Raises:
		ValueError: If the two scans of a pair have different shapes.
	"""
	def _mean_of_pair(first: np.ndarray, second: np.ndarray) -> np.ndarray:
		# Averaging only makes sense for scans of identical shape.
		if first.shape != second.shape:
			raise ValueError("Make sure the code is correct. found pairs with different shapes")
		return (first + second) / 2

	return [_mean_of_pair(scans[left], scans[right]) for left, right in pairs_indices]

In [10]:
# Replace every pair in each group by the element-wise mean of its two scans.
avg_g1, avg_g2 = unified_segment_rep(g1, g1_pairs), unified_segment_rep(g2, g2_pairs)

In [11]:
def compute_auto_corr_concatenation(seg1: np.ndarray, seg2: np.ndarray):
	"""Score how well two segments fit together when stacked in either order.

	For each ordering (``seg1`` on top of ``seg2``, then ``seg2`` on top of
	``seg1``) a window of ``len(seg1)`` rows is placed at the first
	``len(seg1)`` row offsets of the stacked array. Each window is correlated
	with the segment that was placed first, and the scores are averaged.

	Args:
		seg1: 2-D array.
		seg2: 2-D array of the same shape as ``seg1``.

	Returns:
		The larger of the two mean scores.

	Raises:
		AssertionError: If the two segments do not share one shape.
	"""
	assert seg1.shape == seg2.shape, "both segments same shape"
	window = len(seg1)

	def _mean_window_score(leading: np.ndarray, trailing: np.ndarray):
		# Stack along the row axis; the leading segment is also the template.
		stacked = np.concatenate([leading, trailing], axis=0)
		rows_ok = stacked.shape[0] == 2 * leading.shape[0]
		assert rows_ok and stacked.shape[1] == leading.shape[1], "concatenation correct"

		scores = []
		for start in range(window):
			piece = stacked[start:start + window, :]
			scores.append(correlate2d(piece, leading, "valid").item())
		return np.mean(scores)

	forward = _mean_window_score(seg1, seg2)
	backward = _mean_window_score(seg2, seg1)
	return max(forward, backward)


# Reduce 80 to 20

In [None]:
# let's see fi 