# In [None]:
import os

def get_random_samples_by_label(
    base: np.ndarray,
    label: int,
    n_samples: Optional[int] = 0,
) -> np.ndarray:
    """Return the rows of ``base`` whose last column equals ``label``.

    Args:
        base: Array indexed as ``base[:, -1]``; the last column is the one
            compared against ``label``.
        label: Value a row's last column must equal to be selected.
        n_samples: How many of the matching rows to draw at random, without
            replacement. Any falsy value (``0``, ``None``, ``False``) means
            "no sampling": every matching row is returned.

    Returns:
        All matching rows, or a random subset of ``n_samples`` of them.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``n_samples``
            is larger than the number of matching rows.
    """
    label_mask = base[:, -1] == label
    matching = base[label_mask]

    # Guard clause: a falsy n_samples disables sub-sampling entirely.
    if not n_samples:
        return matching

    picked = np.random.choice(matching.shape[0], n_samples, replace=False)
    return matching[picked]



def get_random_samples(base: np.ndarray, qtd_segments: int) -> np.ndarray:
    """Build a sample of ``base`` with the same row budget for labels 0 and 1.

    Args:
        base: Array whose rows are forwarded to
            ``get_random_samples_by_label`` for labels ``0`` and ``1``.
        qtd_segments: Total number of rows requested. It is halved
            (truncating) to get the per-label count. A falsy value is passed
            through unchanged, which makes the helper return every row of
            each label.

    Returns:
        The label-1 rows stacked on top of the label-0 rows.
    """
    # NOTE(review): qtd_segments == 1 halves to 0, which the helper treats as
    # "no limit" and so returns the whole base; odd values yield one row
    # fewer than requested. Confirm whether that is intended.
    per_label = int(qtd_segments / 2) if qtd_segments else qtd_segments

    # Label 0 is drawn before label 1 (same order as the RNG is consumed),
    # but the positives are placed first in the stacked result.
    negatives, positives = (
        get_random_samples_by_label(base=base, label=wanted, n_samples=per_label)
        for wanted in (0, 1)
    )
    return np.vstack((positives, negatives))


def extract_segments_bins() -> Optional[np.ndarray]:
    """Load the cached segment-bins array matching the built-in parameters.

    The cache file lives in ``./segments_bins`` (created if missing) and its
    name is derived from ``m_bins``, ``segment_size`` and ``range_hist``. The
    resolved path is printed before the lookup.

    Returns:
        The loaded array passed through ``get_random_samples`` with the
        configured ``qtd_segments``, or ``None`` when no cache file exists
        for this parameter set.
    """
    directory = "./segments_bins"

    params = {
        "m_bins": 25,
        "qtd_segments": False,
        "segment_size": 25,
        "range_hist": (0, 2500),
    }

    segment_size = params["segment_size"]
    m_bins = params["m_bins"]
    range_hist = params["range_hist"]
    qtd_segments = params["qtd_segments"]

    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(directory, exist_ok=True)

    filename = f"mb{m_bins}_ss{segment_size}_rh{range_hist[0]}-{range_hist[1]}.npy"
    path = os.path.join(directory, filename)

    print(path)

    if not os.path.isfile(path):
        # Nothing cached for these parameters; callers must handle None.
        return None

    segment_bins = np.load(path)
    return get_random_samples(segment_bins, qtd_segments)
    

def extract_segments_tq() -> Optional[np.ndarray]:
    """Load the cached array stored at ``./tq_segments/tq_segments.npy``.

    The ``./tq_segments`` directory is created if it does not exist yet.

    Returns:
        The loaded array passed through ``get_random_samples`` with sampling
        disabled (``False``), or ``None`` when the cache file does not exist.
    """
    directory = "./tq_segments"

    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(directory, exist_ok=True)

    path = os.path.join(directory, "tq_segments.npy")

    if not os.path.isfile(path):
        # Nothing cached yet; callers must handle None.
        return None

    segment_bins = np.load(path)
    return get_random_samples(segment_bins, False)


def classifier_ensembled():
    """Cross-validate two models over 10 stratified folds and combine them.

    Relies on names that are not defined inside this function and must exist
    in the enclosing scope: ``base``, ``model1``, ``model2``, ``soft_voting``
    and ``StratifiedKFold``. The last column of ``base`` is used as the
    target and all other columns as features.

    For every fold both models are refit on the training split. ``model1``
    contributes ``predict_proba`` output and ``model2`` contributes
    ``predict`` output for the test split. The per-fold outputs are pooled
    row by row, in fold order, and handed to ``soft_voting``.

    Returns:
        Whatever ``soft_voting`` returns for ``[model1 outputs, model2 outputs]``.
    """
    # NOTE(review): shuffle=True with no random_state, so the folds presumably
    # differ between runs - confirm whether reproducibility is needed.
    folds = StratifiedKFold(n_splits=10, shuffle=True)

    y = base[:, -1]
    X = base[:, :-1]

    model1_outputs = []
    model2_outputs = []

    for train_idx, test_idx in folds.split(X, y):
        x_train, x_test = X[train_idx], X[test_idx]
        y_train = y[train_idx]

        model1.fit(x_train, y_train)
        # extend() pools the fold's rows directly into one flat list.
        model1_outputs.extend(model1.predict_proba(x_test))

        model2.fit(x_train, y_train)
        # NOTE(review): model2 supplies predict() output while model1 supplies
        # predict_proba() output - confirm soft_voting expects this mix.
        model2_outputs.extend(model2.predict(x_test))

    # NOTE(review): the pooled rows follow fold order, not the row order of
    # `base`; verify that downstream scoring accounts for this.
    return soft_voting([model1_outputs, model2_outputs])