In [17]:
import os
import cv2
import numpy as np
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import csv
from skimage import feature
import pandas as pd

In [26]:
# Filter: CLAHE contrast enhancement, 100x100 resize, and the parallel batch driver
def apply_clahe_rgb(img, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Enhance local contrast of a colour image with CLAHE on its lightness channel.

    The image is converted to the LAB colour space, CLAHE is applied to the
    L channel only (A and B are left untouched), and the result is converted
    back.

    Note: despite the ``rgb`` in the name, the conversions used are
    ``COLOR_BGR2LAB`` / ``COLOR_LAB2BGR``, so the input is interpreted as BGR
    and the output is BGR as well.

    Args:
        img: 3-channel image accepted by ``cv2.cvtColor(img, cv2.COLOR_BGR2LAB)``.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The contrast-enhanced image, in the same (BGR) channel order as the input.
    """
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    lightness, chan_a, chan_b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    # Only the lightness channel is equalized; the colour channels pass through.
    enhanced = cv2.merge((clahe.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(enhanced, cv2.COLOR_LAB2BGR)

def process_image(args):
    """Load one image, enhance it, shrink it to 100x100 and save it as PNG.

    Takes a single tuple so it can be used directly with ``executor.map``.

    Args:
        args: ``(input_path, output_path)`` pair of path strings.

    Returns:
        ``(file_name, True)`` on success, or ``(file_name, False, message)``
        when the image could not be read, could not be written, or any step
        raised an exception. ``file_name`` is the base name of ``input_path``.
    """
    input_path, output_path = args
    name = os.path.basename(input_path)
    try:
        img = cv2.imread(input_path)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            return (name, False, "No se pudo cargar la imagen")

        img_filtered = apply_clahe_rgb(img)
        img_resized = cv2.resize(img_filtered, (100, 100), interpolation=cv2.INTER_AREA)

        compression = [cv2.IMWRITE_PNG_COMPRESSION, 9]
        # cv2.imwrite reports failure through its boolean return value;
        # ignoring it would count an unsaved image as a success.
        if not cv2.imwrite(output_path, img_resized, compression):
            return (name, False, "No se pudo guardar la imagen")

        return (name, True)

    except Exception as e:
        return (name, False, str(e))
    
def process_imgs_parallel(input_path, output_path, n_workers=None):
    """Run ``process_image`` over every ``*.png`` under ``input_path`` using threads.

    The directory layout found under ``input_path`` is mirrored under
    ``output_path`` (missing directories are created). Progress is shown with
    a tqdm bar; each failure is reported as ``<file name>: <message>`` and the
    success / failure totals are printed at the end.

    Args:
        input_path: Root directory searched recursively for ``*.png`` files.
        output_path: Root directory that receives the processed images.
        n_workers: Number of worker threads. When ``None`` it defaults to
            ``min(32, cpu_count * 4)`` (falling back to 4 CPUs if the count
            is unknown).

    Returns:
        None. Results are reported on standard output only.
    """
    input_root = Path(input_path)
    output_root = Path(output_path)
    output_root.mkdir(parents=True, exist_ok=True)

    # Materialize the listing before creating any output directory.
    img_files = list(input_root.rglob('*.png'))

    args_list = []
    for img_path in img_files:
        target = output_root / img_path.relative_to(input_root)
        target.parent.mkdir(parents=True, exist_ok=True)
        args_list.append((str(img_path), str(target)))

    if n_workers is None:
        n_workers = min(32, (os.cpu_count() or 4) * 4)

    total = len(args_list)
    success = 0
    failed = 0
    with ThreadPoolExecutor(max_workers=n_workers) as executor:
        print(f"Procesando {total} imagenes")
        with tqdm(total=total, desc='Progreso', unit="img") as bar:
            for result in executor.map(process_image, args_list):
                if result[1]:
                    success += 1
                else:
                    failed += 1
                    # tqdm.write prints without corrupting the active progress bar.
                    tqdm.write(f"{result[0]}: {result[2]}")
                bar.update(1)
    print(f"Exito: {success}")
    print(f"Error: {failed}")
    

    

In [28]:
# Enhance and downscale every PNG of the raw dataset into the 'Imgs' folder.
process_imgs_parallel(
    input_path='COVID-19_Radiography_Dataset',
    output_path='Imgs',
)

Procesando 21165 imagenes


Progreso: 100%|██████████| 21165/21165 [00:21<00:00, 978.82img/s] 

Exito: 21165
Error: 0





In [7]:
class LocalBinaryPatterns:
    """Descriptor that summarizes an image as a histogram of uniform LBP codes.

    ``radius`` and ``num_points`` are stored as given and forwarded to
    ``feature.local_binary_pattern`` every time ``describe`` is called.
    """

    def __init__(self, radius, num_points):
        self.radius = radius
        self.num_points = num_points

    def describe(self, image, eps=1e-7):
        """Return the normalized histogram of the uniform LBP codes of ``image``.

        Args:
            image: Image forwarded unchanged to ``feature.local_binary_pattern``.
            eps: Small constant added to the denominator so that an all-zero
                histogram does not cause a division by zero.

        Returns:
            1-D float array with ``num_points + 2`` entries: the bin counts
            divided by ``(total count + eps)``.
        """
        n_bins = self.num_points + 2
        codes = feature.local_binary_pattern(
            image, self.num_points, self.radius, method="uniform"
        )
        counts, _edges = np.histogram(
            codes.ravel(),
            bins=np.arange(0, n_bins + 1),
            range=(0, n_bins),
        )
        # Normalize the counts into relative frequencies.
        counts = counts.astype("float")
        return counts / (counts.sum() + eps)

In [15]:
def hist_to_csv(input_path, output_file):
    """Write one LBP histogram per ``*.png`` under ``input_path`` to a CSV file.

    The CSV has a ``target`` column followed by ``hist_0 .. hist_{n-1}``.
    ``target`` is the part of the file name before the first ``-``. The header
    is always written, even when no image is found, so the file can be loaded
    afterwards. Images that cannot be read are reported and skipped.

    Args:
        input_path: Directory searched recursively for ``*.png`` files.
        output_file: Path of the CSV file to create (overwritten if present).

    Returns:
        None.
    """
    # Sorted so that row order is reproducible across runs and filesystems.
    img_files = sorted(Path(input_path).rglob('*.png'))
    descriptor = LocalBinaryPatterns(8, 24)
    # describe() bins the LBP codes into num_points + 2 buckets, so the header
    # width is known before any image is read.
    n_bins = descriptor.num_points + 2
    skipped = 0

    # Explicit UTF-8 so the file does not depend on the platform's locale encoding.
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['target'] + [f'hist_{j}' for j in range(n_bins)])

        for img_path in img_files:
            # str(): not every OpenCV build accepts pathlib.Path as a file name.
            img = cv2.imread(str(img_path))
            if img is None:
                # cv2.imread returns None for unreadable files instead of raising.
                skipped += 1
                print(f"No se pudo cargar la imagen: {img_path}")
                continue

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            hist = descriptor.describe(gray)

            target = img_path.name.split('-')[0]
            writer.writerow([target] + hist.tolist())

    if skipped:
        print(f"Imagenes omitidas: {skipped}")
    print('Histogramas creados')

In [16]:
# Build the LBP feature table from the preprocessed images.
hist_to_csv(input_path='Imgs', output_file='COVID-19_DF.csv')

Histogramas creados


In [26]:
# Load the feature table and show how many samples each class has.
df = pd.read_csv('COVID-19_DF.csv')
targets = df.iloc[:, 0].to_frame()
target_values = targets.value_counts().to_frame()

display(target_values)

df.head(10)

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
Normal,10192
Lung_Opacity,6012
COVID,3616
Viral Pneumonia,1345


Unnamed: 0,target,hist_0,hist_1,hist_2,hist_3,hist_4,hist_5,hist_6,hist_7,hist_8,...,hist_16,hist_17,hist_18,hist_19,hist_20,hist_21,hist_22,hist_23,hist_24,hist_25
0,COVID,0.0333,0.0219,0.0143,0.0188,0.0188,0.0151,0.018,0.0226,0.0269,...,0.0187,0.0179,0.015,0.0102,0.0067,0.0082,0.0098,0.0086,0.0142,0.346
1,COVID,0.0396,0.0414,0.0235,0.0085,0.0065,0.0048,0.0084,0.0088,0.0124,...,0.0076,0.0128,0.0081,0.0091,0.0071,0.0116,0.0133,0.016,0.0201,0.5386
2,COVID,0.0275,0.0353,0.0247,0.0177,0.0111,0.0106,0.0144,0.0188,0.0243,...,0.0154,0.0144,0.007,0.0097,0.0109,0.0156,0.0122,0.0102,0.0167,0.3783
3,COVID,0.0204,0.0211,0.0247,0.0155,0.013,0.0152,0.0168,0.0261,0.0343,...,0.0155,0.0207,0.0133,0.0137,0.0078,0.0082,0.0053,0.0074,0.0158,0.3042
4,COVID,0.0343,0.0307,0.0113,0.0074,0.0084,0.0126,0.0118,0.0136,0.0151,...,0.0149,0.0251,0.0095,0.0112,0.0065,0.0136,0.0093,0.0148,0.043,0.4499
5,COVID,0.0255,0.0295,0.0161,0.0113,0.0106,0.0134,0.0132,0.0155,0.016,...,0.0104,0.0118,0.0096,0.0085,0.0075,0.0118,0.0129,0.0188,0.0262,0.543
6,COVID,0.0327,0.0305,0.0171,0.0123,0.0067,0.0065,0.0113,0.0136,0.02,...,0.0104,0.0093,0.0067,0.0066,0.0036,0.0078,0.0096,0.0145,0.0229,0.4996
7,COVID,0.0471,0.0424,0.0119,0.0062,0.0028,0.0047,0.0045,0.0083,0.0095,...,0.0063,0.0152,0.0083,0.0105,0.0055,0.0107,0.0099,0.0116,0.0242,0.5769
8,COVID,0.0336,0.0388,0.0207,0.0144,0.0057,0.0053,0.0054,0.0082,0.0094,...,0.0147,0.0125,0.0064,0.0061,0.0058,0.008,0.0096,0.0135,0.0258,0.5163
9,COVID,0.0408,0.0382,0.0199,0.0115,0.0113,0.0097,0.0103,0.0124,0.0142,...,0.01,0.0109,0.0089,0.0109,0.0073,0.0099,0.0111,0.0145,0.0193,0.5353
