In [None]:
import json
import os

from google.colab import userdata

username = userdata.get('KAGGLE_USER')
key = userdata.get('KAGGLE_KEY')

# Write the credentials into kaggle.json from Python instead of `!echo`:
# json.dump escapes quotes/backslashes in the values, and the secret is never
# interpolated into a shell command line.
kaggle_dir = os.path.expanduser('~/.kaggle')
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
with open(kaggle_json_path, 'w') as kaggle_json:
  json.dump({'username': username, 'key': key}, kaggle_json)
# Owner read/write only (same mode as the original `chmod 600`), applied to the
# file that was actually written rather than a hard-coded /root path.
os.chmod(kaggle_json_path, 0o600)

In [None]:
# Download dataset
!kaggle competitions download -c liver-ultrasound-detection

In [None]:
!unzip /content/liver-ultrasound-detection.zip && rm -rf /content/liver-ultrasound-detection.zip

---

# Read img

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the image: machine negative
image_tag = 19793

# cv2.imread's second argument is an IMREAD_* flag, not a COLOR_* conversion code,
# so read with the default flag (3-channel BGR) and convert to RGB explicitly.
image = None
for split in ('train', 'val'):
  image_path = f'/content/{split}/{split}/images/{image_tag}.jpg'
  image = cv2.imread(image_path)
  if image is not None:
    break
if image is None:
  raise FileNotFoundError(f'Image {image_tag}.jpg was not found in the train or val split')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

height, width = image.shape[0], image.shape[1]
plt.imshow(image)
plt.show()

In [None]:
# Load the image: machine positive
image_tag = 93078

# cv2.imread's second argument is an IMREAD_* flag, not a COLOR_* conversion code,
# so read with the default flag (3-channel BGR) and convert to RGB explicitly.
image = None
for split in ('train', 'val'):
  image_path = f'/content/{split}/{split}/images/{image_tag}.jpg'
  image = cv2.imread(image_path)
  if image is not None:
    break
if image is None:
  raise FileNotFoundError(f'Image {image_tag}.jpg was not found in the train or val split')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

height, width = image.shape[0], image.shape[1]
plt.imshow(image)
plt.show()

In [None]:
# Load the image: mobile negative
image_tag = 3544

# cv2.imread's second argument is an IMREAD_* flag, not a COLOR_* conversion code,
# so read with the default flag (3-channel BGR) and convert to RGB explicitly.
image = None
for split in ('train', 'val'):
  image_path = f'/content/{split}/{split}/images/{image_tag}.jpg'
  image = cv2.imread(image_path)
  if image is not None:
    break
if image is None:
  raise FileNotFoundError(f'Image {image_tag}.jpg was not found in the train or val split')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

height, width = image.shape[0], image.shape[1]
plt.imshow(image)
plt.show()

In [None]:
# Load the image: mobile positive
image_tag = 128189

# cv2.imread's second argument is an IMREAD_* flag, not a COLOR_* conversion code,
# so read with the default flag (3-channel BGR) and convert to RGB explicitly.
image = None
for split in ('train', 'val'):
  image_path = f'/content/{split}/{split}/images/{image_tag}.jpg'
  image = cv2.imread(image_path)
  if image is not None:
    break
if image is None:
  raise FileNotFoundError(f'Image {image_tag}.jpg was not found in the train or val split')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

height, width = image.shape[0], image.shape[1]
plt.imshow(image)
plt.show()

## add bounding box

In [None]:
def add_boxes(image, tag):
  """Draw the YOLO-format boxes annotated for `tag` onto a grayscale image.

  The annotation file is looked up first under the train split and then under
  the val split. Each non-blank line is parsed as
  `class_id x_center y_center box_width box_height`, with the four box values
  treated as fractions of the image width/height.

  Args:
    image: single-channel image (it is converted with COLOR_GRAY2BGR).
    tag: image identifier used to build the annotation file name.

  Returns:
    A 3-channel copy of `image` with a green rectangle and a white class-id
    label per annotation line.

  Raises:
    FileNotFoundError: if neither annotation file exists.
  """
  image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
  # Use this image's own size instead of notebook-level `height`/`width`
  # globals, which may belong to a different image if cells ran out of order.
  height, width = image.shape[:2]

  train_annotation_path = f'/content/train/train/annotations/{tag}.txt'
  test_annotation_path = f'/content/val/val/annotations/{tag}.txt'

  try:
    # Read the YOLO annotation file
    with open(train_annotation_path, 'r') as file:
      lines = file.readlines()
  except FileNotFoundError:
    # Only a missing train file falls back to val; other errors propagate.
    with open(test_annotation_path, 'r') as file:
      lines = file.readlines()

  color = (0, 255, 0)  # Green color for bounding box
  thickness = 3
  text_color = (255, 255, 255)
  font = cv2.FONT_HERSHEY_SIMPLEX
  # Scale the label with the image so it stays legible at any resolution.
  font_scale = min(width, height) / 1000.0
  font_thickness = max(1, int(font_scale * 2))

  for line in lines:
    components = line.strip().split()
    if not components:
      # Tolerate blank lines (e.g. a trailing newline at the end of the file).
      continue
    class_id = int(components[0])
    x_center, y_center, bbox_width, bbox_height = (float(value) for value in components[1:5])

    # Convert normalized coordinates to pixel coordinates
    x_center_pixel = int(x_center * width)
    y_center_pixel = int(y_center * height)
    bbox_width_pixel = int(bbox_width * width)
    bbox_height_pixel = int(bbox_height * height)

    # Calculate the top-left and bottom-right corners of the bounding box
    top_left = (x_center_pixel - bbox_width_pixel // 2, y_center_pixel - bbox_height_pixel // 2)
    bottom_right = (x_center_pixel + bbox_width_pixel // 2, y_center_pixel + bbox_height_pixel // 2)
    cv2.rectangle(image, top_left, bottom_right, color, thickness)

    # Put the class id just above the box, roughly centred horizontally.
    text = str(class_id)
    text_size, _ = cv2.getTextSize(text, font, font_scale, font_thickness)
    text_origin = (top_left[0] + (bbox_width_pixel // 2) - 10, top_left[1] - 5)
    # Ensure text is within image bounds
    text_origin = (max(text_origin[0], 0), max(text_origin[1], text_size[1]))
    cv2.putText(image, text, text_origin, font, font_scale, text_color, font_thickness)

  # Convert the image from BGR to RGB format for displaying with Matplotlib
  image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  return image_rgb

## try to infer

In [None]:
plt.imshow(add_boxes(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), image_tag), cmap='gray')
plt.show()

In [None]:
plt.imshow(add_boxes(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), image_tag), cmap='gray')
plt.show()

# Image Enhancement

In [None]:
gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
plt.imshow(gray_image, cmap='gray')
plt.show()

## Invert black to white

In [None]:
def invert_lib(image_file, with_plot=True, gray_scale=True):
    """Bitwise-invert an image and either plot it or return it.

    Args:
        image_file: image array to invert.
        with_plot: when True, show the original and the inverted image side
            by side and return True; when False, return the inverted image.
        gray_scale: when True, the plots use the 'gray' colormap.

    Returns:
        True if `with_plot` is set, otherwise the inverted image.
    """
    image_i = cv2.bitwise_not(image_file)  # image_i = 255 - image_src
    if not with_plot:
        return image_i

    cmap_val = 'gray' if gray_scale else None
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 20))
    panels = (('Original', image_file), ("Inverted", image_i))
    for ax, (panel_title, panel_image) in zip(axes, panels):
        ax.axis("off")
        ax.title.set_text(panel_title)
        ax.imshow(panel_image, cmap=cmap_val)
    return True

invert_lib(gray_image)

In [None]:
inv_img = invert_lib(gray_image, with_plot=False)
inv_img.shape

- white to black(RGB)

In [None]:
# The third positional argument of cv2.bitwise_and is `dst`, not `mask`; pass the
# inverted image by keyword so pixels where inv_img == 0 (pure white in the
# grayscale image) are actually blacked out.
plt.imshow(cv2.bitwise_and(image, image, mask=inv_img), cmap='gray')
plt.show()

In [None]:
# `mask=` must be passed by keyword; the third positional argument is `dst`.
cv2.bitwise_and(image, image, mask=inv_img).shape

## Intensity Transformation

### auto scaling

In [None]:
def auto_scale_intensity(image):
    """Linearly rescale pixel intensities so they span [0, 255].

    Args:
        image: numeric image array.

    Returns:
        A uint8 array of the same shape in which the input minimum maps to 0
        and the input maximum maps to 255 (fractions are truncated). A constant
        image, which has no range to stretch, yields an all-zero array.
    """
    # Convert the image to float32 type for precision
    image_float = image.astype(np.float32)

    # Compute the minimum and maximum pixel values
    min_val = np.min(image_float)
    max_val = np.max(image_float)
    value_range = max_val - min_val

    if value_range == 0:
        # Avoid 0/0 -> NaN, whose cast to uint8 is undefined.
        return np.zeros(image_float.shape, dtype=np.uint8)

    # Scale the intensity to the range [0, 255]
    scaled_image = 255 * (image_float - min_val) / value_range

    # Convert back to uint8 type
    return scaled_image.astype(np.uint8)

# Apply auto-scaling
intenT_img = auto_scale_intensity(gray_image)
plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
intenT_img

### contrast Stretching

In [None]:
def contrast_stretching(image):
    """Stretch the image's intensity range linearly onto [0, 255].

    Args:
        image: numeric image array (typically uint8).

    Returns:
        A uint8 array of the same shape in which the input minimum maps to 0
        and the input maximum maps to 255 (fractions are truncated). A constant
        image yields an all-zero array.
    """
    # Work in floating point: with a uint8 input, `255 * (image - min_val)` is
    # evaluated in uint8 and wraps around modulo 256 before the division.
    pixels = np.asarray(image, dtype=np.float64)

    # Compute the minimum and maximum pixel values
    min_val = np.min(pixels)
    max_val = np.max(pixels)
    value_range = max_val - min_val

    if value_range == 0:
        # Nothing to stretch; also avoids a division by zero.
        return np.zeros(pixels.shape, dtype=np.uint8)

    # Apply contrast stretching formula
    stretched_image = 255 * (pixels - min_val) / value_range

    # Convert back to uint8 type
    return np.uint8(stretched_image)

intenT_img = contrast_stretching(gray_image)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
intenT_img

In [None]:
intenT_img = contrast_stretching(inv_img)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
intenT_img

### Multi-Scale Retinex (MSR)

In [None]:
def MSR(grayscale_image, sigma_list):
    """
    Multi-Scale Retinex (MSR) for grayscale images.

    For every sigma the illumination is estimated by Gaussian-blurring the
    image, and the log of that estimate is subtracted from the log of the
    image. The per-scale results are averaged and min-max normalized.

    Args:
        grayscale_image: single-channel image; values are divided by 255, so an
            8-bit range is assumed.
        sigma_list: non-empty sequence of Gaussian sigmas (one per scale).

    Returns:
        uint8 image of the same shape, normalized to [0, 255]. If the retinex
        response is constant, an all-zero image is returned.

    Raises:
        ValueError: if `sigma_list` is empty.
    """
    if len(sigma_list) == 0:
        raise ValueError("sigma_list must contain at least one sigma")

    # Convert the image to float32
    grayscale_image = grayscale_image.astype(np.float32) / 255.0

    # Logarithmic transformation (log1p keeps zero-valued pixels finite)
    log_image = np.log1p(grayscale_image)

    # Retinex: log(image) - log(blurred image), averaged over all scales.
    retinex = np.zeros_like(grayscale_image)
    for sigma in sigma_list:
        # Blur the linear-intensity image and take the log once afterwards;
        # blurring the log image and applying log1p again double-logs the
        # illumination estimate.
        illumination = cv2.GaussianBlur(grayscale_image, (0, 0), sigma)
        retinex += log_image - np.log1p(illumination)

    retinex /= len(sigma_list)

    # Normalize the result to [0, 255]
    low = np.min(retinex)
    span = np.max(retinex) - low
    if span == 0:
        # Constant response: avoid 0/0 in the normalization.
        return np.zeros(retinex.shape, dtype=np.uint8)
    msr = (retinex - low) / span * 255

    return np.uint8(msr)

# Apply MSR (multi-scale retinex, no colour restoration) at three Gaussian scales
sigma_list = [15, 80, 250]
intenT_img = MSR(gray_image, sigma_list)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
intenT_img

In [None]:
# black -> white

intenT_img = MSR(inv_img, sigma_list)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
intenT_img

### gamma Correction

In [None]:
def gamma_correction(image, gamma):
    """Apply gamma correction to an 8-bit image through a 256-entry lookup table.

    Each level v is mapped to 255 * (v / 255) ** (1 / gamma), truncated to an
    integer and saturated to [0, 255].

    Args:
        image: 8-bit image accepted by cv2.LUT.
        gamma: gamma value. Only positive values give a meaningful curve; for
            a negative gamma every table entry is >= 255 before clipping, so
            the output is uniformly 255.

    Returns:
        The corrected image, as returned by cv2.LUT.

    Raises:
        ZeroDivisionError: if `gamma` is a Python number equal to 0.
    """
    # Build a lookup table mapping pixel values [0, 255] to their adjusted gamma values
    invGamma = 1.0 / gamma
    levels = np.arange(0, 256) / 255.0
    # 0 ** negative is inf; silence the warning because the clip below handles it.
    with np.errstate(divide='ignore', over='ignore'):
        table = np.power(levels, invGamma) * 255
    # Saturate before casting: casting inf or values above 255 to uint8 is not well defined.
    table = np.clip(table, 0, 255).astype("uint8")

    # Apply gamma correction using the lookup table
    corrected_image = cv2.LUT(image, table)

    return corrected_image

In [None]:
# Set the gamma value
gamma_value = 1  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(gray_image, gamma_value)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
# Set the gamma value
gamma_value = 1  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(inv_img, gamma_value)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
# Set the gamma value
gamma_value = 3  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(gray_image, gamma_value)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
# Set the gamma value
gamma_value = 3  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(inv_img, gamma_value)

plt.imshow(intenT_img, cmap='gray')
plt.show()

In [None]:
# Set the gamma value
gamma_value = -1  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(gray_image, gamma_value)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
# Set the gamma value
gamma_value = -1  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(inv_img, gamma_value)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
gamma_value = -3  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(gray_image, gamma_value)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
# Set the gamma value
gamma_value = -3  # You can adjust this value as needed

# Apply gamma correction
intenT_img = gamma_correction(inv_img, gamma_value)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

### log Transform

In [None]:
def log_transform(image):
    """Apply a log(1 + x) intensity transform and rescale the result to [0, 255].

    Args:
        image: numeric image array.

    Returns:
        A uint8 array of the same shape. A constant image, whose log transform
        has no range to normalize, yields an all-zero array.
    """
    # Apply logarithmic transformation to the image
    log_image = np.log1p(image.astype(np.float32))

    # Normalize the transformed image to [0, 255]
    low = np.min(log_image)
    span = np.max(log_image) - low
    if span == 0:
        # Avoid 0/0 -> NaN, whose cast to uint8 is undefined.
        return np.zeros(log_image.shape, dtype=np.uint8)
    log_image = (log_image - low) / span * 255

    # Convert back to uint8 type
    return np.uint8(log_image)

In [None]:
intenT_img = log_transform(gray_image)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
intenT_img = log_transform(inv_img)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
intenT_img = log_transform(gray_image)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
intenT_img = log_transform(inv_img)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

## CEUS (Fake by GPT)

In [None]:
def simulate_ceus(image, enhancement_factor=2):
    """Brighten an image by a constant factor, saturating at 255.

    Args:
        image: image array.
        enhancement_factor: multiplier applied to every pixel value.

    Returns:
        uint8 array with each pixel multiplied by `enhancement_factor` and
        clipped to [0, 255].
    """
    # Multiply in float32 so the product cannot wrap around in 8-bit arithmetic.
    boosted = enhancement_factor * image.astype(np.float32)
    saturated = np.clip(boosted, 0, 255)
    return saturated.astype(np.uint8)

intenT_img = simulate_ceus(gray_image)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
intenT_img = simulate_ceus(inv_img)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
intenT_img = simulate_ceus(gray_image)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

In [None]:
intenT_img = simulate_ceus(inv_img)

plt.imshow(add_boxes(intenT_img, image_tag), cmap='gray')
plt.show()

## Histogram Processing

### Histogram Equalization

In [None]:
plt.imshow(cv2.equalizeHist(gray_image), cmap='gray')
plt.show()

### CLAHE

- machine positive

In [None]:
# Step 2: Create a CLAHE object
clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))

# Step 3: Apply CLAHE to the grayscale image
clahe_image = clahe.apply(gray_image)
plt.imshow(clahe_image, cmap='gray')
plt.show()

- mobile positive

In [None]:
# Step 2: Create a CLAHE object
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

# Step 3: Apply CLAHE to the grayscale image
clahe_image = clahe.apply(gray_image)
plt.imshow(clahe_image, cmap='gray')
plt.show()

In [None]:
clahe_image, clahe_image.shape

## Spatial Filter

### Smoothing (Blurring)

#### Averaging

In [None]:
blur = cv2.blur(gray_image, (5,5))   # kernel of 5x5 size

plt.subplot(121),plt.imshow(gray_image, cmap='gray'),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(blur, cmap='gray'),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
plt.show()

#### Gaussian Blurring

In [None]:
# Gaussian blur: cv2.blur is the box filter (and its third argument is `dst`),
# so use cv2.GaussianBlur with a 5x5 kernel and sigmaX = 0.2.
blur = cv2.GaussianBlur(gray_image, (5, 5), 0.2)

plt.subplot(121),plt.imshow(gray_image, cmap='gray'),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(blur, cmap='gray'),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
plt.show()

#### Median Blurring

In [None]:
# cv2.medianBlur takes only (src, ksize[, dst]); the stray 0.2 was being passed as `dst`.
blur = cv2.medianBlur(gray_image, 5)   # 5x5 aperture

plt.subplot(121),plt.imshow(gray_image, cmap='gray'),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(blur, cmap='gray'),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
plt.show()

#### Bilateral Filtering

In [None]:
blur = cv2.bilateralFilter(gray_image,9,75,75)   # d=9 pixel neighbourhood, sigmaColor=75, sigmaSpace=75

plt.subplot(121),plt.imshow(gray_image, cmap='gray'),plt.title('Original')
plt.xticks([]), plt.yticks([])
plt.subplot(122),plt.imshow(blur, cmap='gray'),plt.title('Blurred')
plt.xticks([]), plt.yticks([])
plt.show()

### Sharpening

In [None]:
plt.imshow(cv2.Laplacian(gray_image,cv2.CV_64F), cmap='gray')
plt.show()

# P' Pim filter & dataset

In [None]:
def _fill_holes(mask):
  """Return `mask` OR-ed with every region the flood fill from pixel (0, 0) did not reach."""
  flooded = mask.copy()
  h, w = flooded.shape[:2]
  # cv2.floodFill needs a scratch mask that is 2 pixels larger in each dimension.
  cv2.floodFill(flooded, np.zeros((h + 2, w + 2), np.uint8), (0, 0), 255)
  # Pixels still 0 after the fill were enclosed (assuming (0, 0) is background).
  return cv2.bitwise_or(mask, cv2.bitwise_not(flooded))


def _fill_large_hulls(mask, min_area=500):
  """Paint, in place, the convex hull of every external contour of `mask` with area > `min_area`."""
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  for contour in contours:
    if cv2.contourArea(contour) > min_area:  # Only consider significant contours
      epsilon = 0.01 * cv2.arcLength(contour, True)
      approx = cv2.approxPolyDP(contour, epsilon, True)
      hull = cv2.convexHull(approx)
      cv2.drawContours(mask, [hull], 0, (255), thickness=cv2.FILLED)


def extract_ultrasound(image, imtype='mobile'):
  """Black out everything outside the detected foreground region of an image.

  Args:
    image: 3-channel image (it is converted with COLOR_BGR2GRAY).
    imtype: 'machine' or 'mobile' selects the mean-threshold pipeline; any
      other value selects the adaptive-threshold pipeline (Gital's generated US).

  Returns:
    A copy of `image` with pixels outside the computed mask set to 0. If no
    foreground is found, the result is entirely black.
  """
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

  if imtype == 'machine' or imtype == 'mobile':
    # Foreground = pixels brighter than the image mean, with holes filled.
    _, th1 = cv2.threshold(gray, np.mean(gray), 255, cv2.THRESH_BINARY)
    filled_mask = _fill_holes(th1)

    # Smooth the left edge
    _fill_large_hulls(filled_mask)

    # Union of the mask with its horizontal mirror. bitwise_or is used instead
    # of `+`, which wraps 255 + 255 to 254 in uint8 arithmetic.
    sum_mask = cv2.bitwise_or(filled_mask, cv2.flip(filled_mask, 1))
    filled_mask = _fill_holes(sum_mask)
    cleaned_mask = cv2.morphologyEx(filled_mask, cv2.MORPH_OPEN, np.ones((5, 5), np.uint8))

    # Keep only the convex hull of the largest remaining contour.
    contours, _ = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    hull_mask = np.zeros_like(gray, dtype=np.uint8)
    if contours:
      largest_contour = max(contours, key=cv2.contourArea)
      hull = cv2.convexHull(largest_contour)
      cv2.drawContours(hull_mask, [hull], -1, 255, thickness=cv2.FILLED)
    return cv2.bitwise_and(image, image, mask=hull_mask)

  # Gital's generated US
  adapthresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                     cv2.THRESH_BINARY, 199, 2)
  _, mask = cv2.threshold(adapthresh, 0, 255, cv2.THRESH_BINARY)  # for generated mobile-like
  opened_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, np.ones((5, 5), np.uint8))
  num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(opened_mask, connectivity=8)
  if num_labels < 2:
    # Only the background label exists: nothing to keep, and argmax over an
    # empty stats slice would raise.
    return np.zeros_like(image)

  # Label 0 is the background, hence the offset of 1.
  largest_component = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
  largest_mask = np.zeros_like(opened_mask)
  largest_mask[labels == largest_component] = 255
  filled_mask = _fill_holes(largest_mask)
  _fill_large_hulls(filled_mask)
  return cv2.bitwise_and(image, image, mask=filled_mask)

In [None]:
def display_image(image, title, cmap='gray'):
    """Show `image` with Matplotlib under the given title, without axes."""
    axes = plt.gca()
    axes.imshow(image, cmap=cmap)
    axes.set_title(title)
    axes.axis('off')
    plt.show()

In [None]:
import os

def append_files_in_directory(directory_path):
    """Return the paths of the regular files directly inside `directory_path`.

    Sub-directories are skipped; each returned path is `directory_path` joined
    with the entry name, in the order given by os.listdir.
    """
    file_paths = []
    for entry_name in os.listdir(directory_path):
        candidate = os.path.join(directory_path, entry_name)
        if os.path.isfile(candidate):
            file_paths.append(candidate)
    return file_paths

def check_twin(img_path):
  """Classify an image as "once" or "twice" from the brightness of its top-centre region.

  The image is read, converted to grayscale, and the mean of the crop covering
  the middle third of the width and the top half of the height is compared with
  the mean of the whole image.

  Args:
    img_path: path of the image file.

  Returns:
    "once" if the crop is at least as bright as the whole image, else "twice"
    (presumably two scans side by side with a dark gap between them; confirm).

  Raises:
    ValueError: if the file cannot be read as an image.
  """
  image = cv2.imread(img_path)
  if image is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise ValueError(f"Could not read image: {img_path}")
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  height, width = image.shape

  # Crop box: middle third horizontally, top half vertically.
  crop_width = width // 3
  crop_height = height // 2
  left = (width - crop_width) // 2
  cropped_image = image[0:crop_height, left:left + crop_width]

  if np.mean(cropped_image) >= np.mean(image):
    return "once"
  return "twice"

import pandas as pd
from tqdm import tqdm

# Series.progress_apply only exists after tqdm has registered itself with pandas.
tqdm.pandas()

train_ls = append_files_in_directory('/content/train/train/images')
val_ls = append_files_in_directory('/content/val/val/images')
test_ls = append_files_in_directory('/content/test/test/images')

twin_train = pd.Series(train_ls).progress_apply(check_twin)
twin_val = pd.Series(val_ls).progress_apply(check_twin)
twin_test = pd.Series(test_ls).progress_apply(check_twin)

In [None]:
twin_train.value_counts()

In [None]:
twin_val.value_counts()

In [None]:
twin_test.value_counts()

In [None]:
tw_df_train = pd.DataFrame(data={"file": train_ls, 'twin': twin_train})
tw_df_val = pd.DataFrame(data={"file": val_ls, 'twin': twin_val})
tw_df_test = pd.DataFrame(data={"file": test_ls, 'twin': twin_test})

In [None]:
tw_df_train = tw_df_train[tw_df_train['twin'] == "twice"]
tw_df_val = tw_df_val[tw_df_val['twin'] == "twice"]
tw_df_test = tw_df_test[tw_df_test['twin'] == "twice"]

In [None]:
df_tw = pd.concat([tw_df_train, tw_df_val, tw_df_test])
df_tw

In [None]:
df_tw.to_csv('twin_liver.csv', index=False)

In [None]:
image_path = '/content/train/train/images/1713.jpg'

image = cv2.imread(image_path)
display_image(image, 'mobile train', cmap=None)

test = extract_ultrasound(image, imtype='mobile')
display_image(test, 'twin tail', cmap=None)

In [None]:
# Get dimensions
height, width, _ = image.shape

# Calculate the dimensions for the crop
crop_width = width // 3
crop_height = height // 2
left = (width - crop_width) // 2
top = 0  # Start from the top

# Define the crop box
right = left + crop_width
bottom = top + crop_height

# Crop the image
cropped_image = image[top:bottom, left:right]
display_image(cropped_image, 'crob twin', cmap=None)
np.mean(cropped_image)

In [None]:
image_path = '/content/train/train/images/4483.jpg'

image = cv2.imread(image_path)
display_image(image, 'mobile train', cmap=None)

test = extract_ultrasound(image, imtype='mobile')
display_image(test, 'output', cmap=None)

In [None]:
image_path = '/content/val/val/images/100.jpg'

image = cv2.imread(image_path)
display_image(image, 'mobile val', cmap=None)

test = extract_ultrasound(image, imtype='mobile')
display_image(test, 'output', cmap=None)

In [None]:
image_path = '/content/test/test/images/10001.jpg'

image = cv2.imread(image_path)
display_image(image, 'machine test', cmap=None)

test = extract_ultrasound(image, imtype='machine')
display_image(test, 'output', cmap=None)

In [None]:
image_path = '/content/val/val/images/20584.jpg'

image = cv2.imread(image_path)
display_image(image, 'machine val')

test = extract_ultrasound(image, imtype='machine')
display_image(test, 'val', cmap=None)

In [None]:
image_path = '/content/train/train/images/103315.jpg'

image = cv2.imread(image_path)
display_image(image, 'machine train')

test = extract_ultrasound(image, imtype='machine')
display_image(test, 'val', cmap=None)

# Export Data

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import shutil
import pandas as pd

In [None]:
!mkdir ganset512_t38k_v200
!mkdir ganset512_t38k_v200/train
!mkdir ganset512_t38k_v200/train/machine
!mkdir ganset512_t38k_v200/train/mobile
!mkdir ganset512_t38k_v200/val
!mkdir ganset512_t38k_v200/val/machine
!mkdir ganset512_t38k_v200/val/mobile

## ganset512_t38k_v200

In [None]:
import os
import shutil
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Define paths
pim_path = '/content/drive/MyDrive/superAI_lv2/hackathon/liver_detec_ultrasound/512x512'  # Adjust this path if needed

# Define categories
categories = ['machine_images', 'mobile_images']

# Process each category
for category in categories:
    category_path = os.path.join(pim_path, category)
    # Sort the listing: os.listdir order is arbitrary, and the seeded split below
    # is only reproducible if its input order is.
    image_files = sorted(f for f in os.listdir(category_path) if os.path.isfile(os.path.join(category_path, f)))

    # Split data into 95% train and 5% validation
    train_files, val_files = train_test_split(image_files, test_size=0.05, shuffle=True, random_state=888)
    kind = 'machine' if 'machine' in category else 'mobile'

    # Resize every image to 512x512 and write it into its split/kind folder
    for split, split_files in (('train', train_files), ('val', val_files)):
        dst_dir = os.path.join('/content/ganset512_t38k_v200', split, kind)
        os.makedirs(dst_dir, exist_ok=True)

        for file_name in tqdm(split_files, desc=f'{kind}/{split}'):
            src_path = os.path.join(category_path, file_name)
            dst_path = os.path.join(dst_dir, file_name)

            # cv2.imread takes an IMREAD_* flag; the previously passed
            # cv2.COLOR_BGR2RGB has the same numeric value as IMREAD_ANYCOLOR,
            # so naming the flag keeps the pixel data that was written before.
            image = cv2.imread(src_path, cv2.IMREAD_ANYCOLOR)
            if image is None:
                tqdm.write(f'Skipping unreadable image: {src_path}')
                continue
            image = cv2.resize(image, (512, 512))
            cv2.imwrite(dst_path, image)

print("Data restructuring and resizing complete.")

In [None]:
import shutil

# Define the folder to be zipped and the output zip file path
folder_to_zip = f'/content/ganset512_t38k_v200'
output_zip_file = f'/content/drive/MyDrive/superAI_lv2/hackathon/liver_detec_ultrasound/ganset512_t38k_v200'

# Create a zip file from the folder
shutil.make_archive(output_zip_file, 'zip', folder_to_zip)

print("Folder zipped successfully!")

## gan_dataset

In [None]:
def get_image_tag(string):
  """Return the part of `string` before its first '.', or the whole string if it has none."""
  head, _, _ = string.partition('.')
  return head

mapping_df = pd.read_csv('/content/mapping2.csv')
mapping_df['image_tag'] = mapping_df['Image File'].apply(get_image_tag)
mapping_df

In [None]:
machine_df = mapping_df[mapping_df.Source == 'machine']
mobile_df = mapping_df[mapping_df.Source == 'mobile']
machine_df.shape, mobile_df.shape

In [None]:
def image_process(image):
  """Return a single-channel version of `image`.

  An array with three dimensions is converted with COLOR_RGB2GRAY; any other
  array is returned unchanged. The Gaussian-blur and CLAHE steps that used to
  follow are currently disabled.
  """
  if len(image.shape) != 3:
    return image
  return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

In [None]:
def make_file(image_tag):
  """Copy the image `image_tag` from the competition data into /content/gan_dataset.

  The image is looked up in the train split first and then in the val split,
  and written unmodified to /content/gan_dataset/<split>/mobile/<image_tag>.jpg.

  Args:
    image_tag: image identifier (file name without the .jpg extension).

  Raises:
    FileNotFoundError: if the image is in neither split.
    IOError: if the output file could not be written.
  """
  import os  # local import so the function works even if the cell importing os was skipped

  image = None
  for kind in ('train', 'val'):
    image_path = f'/content/{kind}/{kind}/images/{image_tag}.jpg'
    # cv2.imread takes an IMREAD_* flag; the previously passed cv2.COLOR_BGR2RGB
    # has the same numeric value as IMREAD_ANYCOLOR, so the pixel data is unchanged.
    image = cv2.imread(image_path, cv2.IMREAD_ANYCOLOR)
    if image is not None:
      break
  if image is None:
    raise FileNotFoundError(f'Image {image_tag}.jpg was not found in the train or val split')

  # The original computed image_process(image) but never used the result; the
  # raw image is what gets written, so the dead call is dropped.
  # NOTE(review): every image lands in the 'mobile' folder, including tags that
  # come from machine_df; confirm this is intended.
  image_outpath = f"/content/gan_dataset/{kind}/mobile/{image_tag}.jpg"
  # cv2.imwrite does not create directories and only returns False on failure.
  os.makedirs(os.path.dirname(image_outpath), exist_ok=True)
  if not cv2.imwrite(image_outpath, image):
    raise IOError(f'Could not write {image_outpath}')

print("Train process successfully!")

In [None]:
machine_df.image_tag.apply(make_file)

In [None]:
mobile_df.image_tag.apply(make_file)

In [None]:
import os

def count_files_in_directory(directory_path):
    """Count the regular files directly inside `directory_path` (sub-directories are ignored)."""
    file_total = 0
    for entry_name in os.listdir(directory_path):
        if os.path.isfile(os.path.join(directory_path, entry_name)):
            file_total += 1
    return file_total

# Example usage
directory_path = '/content/drive/MyDrive/superAI_lv2/hackathon/liver_detec_ultrasound/512x512/machine_images'
print(f"Number of files: {count_files_in_directory(directory_path)}")

directory_path = '/content/drive/MyDrive/superAI_lv2/hackathon/liver_detec_ultrasound/512x512/mobile_images'
print(f"Number of files: {count_files_in_directory(directory_path)}")

In [None]:
import shutil

# Define the folder to be zipped and the output zip file path
folder_to_zip = f'/content/gan_dataset'
output_zip_file = f'/content/drive/MyDrive/superAI_lv2/hackathon/liver_detec_ultrasound/gan_dataset'

# Create a zip file from the folder
shutil.make_archive(output_zip_file, 'zip', folder_to_zip)

print("Folder zipped successfully!")

In [None]:
# NOTE(review): earlier cells read from and write to /content/drive/MyDrive/...,
# so Drive has to be mounted before they run. This cell is the last one in the
# notebook; run it first (or move it to the top) before a Restart & Run All.
from google.colab import drive
drive.mount('/content/drive')