In [None]:
#@title 🔬 Deep-MAP v1.0
#@markdown **Welcome!** This notebook is a tool for automatically detecting and analyzing microplastics in microscope images.
#@markdown ---
#@markdown ### **How to Use**
#@markdown 1.  **Prepare Your Files**: Put your microscope images (PNG or JPG/JPEG) into a single ZIP archive.
#@markdown     * **For a single area**: Place the images directly in the root of the ZIP file.
#@markdown     * **For multiple areas**: Organize images into separate folders for each area, then add these folders to the ZIP archive (e.g., `Nagoya_River/`, `Indonesia_River/`).
#@markdown 2.  **Run the Cells**: Execute the cells in order from top to bottom.
#@markdown     * You can run all cells at once by selecting `Runtime` > `Run all`.
#@markdown     * For faster processing, select `Runtime` > `Change runtime type` > `T4 GPU`.
#@markdown 3.  **Input Information**: After uploading the file, fill in the form with the required analysis information.
#@markdown 4.  **Download Results**: Once all processing is complete, the **analysis results in an Excel file** and the **annotated images** will be downloaded automatically.

In [None]:
#@title 1. Prepare Packages and Model
#@markdown This cell installs required packages and downloads the pre-trained model.
!pip install openpyxl ultralytics scikit-image ipywidgets -q
import os
import zipfile
import shutil
from google.colab import files
import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
from tqdm.notebook import tqdm
import glob
from IPython.display import display, clear_output
import ipywidgets as widgets

print("✅ Packages are ready.")
# --- Model Download ---
model_filename = "best.pt"
file_id = "1QC6I9zhcdSGjI6LWYTrbvkn-FyfxRyuk"
if not os.path.exists(model_filename):
  print("Downloading model...")
  !gdown --id {file_id}
  print("✅ Model download complete.")
else:
  print("✅ Model already exists.")

In [None]:
#@title 2. Upload Image ZIP File
#@markdown Click the "Choose Files" button to upload the ZIP file containing your images.
upload_dir = 'uploads'
# A module-level assignment is already global; later cells read this name to
# locate the extracted images. It stays '' when nothing usable was uploaded.
analysis_root_dir = ''

# Start from an empty extraction directory so files from a previous upload
# never get mixed into the current analysis.
if os.path.exists(upload_dir):
    shutil.rmtree(upload_dir)
os.makedirs(upload_dir, exist_ok=True)

print('Please upload the ZIP file containing your images.')
uploaded = files.upload()

if not uploaded:
    print('\n⚠️ No file was uploaded.')
else:
    # Only the first uploaded file is used.
    zip_filename = next(iter(uploaded))
    try:
        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(upload_dir)
    except zipfile.BadZipFile:
        # Give a readable hint instead of a traceback when the upload is not a ZIP.
        print(f"\n⚠️ '{zip_filename}' is not a valid ZIP archive. Please run this cell again and upload a ZIP file.")
    else:
        # Archives created on macOS carry a metadata folder that holds no images.
        macosx_dir = os.path.join(upload_dir, '__MACOSX')
        if os.path.exists(macosx_dir):
            shutil.rmtree(macosx_dir)

        # Automatically find the main data folder inside 'uploads'.
        # Hidden entries (e.g. '.DS_Store') are ignored so that they do not
        # prevent a single wrapper folder from being recognised.
        extracted_items = [item for item in os.listdir(upload_dir) if not item.startswith('.')]
        if len(extracted_items) == 1 and os.path.isdir(os.path.join(upload_dir, extracted_items[0])):
            analysis_root_dir = os.path.join(upload_dir, extracted_items[0])
            print(f"\n✅ Files extracted. Main data folder found at: '{analysis_root_dir}'")
        else:
            analysis_root_dir = upload_dir
            print(f'\n✅ Files extracted and ready in the "{analysis_root_dir}" directory.')
    finally:
        # The uploaded archive is no longer needed, whether or not extraction worked.
        if os.path.exists(zip_filename):
            os.remove(zip_filename)

In [None]:
#@title 3. Input Analysis Information
#@markdown Please enter the information for each detected sample area.
#@markdown **Sample collection areas (subdirectories) are detected automatically.**

# --- Auto-detect subdirectories (sample areas) from the correct root folder ---
try:
    # Referencing the name is enough to find out whether Cell 2 has been run.
    # The try block is kept this small so that an unrelated NameError further
    # down is not misreported as a missing upload.
    analysis_root_dir
except NameError:
    print("⚠️ Error: `analysis_root_dir` is not defined. Please run Cell 2 again to upload files.")
else:
    if not analysis_root_dir or not os.path.exists(analysis_root_dir):
        print("⚠️ Error: Analysis root directory not found. Please run Cell 2 again to upload files.")
    else:
        # Every visible sub-folder is one sample area. Sorting gives a stable
        # form order; hidden folders (e.g. '.ipynb_checkpoints') are skipped.
        sub_dirs = sorted(
            d for d in os.listdir(analysis_root_dir)
            if not d.startswith('.') and os.path.isdir(os.path.join(analysis_root_dir, d))
        )
        all_images_in_root = glob.glob(os.path.join(analysis_root_dir, '*.[pP][nN][gG]')) + \
                             glob.glob(os.path.join(analysis_root_dir, '*.[jJ][pP]*[gG]'))
        # Images placed directly in the root folder form one implicit area.
        if not sub_dirs and all_images_in_root:
            sub_dirs = ['Default_Area']

        if not sub_dirs:
            # Nothing to analyse: say so instead of showing an empty form.
            print("⚠️ No images or sample-area folders were found in the uploaded ZIP file. "
                  "Please check its contents and run Cell 2 again.")
        else:
            print(f"Detected sample areas: {', '.join(sub_dirs)}\n")

            # --- Create input forms for each area ---
            style = {'description_width': '150px'}
            layout = widgets.Layout(width='400px')
            area_widgets = {}
            all_forms = []

            for area in sub_dirs:
                cat_widget = widgets.Text(description="Categories (e.g., River):", style=style, layout=layout)
                lat_widget = widgets.Text(description="Latitude:", style=style, layout=layout)
                lon_widget = widgets.Text(description="Longitude:", style=style, layout=layout)
                volume_widget = widgets.FloatText(description="Sample Volume (L):", value=1.0, style=style, layout=layout)

                # Keep a handle on every widget so the button callback can read the values.
                area_widgets[area] = {
                    'categories': cat_widget,
                    'latitude': lat_widget,
                    'longitude': lon_widget,
                    'sample_volume': volume_widget
                }

                area_form = widgets.VBox([
                    widgets.HTML(f"<b>Information for Area: {area}</b>"),
                    cat_widget,
                    lat_widget,
                    lon_widget,
                    volume_widget
                ])
                all_forms.append(area_form)

            # --- Global settings ---
            scale_widget = widgets.FloatText(description="Scale: 1 pixel = ? μm:", value=1.0, style=style, layout=layout)
            conf_widget = widgets.FloatSlider(description="Confidence Threshold:", min=0.05, max=1.0, step=0.05, value=0.25, style=style, layout=layout)
            global_settings_form = widgets.VBox([
                widgets.HTML("<hr><b>Global Settings (for all areas)</b>"),
                scale_widget,
                conf_widget
            ])
            all_forms.append(global_settings_form)

            # --- Button and submission ---
            button = widgets.Button(description="Confirm All Information and Start Analysis", button_style='success', layout=widgets.Layout(width='90%'))
            # Module-level dict read by the analysis routine; filled in on click.
            metadata = {'areas': {}}

            def on_button_click(b):
                """Copy the form values into ``metadata`` and start the analysis.

                Non-positive sample volumes and scales are stored as ``None``.
                ``b`` is the clicked button (unused).
                """
                # The analysis routine lives in Cell 4; without it there is
                # nothing to run, so ask for it instead of raising NameError.
                analysis_fn = globals().get('run_analysis_and_export')
                if analysis_fn is None:
                    print("⚠️ The analysis function is not defined yet. Please run Cell 4, then click the button again.")
                    return

                # Rebuild the per-area entries so a second click cannot keep stale areas.
                metadata['areas'] = {}
                for area in sub_dirs:
                    volume = area_widgets[area]['sample_volume'].value
                    metadata['areas'][area] = {
                        'categories': area_widgets[area]['categories'].value,
                        'latitude': area_widgets[area]['latitude'].value,
                        'longitude': area_widgets[area]['longitude'].value,
                        'sample_volume': volume if volume > 0 else None,
                    }
                metadata['scale'] = scale_widget.value if scale_widget.value > 0 else None
                metadata['conf'] = conf_widget.value

                clear_output(wait=True)
                print("✅ Starting analysis with the following information:")
                print(f"  - Global Scale: {metadata['scale']}")
                print(f"  - Global Confidence: {metadata['conf']}")
                for area, data in metadata['areas'].items():
                    print(f"  --- Area: {area} ---")
                    for k, v in data.items():
                        print(f"    - {k}: {v}")
                analysis_fn()

            button.on_click(on_button_click)
            display(widgets.VBox(all_forms + [button]))

In [None]:
#@title 4. Run Analysis and Export Results
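#@markdown This cell defines the analysis routine. Run it before clicking the button in Cell 3: the analysis itself starts when you click **Confirm All Information and Start Analysis**, and the results are then downloaded automatically.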

def _make_sheet_name(area_name, suffix, used_names):
    """Build a unique, Excel-compatible worksheet name from ``area_name`` + ``suffix``.

    Excel worksheet names may be at most 31 characters long, must not contain
    any of ``[ ] : * ? /`` or a backslash, and are compared case-insensitively.
    Short, clean area names are returned unchanged with the suffix appended.

    Args:
        area_name: Sample-area (folder) name.
        suffix: Fixed ending such as ``'_raw_data'``.
        used_names: Set of lower-cased names already taken; updated in place.

    Returns:
        The worksheet name to use.
    """
    max_len = 31
    cleaned = ''.join('_' if ch in '[]:*?/\\' else ch for ch in str(area_name))
    candidate = f"{cleaned[:max_len - len(suffix)]}{suffix}"
    counter = 1
    # Truncation can make two different areas collide; add a short counter tag.
    while candidate.lower() in used_names:
        tag = f"~{counter}"
        candidate = f"{cleaned[:max_len - len(suffix) - len(tag)]}{tag}{suffix}"
        counter += 1
    used_names.add(candidate.lower())
    return candidate


def _size_class_share(widths, lower, upper, include_upper=False):
    """Return the share of ``widths`` inside one size class, formatted like ``'42%'``.

    The class is the half-open interval ``[lower, upper)`` so that adjacent
    classes neither overlap nor leave gaps; pass ``include_upper=True`` for the
    last class to make it ``[lower, upper]``. Returns ``'N/A'`` for empty input.
    """
    if len(widths) == 0:
        return "N/A"
    below_upper = (widths <= upper) if include_upper else (widths < upper)
    in_class = (widths >= lower) & below_upper
    return f"{in_class.sum() / len(widths) * 100:.0f}%"


def run_analysis_and_export():
    """Detect particles in all uploaded images and export the results.

    Reads the notebook globals ``model_filename``, ``metadata``, ``sub_dirs``
    and ``analysis_root_dir`` that the earlier cells set up. For every sample
    area it runs the segmentation model on each PNG/JPG image, writes an
    annotated copy to ``results_images/<area>/`` and records one row per
    detected particle (class, confidence, colour, area, width).

    When at least one particle was found, it writes
    ``analysis_results_<timestamp>.xlsx`` (per-area ``*_raw_data`` and
    ``*_summary`` sheets plus an ``All_Summary`` sheet), zips the annotated
    images to ``results_images.zip`` and triggers a browser download of both.
    Returns ``None``; if the model cannot be loaded a message is printed and
    the function returns early.
    """
    from ultralytics.utils.plotting import colors

    def get_color_advanced(image, mask, k=3, s_thresh=50, v_thresh=50, fallback_threshold=0.1):
        """Name the representative colour of the masked region.

        Args:
            image: BGR image (it is converted with ``cv2.COLOR_BGR2HSV``).
            mask: uint8 mask with the same height/width as ``image``; non-zero
                pixels belong to the particle.
            k: Number of k-means clusters used on the vibrant pixels.
            s_thresh: Minimum HSV saturation for a pixel to count as vibrant.
            v_thresh: Minimum HSV value for a pixel to count as vibrant.
            fallback_threshold: If fewer than this fraction of the masked
                pixels are vibrant, the plain mean of all masked pixels is used.

        Returns:
            One of ``'black'``, ``'white'``, ``'red'``, ``'green'``, ``'blue'``,
            or ``'Unknown'`` when the mask is empty.
        """
        # 1. Get all pixels within the mask first
        all_pixels = image[np.where(mask > 0)]
        if len(all_pixels) == 0:
            return 'Unknown'

        # 2. Try to extract vibrant pixels
        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        vibrant_mask = cv2.bitwise_and(
            cv2.inRange(hsv_image[:,:,1], s_thresh, 255),
            cv2.inRange(hsv_image[:,:,2], v_thresh, 255)
        )
        final_mask = cv2.bitwise_and(mask, vibrant_mask)
        vibrant_pixels = image[np.where(final_mask > 0)]

        # 3. Check the ratio of vibrant pixels
        if len(vibrant_pixels) < (len(all_pixels) * fallback_threshold):
            # Mostly dull pixels: the plain average is the best representative.
            mean_bgr = np.mean(all_pixels, axis=0)
        else:
            if len(vibrant_pixels) < k:
                # Too few samples to form k clusters.
                mean_bgr = np.mean(vibrant_pixels, axis=0)
            else:
                # Use the centre of the most populated cluster.
                pixels = np.float32(vibrant_pixels)
                criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
                _, labels, centers = cv2.kmeans(pixels, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
                _, counts = np.unique(labels, return_counts=True)
                mean_bgr = centers[np.argmax(counts)]

        # 4. Determine color name by comparing the representative color to reference colors (BGR order)
        color_references = {
            'black': [25, 25, 25], 'white': [230, 230, 230], 'red': [0, 0, 200],
            'green': [0, 180, 0], 'blue': [200, 0, 0]
        }
        min_dist, detected_color = float('inf'), 'Unknown'
        for color_name, bgr_value in color_references.items():
            dist = np.linalg.norm(mean_bgr - np.array(bgr_value))
            if dist < min_dist:
                min_dist, detected_color = dist, color_name

        return detected_color

    try:
        model = YOLO(model_filename)
        print("\n✅ Model loaded successfully.")
    except Exception as e:
        print(f"⚠️ Failed to load model: {e}")
        return

    all_results = []
    conf_threshold = metadata.get('conf', 0.25)
    print(f"Confidence threshold for analysis: {conf_threshold}")

    # Annotated images are rebuilt from scratch on every run.
    results_img_dir = 'results_images'
    if os.path.exists(results_img_dir):
        shutil.rmtree(results_img_dir)
    os.makedirs(results_img_dir)

    # Particle IDs are unique across all areas and images of one run.
    particle_id_counter = 1

    for area_name in sub_dirs:
        print(f"\n--- Processing images for area: '{area_name}' ---")
        # 'Default_Area' is the placeholder for images stored directly in the root folder.
        current_image_dir = os.path.join(analysis_root_dir, area_name) if area_name != 'Default_Area' else analysis_root_dir
        area_results_dir = os.path.join(results_img_dir, area_name)
        os.makedirs(area_results_dir, exist_ok=True)

        # Sorted so that particle IDs are assigned in a reproducible order.
        image_files = sorted(
            glob.glob(os.path.join(current_image_dir, '*.[pP][nN][gG]'))
            + glob.glob(os.path.join(current_image_dir, '*.[jJ][pP]*[gG]'))
        )

        for image_path in tqdm(image_files, desc=f"Processing {area_name}"):
            img = cv2.imread(image_path)
            if img is None:
                # Unreadable or corrupt file: skip it.
                continue
            img_height, img_width = img.shape[:2]

            results = model(image_path, conf=conf_threshold, verbose=False)[0]

            # Draw boxes/masks without the default labels; custom ID labels follow.
            annotated_img = results.plot(labels=False, conf=False)

            if results.masks is not None:
                for i in range(len(results.boxes)):
                    box = results.boxes[i]
                    cls_idx = int(box.cls)
                    class_name = model.names[cls_idx]
                    confidence = float(box.conf)

                    label = f"ID:{particle_id_counter} {class_name} {confidence:.2f}"

                    xyxy = box.xyxy[0].cpu().numpy().astype(int)
                    x1, y1 = int(xyxy[0]), int(xyxy[1])

                    font_scale = 1.0
                    font_thickness = 2

                    (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)
                    # Keep the label inside the image when the box touches the top edge.
                    label_y = max(y1, text_height + baseline)
                    cv2.rectangle(annotated_img, (x1, label_y), (x1 + text_width, label_y - text_height - baseline), colors(cls_idx, True), -1)
                    cv2.putText(annotated_img, label, (x1, label_y - 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), font_thickness)

                    # Binarise first and resize with nearest-neighbour so the mask
                    # stays strictly 0/1. Interpolating a float mask and truncating
                    # to uint8 would drop every blended edge pixel and shrink the
                    # measured area.
                    mask_data = results.masks.data[i]
                    mask_small = (mask_data.cpu().numpy() > 0.5).astype(np.uint8)
                    mask = cv2.resize(mask_small, (img_width, img_height), interpolation=cv2.INTER_NEAREST)
                    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                    # Measure the largest blob; a mask can contain stray specks and
                    # the first contour returned is not necessarily the particle.
                    largest_contour = max(contours, key=cv2.contourArea) if len(contours) > 0 else None
                    # cv2.fitEllipse needs at least 5 points; otherwise the width is reported as 0.
                    if largest_contour is not None and len(largest_contour) >= 5:
                        width_px = cv2.fitEllipse(largest_contour)[1][0]
                    else:
                        width_px = 0

                    all_results.append({
                        'particle_id': particle_id_counter,
                        'area_name': area_name, 'image_name': os.path.basename(image_path),
                        'class': class_name,
                        'confidence': confidence,
                        'color': get_color_advanced(img, mask),
                        'area': int(np.count_nonzero(mask)),
                        'width': width_px
                    })

                    particle_id_counter += 1

            cv2.imwrite(os.path.join(area_results_dir, f"result_{os.path.basename(image_path)}"), annotated_img)

    if not all_results:
        print("\n⚠️ No microplastics were detected in the images.")
    else:
        df_all = pd.DataFrame(all_results)
        # Without a valid scale, measurements stay in pixel units.
        scale = metadata.get('scale')
        len_unit = 'um' if scale else 'px'
        area_unit = 'um2' if scale else 'px2'

        df_all[f'width_{len_unit}'] = df_all['width'] * scale if scale else df_all['width']
        df_all[f'area_{area_unit}'] = df_all['area'] * (scale**2) if scale else df_all['area']

        OUTPUT_EXCEL_PATH = f"analysis_results_{pd.Timestamp.now().strftime('%Y%m%d_%H%M')}.xlsx"
        # 'All_Summary' is reserved up front so no per-area sheet can collide with it.
        used_sheet_names = {'all_summary'}
        with pd.ExcelWriter(OUTPUT_EXCEL_PATH) as writer:
            all_classes = ['Fragment', 'Fiber', 'Foam', 'Pellet', 'Film']
            all_colors = ['black', 'red', 'green', 'blue', 'white']

            # --- Per-area sheets: raw particle list and class x colour summary ---
            for area_name in df_all['area_name'].unique():
                df_area = df_all[df_all['area_name'] == area_name].copy()

                df_area_raw = df_area[['particle_id', 'image_name', 'class', 'confidence', 'color', f'area_{area_unit}', f'width_{len_unit}']]
                df_area_raw.to_excel(writer, sheet_name=_make_sheet_name(area_name, '_raw_data', used_sheet_names), index=False)

                count_pivot = df_area.pivot_table(index='class', columns='color', values='image_name', aggfunc='size', fill_value=0)
                width_pivot = df_area.pivot_table(index='class', columns='color', values=f'width_{len_unit}', aggfunc='mean', fill_value=0)
                area_pivot = df_area.pivot_table(index='class', columns='color', values=f'area_{area_unit}', aggfunc='mean', fill_value=0)
                # Force the full class/colour grid so every sheet has the same layout.
                count_pivot = count_pivot.reindex(index=all_classes, columns=all_colors, fill_value=0)
                width_pivot = width_pivot.reindex(index=all_classes, columns=all_colors, fill_value=0)
                area_pivot = area_pivot.reindex(index=all_classes, columns=all_colors, fill_value=0)

                new_idx = pd.MultiIndex.from_tuples([(c, m) for c in all_classes for m in ['number', 'average_width', 'average_area']], names=['class', 'metric'])
                summary_df = pd.DataFrame(index=new_idx, columns=all_colors)
                for c in all_classes:
                    summary_df.loc[(c, 'number'), :] = count_pivot.loc[c, :]
                    summary_df.loc[(c, 'average_width'), :] = width_pivot.loc[c, :]
                    summary_df.loc[(c, 'average_area'), :] = area_pivot.loc[c, :]
                summary_df.to_excel(writer, sheet_name=_make_sheet_name(area_name, '_summary', used_sheet_names))

            # --- One overview row per area ---
            summary_list = []
            for area_name in df_all['area_name'].unique():
                df_area = df_all[df_all['area_name'] == area_name]
                total_count = len(df_area)
                area_meta = metadata['areas'].get(area_name, {})
                sample_volume = area_meta.get('sample_volume')
                categories = area_meta.get('categories', '')

                if sample_volume and sample_volume > 0:
                    abundance = total_count / sample_volume
                else:
                    abundance = "N/A (Volume not provided)"

                # Size classes are only meaningful with a physical scale. They are
                # half-open ([1,100), [100,300), [300,600), [600,1000)) with a closed
                # last class [1000,5000], so a particle on a boundary is counted once
                # and nothing between 999.99 and 1000 is lost. Particles whose width
                # is outside 1-5000 (including unmeasurable ones stored as 0) belong
                # to no class.
                widths = df_area[f'width_{len_unit}']
                shape_pct = (df_area['class'].value_counts(normalize=True) * 100).to_dict()
                color_pct = (df_area['color'].value_counts(normalize=True) * 100).to_dict()

                summary_list.append({
                    'Location': area_name,
                    'Abundance (particles/L)': f"{abundance:.2f}" if isinstance(abundance, (int, float)) else abundance,
                    'Categories': categories,
                    'Latitude': area_meta.get('latitude', ''), 'Longitude': area_meta.get('longitude', ''),
                    'Sample Volume (L)': sample_volume if sample_volume else "N/A",
                    'Dominant Shape': df_area['class'].mode()[0] if not df_area.empty else "N/A",
                    'Fragment':f"{shape_pct.get('Fragment',0):.0f}%", 'Fiber':f"{shape_pct.get('Fiber',0):.0f}%",
                    'Pellet':f"{shape_pct.get('Pellet',0):.0f}%", 'Foam':f"{shape_pct.get('Foam',0):.0f}%", 'Film':f"{shape_pct.get('Film',0):.0f}%",
                    'Dominant Colour': df_area['color'].mode()[0] if not df_area.empty else "N/A",
                    'Black':f"{color_pct.get('black',0):.0f}%", 'Red':f"{color_pct.get('red',0):.0f}%",
                    'Green':f"{color_pct.get('green',0):.0f}%", 'Blue':f"{color_pct.get('blue',0):.0f}%", 'White':f"{color_pct.get('white',0):.0f}%",
                    'SMP (1-100um)': _size_class_share(widths, 1, 100) if scale else "N/A",
                    'SMP (100-300um)': _size_class_share(widths, 100, 300) if scale else "N/A",
                    'SMP (300-600um)': _size_class_share(widths, 300, 600) if scale else "N/A",
                    'SMP (600-1000um)': _size_class_share(widths, 600, 1000) if scale else "N/A",
                    'LMP (1000-5000um)': _size_class_share(widths, 1000, 5000, include_upper=True) if scale else "N/A"
                })

            df_summary_final = pd.DataFrame(summary_list)
            desired_order = ['Location', 'Abundance (particles/L)', 'Latitude', 'Longitude', 'Sample Volume (L)', 'Categories', 'Dominant Shape', 'Fragment', 'Fiber', 'Pellet', 'Foam', 'Film', 'Dominant Colour', 'Black', 'Red', 'Green', 'Blue', 'White', 'SMP (1-100um)','SMP (100-300um)', 'SMP (300-600um)', 'SMP (600-1000um)', 'LMP (1000-5000um)']
            df_summary_final = df_summary_final.reindex(columns=desired_order)
            df_summary_final.to_excel(writer, sheet_name='All_Summary', index=False)

        print(f"\n✅ Analysis complete. Results saved to {OUTPUT_EXCEL_PATH}")
        files.download(OUTPUT_EXCEL_PATH)
        shutil.make_archive('results_images', 'zip', results_img_dir)
        print(f"\n✅ Annotated images saved to results_images.zip")
        files.download('results_images.zip')