# **Indonesia Freshwater Microplastics Prediction**
Welcome! This notebook helps you export the predictor variables needed to predict microplastics (MPs) in Indonesian freshwater ecosystems.

------------------------------------
**!!IMPORTANT!!**

Click "Run all", then enter the requested information when prompted to generate the final (.csv) file.

--------------------------------------------------------------------------------

An interactive prediction map based on a Generalized Additive Model (GAM) is available at:
http://arrielf.shinyapps.io/mp_prediction_app

*Use the (.csv) output file generated by this notebook as the input for the interactive MPs prediction map.

In [19]:
#@title *1. Data management (click button to download and load)*
#@markdown This section will automatically load the required dataset for the entire notebook


import os
import ipywidgets as widgets
from IPython.display import display, clear_output

# Google Drive file ID -> local filename, one entry per dataset the notebook needs.
file_ids = dict([
    ("1cdiQ0nlXDD8SgH_d0Izsuf7iyrgf2yQ4", "Agriculture.gpkg"),
    ("1Ffz2LGwUnJT_nZoCTMczo6O4s-W_AdbJ", "Commercial.gpkg"),
    ("13sctRU0nzKqKWVebJ78mK2iUDpAcS0Oy", "Forest.gpkg"),
    ("11t2YNPVr5RV6o8TkLyPvJ4JzB6LC03td", "Housing.gpkg"),
    ("11RBrGclJC2IDPWXWXfJ6PHboBvyJvK3E", "Population.tif"),
])

# One progress step per dataset file; messages are written into `output`.
progress = widgets.IntProgress(
    description='Downloading:',
    min=0,
    max=len(file_ids),
)
output = widgets.Output()

# Stack the bar above the log area and render both.
container = widgets.VBox(children=[progress, output])
display(container)

def download_and_load():
    with output:
        clear_output()
        for i, (file_id, filename) in enumerate(file_ids.items(), 1):
            if not os.path.exists(filename):
                print(f"Downloading {filename}...")
                # Use shell command quietly
                !gdown --id {file_id} --quiet
                print(f"✅ {filename} downloaded.")
            else:
                print(f"✅ {filename} already exists.")
            progress.value = i

        print("\nAll files downloaded or already present.")

        import geopandas as gpd
        global agri, comm, forest, housing, pop_raster_path
        agri = gpd.read_file("Agriculture.gpkg")
        comm = gpd.read_file("Commercial.gpkg")
        forest = gpd.read_file("Forest.gpkg")
        housing = gpd.read_file("Housing.gpkg")
        pop_raster_path = "Population.tif"
        print("All files loaded successfully.")

# Run automatically on display
download_and_load()


VBox(children=(IntProgress(value=0, description='Downloading:', max=5), Output()))

In [20]:
#@title *2. Install Requirements*
#@markdown This section will automatically install the required packages for the entire notebook
import time
import threading
import ipywidgets as widgets
from IPython.display import display, clear_output

# Progress bar for the install step (0-100) and a bordered, scrollable log box.
progress = widgets.IntProgress(
    description='Installing:',
    bar_style='info',
    min=0,
    max=100,
)
log_output = widgets.Output(
    layout={
        'border': '1px solid black',
        'height': '150px',
        'overflow_y': 'auto',
    }
)

display(progress, log_output)

def fake_progress(stop_event=None):
    """Animate the module-level ``progress`` bar while packages install.

    The bar is cosmetic: it advances on a timer, not on real pip progress.

    Args:
        stop_event: Optional ``threading.Event``. When given, the animation
            stops as soon as the event is set and never fills the bar
            completely on its own, so the bar cannot show 100% before the
            install has actually finished. When omitted, the bar simply runs
            from 0 to ``progress.max``.
    """
    last_step = progress.max if stop_event is None else progress.max - 1
    for i in range(last_step + 1):
        if stop_event is not None and stop_event.is_set():
            return
        time.sleep(0.05)  # Adjust speed here
        progress.value = i

def install_packages():
    """Install the geospatial packages used by this notebook, logging into ``log_output``."""
    with log_output:
        # %pip installs into the environment of the running kernel, which a
        # plain `pip` shell command is not guaranteed to do.
        get_ipython().run_line_magic('pip', 'install geopandas rasterio rasterstats shapely')

# Start progress bar animation in a background thread that can be told to stop
_install_finished = threading.Event()
_animation_thread = threading.Thread(target=fake_progress, args=(_install_finished,), daemon=True)
_animation_thread.start()

# Run pip install (this blocks; output is shown live in log_output)
try:
    install_packages()
finally:
    # Always stop and reap the animation so it cannot keep overwriting the
    # bar after this cell has finished.
    _install_finished.set()
    _animation_thread.join()

progress.value = progress.max
progress.bar_style = 'success'
progress.description = 'Done!'


IntProgress(value=0, bar_style='info', description='Installing:')

Output(layout=Layout(border='1px solid black', height='150px', overflow_y='auto'))

In [27]:
#@title *3. Data processing*
#@markdown This section allows you to input sampling locations either by manual entry or by uploading a CSV file.
#@markdown - **Manual Input:** Enter coordinates, sampling method, category, and a description for each point in a dynamic table (location IDs are assigned automatically).
#@markdown - **Upload CSV:** Upload a CSV file for bulk input. Required columns: `Location ID`, `Longitude`, `Latitude`, `Method`, `Categories`; `Description` is optional.

#@markdown The notebook will then calculate land use proportions (Commercial, Agriculture, Forest, Housing) within a 1 km buffer around each point and estimate population density using a raster layer.

#@markdown Finally, it generates a processed table ready for download, containing all predictors for further analysis.

#@markdown *Use the widgets to switch input modes and run the processing.*

import io
import time
import base64
import geopandas as gpd
import pandas as pd
import rasterio
from rasterstats import zonal_stats
import ipywidgets as widgets
from IPython.display import display, clear_output

# --- Selector: type points by hand or upload them as a CSV ---
input_mode = widgets.RadioButtons(
    description='Input mode:',
    options=['Manual input', 'Upload CSV'],
    value='Manual input',
    disabled=False,
)

# --- Single-file CSV uploader (used in 'Upload CSV' mode) ---
upload_csv = widgets.FileUpload(
    description='Upload CSV',
    accept='.csv',
    multiple=False,
)

# --- 1. Table-style manual input ---
# Number of rows in the manual-entry table (1 to 20, starts at 3).
point_count = widgets.BoundedIntText(
    description='Number of points:',
    value=3,
    min=1,
    max=20,
)

def create_input_table(n):
    """Build the manual-entry table with one header row and ``n`` input rows.

    Args:
        n: Number of sampling points (input rows) to create.

    Returns:
        A 7-tuple ``(grid, id_widgets, lon_widgets, lat_widgets,
        method_widgets, category_widgets, desc_widgets)``: the
        ``GridspecLayout`` holding the table, followed by one list per column
        containing that column's widgets in row order.
    """
    headers = ['Location ID', 'Longitude', 'Latitude', 'Method', 'Categories', 'Location Description']
    grid = widgets.GridspecLayout(n + 1, len(headers), width='1000px')

    for col, head in enumerate(headers):
        # `font_weight` is not a Layout trait, so passing it to Layout never
        # made the header bold; render the emphasis with HTML instead.
        grid[0, col] = widgets.HTML(
            value=f'<b>{head}</b>',
            layout=widgets.Layout(justify_content='center'),
        )

    # One list per column, in the same order as `headers`.
    columns = [[] for _ in headers]

    for row in range(1, n + 1):
        row_widgets = (
            # Location ID is a read-only label equal to the row number.
            widgets.Label(value=str(row), layout=widgets.Layout(width='80px')),
            widgets.FloatText(layout=widgets.Layout(width='130px'), description='e.g. 106.11', style={'description_width': 'initial'}),
            widgets.FloatText(layout=widgets.Layout(width='130px'), description='e.g. -6.11', style={'description_width': 'initial'}),
            widgets.Dropdown(options=['Grab', 'Net'], layout=widgets.Layout(width='100px')),
            widgets.Dropdown(
                options=['River', 'Lake', 'Reservoir', 'Estuary', 'Waste Water Treatment Plant'],
                layout=widgets.Layout(width='200px')
            ),
            widgets.Text(placeholder='e.g. Densed river, downstream of Brantas', layout=widgets.Layout(width='300px')),
        )
        for col, cell in enumerate(row_widgets):
            grid[row, col] = cell
            columns[col].append(cell)

    return (grid, *columns)

grid, id_widgets, lon_widgets, lat_widgets, method_widgets, category_widgets, desc_widgets = create_input_table(point_count.value)

def on_point_count_change(change):
    """Rebuild the manual-entry table when the number of points changes.

    The new table replaces the old one inside ``manual_container`` (created
    further down in this cell, before this callback can fire). Swapping the
    container's children keeps the container and the module-level widget
    lists pointing at the same table; re-displaying a fresh grid on its own
    left the container holding a stale table whose inputs were ignored.
    Values already typed into rows that still exist are carried over.

    Args:
        change: traitlets change dict; ``change['new']`` is the new row count.
    """
    global grid, id_widgets, lon_widgets, lat_widgets, method_widgets, category_widgets, desc_widgets

    # Remember the editable widgets of the current table, row by row.
    previous_rows = list(zip(lon_widgets, lat_widgets, method_widgets, category_widgets, desc_widgets))

    grid, id_widgets, lon_widgets, lat_widgets, method_widgets, category_widgets, desc_widgets = create_input_table(change['new'])

    # Copy user input into the matching rows of the new table (zip stops at
    # the shorter table, so shrinking simply drops the trailing rows).
    current_rows = zip(lon_widgets, lat_widgets, method_widgets, category_widgets, desc_widgets)
    for old_row, new_row in zip(previous_rows, current_rows):
        for old_widget, new_widget in zip(old_row, new_row):
            new_widget.value = old_widget.value

    manual_container.children = (point_count, grid)

point_count.observe(on_point_count_change, names='value')

# --- One container per input mode so each can be shown or hidden as a unit ---
manual_container = widgets.VBox(children=[point_count, grid])
upload_container = widgets.VBox(children=[upload_csv])

# --- Button that starts processing, and the area that receives its messages ---
process_button = widgets.Button(
    button_style='success',
    description='Process and Download CSV',
)
output = widgets.Output()

def on_input_mode_change(change):
    """Redraw the cell for the newly selected input mode.

    Clears previous results and the cell output, then shows the mode
    selector, the container matching ``change['new']`` and the process
    button with its output area. The other container is hidden.
    """
    output.clear_output()
    clear_output(wait=True)
    display(input_mode)

    if change['new'] == 'Manual input':
        shown, hidden = manual_container, upload_container
    else:
        shown, hidden = upload_container, manual_container

    hidden.layout.display = 'none'
    shown.layout.display = 'flex'
    display(shown)
    display(process_button, output)

input_mode.observe(on_input_mode_change, names='value')

# Initial view: mode selector, manual-entry table, then the button and output area
display(input_mode)
display(manual_container)
display(process_button, output)
upload_container.layout.display = 'none'

# --- Processing and download ---
_BASE_COLUMNS = ['Location ID', 'Longitude', 'Latitude', 'Categories', 'Method', 'Description']
_REQUIRED_CSV_COLUMNS = ['Location ID', 'Longitude', 'Latitude', 'Method', 'Categories']


def _read_manual_points():
    """Collect the manual-entry table into a DataFrame, or print why not and return None."""
    rows = []
    table_rows = zip(id_widgets, lon_widgets, lat_widgets, method_widgets, category_widgets, desc_widgets)
    for position, (id_w, lon_w, lat_w, method_w, category_w, desc_w) in enumerate(table_rows, start=1):
        lon, lat = lon_w.value, lat_w.value
        # FloatText fields start at 0.0 rather than None, so a None check
        # alone cannot detect an untouched row. (0, 0) is far outside
        # Indonesia, so it is treated as "not filled in".
        if lon is None or lat is None or (lon == 0 and lat == 0):
            print(f"Point {position} missing longitude or latitude!")
            return None
        rows.append({
            'Location ID': int(id_w.value),
            'Longitude': lon,
            'Latitude': lat,
            'Method': method_w.value,
            'Categories': category_w.value,
            'Description': desc_w.value
        })
    if not rows:
        print("No input points!")
        return None
    return pd.DataFrame(rows)


def _read_uploaded_points():
    """Parse the uploaded CSV into a DataFrame, or print why not and return None."""
    uploads = upload_csv.value
    if not uploads:
        print("Please upload a CSV file!")
        return None

    # ipywidgets 7 exposes a dict keyed by filename; ipywidgets 8 exposes a
    # tuple of file records. Both carry the raw data under 'content'
    # (bytes or memoryview), hence the bytes() normalisation.
    files = list(uploads.values()) if isinstance(uploads, dict) else list(uploads)
    content = bytes(files[0]['content'])

    try:
        points = pd.read_csv(io.BytesIO(content))
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as err:
        print(f"Could not read the uploaded CSV: {err}")
        return None

    missing = [col for col in _REQUIRED_CSV_COLUMNS if col not in points.columns]
    if missing:
        for col in missing:
            print(f"Missing required column: {col}")
        return None

    # Description is optional in uploads.
    if 'Description' not in points.columns:
        points['Description'] = ''
    return points


def _match_crs(layer, target_crs):
    """Return ``layer`` reprojected to ``target_crs`` (unchanged if it already matches or has no CRS)."""
    if layer.crs is None or layer.crs == target_crs:
        return layer
    return layer.to_crs(target_crs)


def _landuse_proportion(buffer, landuse_gdf):
    """Return the fraction of ``buffer``'s area covered by ``landuse_gdf`` geometries.

    Both inputs must be in the same projected CRS. The spatial index narrows
    the layer to the features that touch the buffer, so the intersection is
    not computed against every polygon of the layer.
    """
    buffer_area = buffer.area
    if buffer_area <= 0:
        return 0
    hits = landuse_gdf.sindex.query(buffer, predicate='intersects')
    if len(hits) == 0:
        return 0.0
    intersect_area = landuse_gdf.geometry.iloc[hits].intersection(buffer).area.sum()
    return intersect_area / buffer_area


def process_and_download(b):
    """Compute the predictors for every input point and show a CSV download link.

    Reads points from the manual table or the uploaded CSV (per
    ``input_mode``), validates the coordinates, computes the share of
    Commercial / Agriculture / Forest / Housing land use inside a 1000-unit
    buffer around each point (buffered in ESRI:54034) and the mean
    population raster value inside the same buffer, then displays a link
    that downloads ``processed_predictors.csv``. All messages are written
    into ``output``; on any input problem a message is printed and nothing
    is computed.

    Requires the globals ``agri``, ``comm``, ``forest``, ``housing`` and
    ``pop_raster_path`` set by the data-management cell.

    Args:
        b: The clicked button (unused; required by the ``on_click`` callback signature).
    """
    with output:
        output.clear_output()

        # Load input points either from manual input or CSV upload
        if input_mode.value == 'Manual input':
            input_points = _read_manual_points()
        else:
            input_points = _read_uploaded_points()
        if input_points is None:
            return
        if input_points.empty:
            print("No input points!")
            return

        # Reject non-numeric, missing or out-of-range coordinates up front;
        # they would otherwise yield empty geometries and silent zeros.
        lon = pd.to_numeric(input_points['Longitude'], errors='coerce')
        lat = pd.to_numeric(input_points['Latitude'], errors='coerce')
        invalid = ~(lon.between(-180, 180) & lat.between(-90, 90))
        if invalid.any():
            bad_ids = ', '.join(str(v) for v in input_points.loc[invalid, 'Location ID'])
            print(f"Invalid or missing Longitude/Latitude for Location ID(s): {bad_ids}")
            return
        input_points = input_points.assign(Longitude=lon, Latitude=lat).reset_index(drop=True)

        # Check spatial layers and raster loaded
        try:
            layers = {
                'Commercial': comm,
                'Agriculture': agri,
                'Forest': forest,
                'Housing': housing,
            }
            raster_path = pop_raster_path
        except NameError:
            print("Please load land use layers (agri, comm, forest, housing) and pop_raster_path before processing.")
            return

        points_gdf = gpd.GeoDataFrame(
            input_points,
            geometry=gpd.points_from_xy(input_points.Longitude, input_points.Latitude),
            crs='EPSG:4326'
        )
        buffers = points_gdf.to_crs('ESRI:54034').buffer(1000)

        # Areas are only comparable when layers and buffers share one CRS.
        layers = {name: _match_crs(layer, buffers.crs) for name, layer in layers.items()}

        # Progress bar: total steps = land use buffers + 1 population step
        progress = widgets.IntProgress(value=0, min=0, max=len(buffers)+1, description='Calculating:', bar_style='info')
        display(progress)

        proportions = {name: [] for name in layers}
        for done, buffer in enumerate(buffers, start=1):
            for name, layer in layers.items():
                proportions[name].append(_landuse_proportion(buffer, layer))
            progress.value = done
            time.sleep(0.05)  # allow UI to update

        progress.description = 'Calculating Population:'
        with rasterio.open(raster_path) as src:
            # Zonal statistics need the zones in the raster's own CRS.
            zones = buffers if src.crs is None else buffers.to_crs(src.crs.to_wkt())
            pop_stats = zonal_stats(
                zones, src.read(1), affine=src.transform, stats=['mean'], nodata=src.nodata
            )
        progress.value = progress.max  # population step done

        # Buffers with no valid raster cells report None; export them as 0.
        pop_means = [stat['mean'] if stat['mean'] is not None else 0 for stat in pop_stats]

        results_df = pd.DataFrame({col: input_points[col].to_numpy() for col in _BASE_COLUMNS})
        for name, values in proportions.items():
            results_df[name] = values
        results_df['Population'] = pop_means

        results_df = results_df[['Location ID', 'Longitude', 'Latitude', 'Categories', 'Method', 'Description',
                                 'Commercial', 'Agriculture', 'Forest', 'Housing', 'Population']]

        for col in ['Commercial', 'Agriculture', 'Forest', 'Housing']:
            results_df[col] = results_df[col].round(4)
        results_df['Population'] = results_df['Population'].round(2)

        # Update progress bar to Completed and then hide it
        progress.description = 'Completed!'
        progress.bar_style = 'success'
        time.sleep(0.5)
        progress.layout.display = 'none'

        # Embed the CSV in a data URI so the link downloads without a server round trip.
        csv_str = results_df.to_csv(index=False)
        b64 = base64.b64encode(csv_str.encode()).decode()

        # Styled download button
        html_button = f'''
        <a download="processed_predictors.csv"
           href="data:text/csv;base64,{b64}"
           target="_blank"
           style="
               display:inline-block;
               padding:12px 24px;
               font-size:16px;
               font-weight:bold;
               color:white;
               background-color:#28a745;
               border-radius:6px;
               text-decoration:none;
               box-shadow: 2px 2px 5px rgba(0,0,0,0.3);
           ">
           &#128190; Click here to download processed_predictors.csv
        </a>
        '''
        display(widgets.HTML(value=html_button))

process_button.on_click(process_and_download)


RadioButtons(description='Input mode:', options=('Manual input', 'Upload CSV'), value='Manual input')

VBox(children=(BoundedIntText(value=3, description='Number of points:', max=20, min=1), GridspecLayout(childre…

Button(button_style='success', description='Process and Download CSV', style=ButtonStyle())

Output()

### *4. MPs Prediction with Interactive Map*
Once you have the processed (.csv) file from the previous step, you can perform microplastic (MPs) predictions by visiting the following interactive map application:

http://arrielf.shinyapps.io/mp_prediction_app

