# Die analysis

Now we will run a sheet resistance analysis using the device analyses we triggered in the device analysis notebook. Make sure all the analyses from the previous notebook have finished!

In [None]:
import getpass
import json
from pathlib import Path

import gfhub
import matplotlib.pyplot as plt
import numpy as np
from gfhub import nodes
from PIL import Image
from tqdm.auto import tqdm

# Login name of the current OS user; used below as a tag to filter queried files.
user = getpass.getuser()

## Client

In [None]:
# Shared gfhub client; the cells below use it to query/download files,
# register functions and pipelines, and trigger/wait for jobs.
client = gfhub.Client()

## Die Analysis

This function will aggregate device-level resistance measurements to calculate sheet resistance.

In [None]:
def die_sheet_resistance(
    files: list[Path],
    tags: list[list[str]],
    /,
    *,
    width_key: str = "width",
    length_key: str = "length",
) -> tuple[Path, Path]:
    """Aggregate device-level resistances of one die into a sheet resistance.

    Each file is a JSON document that may contain a ``"resistance"`` entry.
    The device width and length are read from the matching tag list, from
    tags of the form ``"<width_key>:<value>"`` and ``"<length_key>:<value>"``.
    Devices without a resistance, width, or length are skipped.

    For every remaining device ``R * W / L`` is computed; the sheet
    resistance is the mean of these values and its spread is ``np.std`` of
    them (NumPy defaults).

    Args:
        files: Paths to the per-device JSON result files.
        tags: One list of tag strings per entry in ``files``, same order.
        width_key: Tag key that carries the device width.
        length_key: Tag key that carries the device length.

    Returns:
        ``(plot_path, results_path)``: a PNG scatter plot of the per-device
        values and a JSON file with the keys ``die_x``, ``die_y``,
        ``sheet_resistance``, ``sheet_resistance_std`` and ``num_devices``.
        Both are written next to ``files[0]``.

    Raises:
        ValueError: If ``files`` and ``tags`` differ in length, or if no
            device yields a resistance together with a width and a length.
    """
    # zip() would silently drop the surplus entries and pair files with the
    # wrong tags, so reject mismatched inputs up front.
    if len(files) != len(tags):
        raise ValueError(
            f"Got {len(files)} files but {len(tags)} tag lists; "
            "expected exactly one tag list per file"
        )

    width_prefix = f"{width_key}:"
    length_prefix = f"{length_key}:"

    # Collect (resistance, width, length) for every usable device
    resistances = []
    widths = []
    lengths = []
    for file, file_tags in zip(files, tags):
        data = json.loads(file.read_text())

        resistance = data.get("resistance")
        if resistance is None:
            continue

        width = None
        length = None
        for tag in file_tags:
            # Slice off the prefix instead of splitting on ":" so that keys
            # which themselves contain ":" are still parsed correctly.
            if tag.startswith(width_prefix):
                width = float(tag[len(width_prefix):])
            elif tag.startswith(length_prefix):
                length = float(tag[len(length_prefix):])

        if width is not None and length is not None:
            resistances.append(resistance)
            widths.append(width)
            lengths.append(length)

    if not resistances:
        raise ValueError("No valid resistance measurements found")

    # R * W / L should be constant across devices and equal the sheet resistance
    rw_over_l = np.array(resistances) * np.array(widths) / np.array(lengths)
    sheet_resistance = np.mean(rw_over_l)
    sheet_resistance_std = np.std(rw_over_l)

    # Plot the per-device values with the mean and the ±1σ band.
    # An explicit figure avoids drawing onto whatever pyplot figure happens
    # to be current, and the finally-clause releases it even on failure.
    plot_path = files[0].parent / "die_sheet_resistance.png"
    fig, ax = plt.subplots()
    try:
        ax.scatter(range(len(rw_over_l)), rw_over_l)
        ax.axhline(
            sheet_resistance,
            color="r",
            linestyle="--",
            label=f"Mean = {sheet_resistance:.2e}",
        )
        ax.axhline(
            sheet_resistance + sheet_resistance_std,
            color="orange",
            linestyle=":",
            alpha=0.7,
        )
        ax.axhline(
            sheet_resistance - sheet_resistance_std,
            color="orange",
            linestyle=":",
            alpha=0.7,
            label=f"±1σ = {sheet_resistance_std:.2e}",
        )
        ax.set_xlabel("Device Index")
        ax.set_ylabel("Sheet Resistance (Ω/sq)")
        ax.legend()
        ax.grid(True)
        fig.savefig(plot_path, bbox_inches="tight", dpi=100)
    finally:
        plt.close(fig)

    # Die coordinates come from the first file's tags (format: "die:x,y");
    # they stay None when that file carries no "die:" tag.
    die_x, die_y = None, None
    for tag in tags[0]:
        if tag.startswith("die:"):
            coords = tag.split(":", 1)[1]
            die_x, die_y = [int(c) for c in coords.split(",")]
            break

    results = {
        "die_x": die_x,
        "die_y": die_y,
        "sheet_resistance": float(sheet_resistance),
        "sheet_resistance_std": float(sheet_resistance_std),
        "num_devices": len(resistances),
    }

    results_path = files[0].parent / "die_sheet_resistance.json"
    results_path.write_text(json.dumps(results, indent=2))

    return plot_path, results_path

In [None]:
# Wrap the function so it can be registered with gfhub. `dependencies` maps a
# package name to the import statement that provides the names used in the
# function body (`np`, `json`, `plt`).
# NOTE(review): `Path` appears in the function's annotations but is not listed
# here — confirm gfhub provides it where the function is evaluated.
func_def = gfhub.Function(die_sheet_resistance, dependencies={
    "numpy": "import numpy as np",
    "json": "import json",
    "matplotlib": "import matplotlib.pyplot as plt",
})

### Test function

In [None]:
# Fetch this user's "*_linear_fit.json" files for the resistance project and
# group them per (wafer, die).
analysis_results = client.query_files(
    name="*_linear_fit.json",
    tags=["project:resistance", user],
).groupby(("wafer", "die"))

In [None]:
# Try the function locally on the first (wafer, die) group.
key = (wafer, die) = list(analysis_results)[0]
results = analysis_results[key]

# Download every file of that group next to the notebook ...
paths = [
    client.download_file(entry["id"], f"./download_{idx}.json")
    for idx, entry in enumerate(results)
]
# ... and turn each file's tags into a list of tag strings.
tags = [
    [gfhub.tags.into_string(tag) for tag in entry["tags"].values()]
    for entry in results
]

#plot_path, _ = func_def.eval(paths, tags)
plot_path, _ = die_sheet_resistance(paths, tags)
Image.open(plot_path)

In [None]:
# Register the wrapped function under the name the pipeline refers to.
client.add_function(name="die_sheet_resistance", script=func_def)

## Tag aggregation

To tag the output files accurately, we create a simple function that finds the tags shared by every list in a list of tag lists:

In [None]:
def find_common_tags(
    tags: list[list[str]],
    /,
) -> list[str]:
    """Return the tags that every tag list has in common.

    A tag is either ``"key:value"`` or a bare ``"key"`` (treated as having an
    empty value). A key is reported only if it occurs in *every* tag list and
    always with the same value. Keys starting with ``"."`` are never
    reported.

    Args:
        tags: One list of tag strings per file.

    Returns:
        The common tags, in order of first appearance, formatted as
        ``"key:value"`` or as ``"key"`` when the value is empty.
    """
    values_by_key = {}  # key -> set of all values seen for that key
    lists_with_key = {}  # key -> number of tag lists that contain the key
    for file_tags in tags:
        keys_in_list = set()
        for tag in file_tags:
            # partition() yields an empty value for tags without ":"
            key, _, value = tag.partition(":")
            values_by_key.setdefault(key, set()).add(value)
            keys_in_list.add(key)
        for key in keys_in_list:
            lists_with_key[key] = lists_with_key.get(key, 0) + 1

    total_lists = len(tags)
    common_tags = []
    for key, values in values_by_key.items():
        if key.startswith("."):
            continue
        # A single value is not enough: a key carried by only some of the
        # tag lists is not common to all of them.
        if len(values) != 1 or lists_with_key[key] != total_lists:
            continue
        (value,) = values
        common_tags.append(f"{key}:{value}" if value else key)
    return common_tags

Let's prepare a function definition we can upload:

In [None]:
# Wrap find_common_tags for upload; it needs no imports, hence no dependencies.
func_def = gfhub.Function(find_common_tags, dependencies={})

Let's test this on the tags we loaded earlier:

In [None]:
# Evaluate the wrapped function on the tag lists built in the test cell above.
func_def.eval(tags)

In [None]:
# Register the wrapped function under the name the pipeline refers to.
client.add_function("find_common_tags", func_def)

## Create pipeline

We can now create a pipeline which brings this all together:

In [None]:
# Build the die-level pipeline: nodes are attached as attributes of `p`,
# and connections are added with `p += source >> target`.
p = gfhub.Pipeline()

# a pipeline that takes a list of input paths (as opposed to a single input path)
# cannot be configured to auto-trigger on upload. Therefore we only add a manual trigger:
p.trigger = nodes.on_manual_trigger()

# the trigger kicks off a load from S3
p.load_file = nodes.load()
p += p.trigger >> p.load_file

# it also kicks off a load of the tags
p.load_tags = nodes.load_tags()
p += p.trigger >> p.load_tags

# the data file path (now on the local filesystem) as well as the
# tags get passed to the analysis function
p.sheet_resistance = nodes.function(function="die_sheet_resistance")
p += p.load_file >> p.sheet_resistance[0]
p += p.load_tags >> p.sheet_resistance[1]

# we also determine which tags all the data files have in common
p.common_tags = nodes.function(function="find_common_tags")
p += p.load_tags >> p.common_tags

# we save the plot (first output of the analysis) with the common tags
p.save_plot = nodes.save()
p += p.sheet_resistance[0] >> p.save_plot[0]
p += p.common_tags >> p.save_plot[1]

# we save the json (second output of the analysis) with the common tags
p.save_json = nodes.save()
p += p.sheet_resistance[1] >> p.save_json[0]
p += p.common_tags >> p.save_json[1]

# once the pipeline is defined, we can upload it:
confirmation = client.add_pipeline("die_sheet_resistance", p)

The pipeline has now been uploaded. Let's print its URL:

In [None]:
# Print the URL of the pipeline registered above, looked up by its returned id.
print(client.pipeline_url(confirmation['id']))

## Trigger pipeline for all dies

In [None]:
# Query this user's "*_linear_fit.json" files and group them per (wafer, die).
analysis_results = client.query_files(
    name="*_linear_fit.json",
    tags=["project:resistance", user],
).groupby(("wafer", "die"))

# One pipeline run per group: each run receives all file IDs of that die.
job_ids = []
for _die_key, files in tqdm(analysis_results.items()):
    input_ids = [f["id"] for f in files]

    triggered = client.trigger_pipeline("die_sheet_resistance", input_ids)
    job_ids.extend(triggered["job_ids"])

print(f"Triggered {len(job_ids)} die analysis jobs")

## Wait for completion

In [None]:
# Wait on all jobs triggered above before querying their outputs.
jobs = client.wait_for_jobs(job_ids)

## Final plot

In [None]:
# Query die analysis plots
# Query the die analysis plots saved by the pipeline for this user
die_plots = client.query_files(
    name="die_sheet_resistance.png",
    tags=["project:resistance", user],
)

print(f"Found {len(die_plots)} die analysis plots")

# Display the first plot, if any were found
if die_plots:
    img = Image.open(client.download_file(die_plots[0]["id"]))
    display(img.resize((530, 400)))