### Imports

In [11]:
import pandas as pd
import os
from pathlib import Path
import warnings
import numpy as np
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import scipy.stats as st
import sys

# Resolve the project root relative to the notebook's working directory.
CURRENT_DIR = Path(os.getcwd())
# The root is taken to be the direct parent of the working directory;
# adjust the index if the folder structure differs.
ROOT_DIR = CURRENT_DIR.parents[0]

# Make modules under the root importable. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if str(ROOT_DIR) not in sys.path:
    sys.path.append(str(ROOT_DIR))

# importlib is needed only for the reload() call below
import importlib

# Project-local module; importable because ROOT_DIR was appended to sys.path above
import stst_urls

# Re-execute stst_urls so that edits made to it after the first import are
# picked up without restarting the kernel
importlib.reload(stst_urls)

# Bind the name after the reload so it reflects the reloaded module.
# NOTE(review): QC_GTX_URL is assigned again as a string literal in the input
# cell further down, which overrides this imported value — confirm which
# definition is meant to be the source of truth.
from stst_urls import QC_GTX_URL

# Input Raw File and Decoder File

In [13]:
# Wafer codes to process; each one is matched against the GTX directory listing.
wafer_codes = ["QCHY7"]

# Presumably the base name used for summary output files — confirm against the
# cells that write results.
summaryfile_name = "qchy7"

# Decoder CSV (looked up under ROOT_DIR/decoders) and the results directory.
decoder_file = "QC WAFER_LAYOUT 24Dec.csv"
decoder_file_path = ROOT_DIR.joinpath("decoders", decoder_file)
results_file_path = ROOT_DIR.joinpath("results")

# Blanket suppression of every warning category for the rest of the session.
warnings.filterwarnings(action="ignore")


# EXPERIMENTAL: URL fetching from GTX
# Root of the GTX directory listing; wafer subdirectories are looked up beneath it.
# NOTE(review): this literal overrides the QC_GTX_URL imported from stst_urls in
# the imports cell — confirm which one should be the source of truth.
QC_GTX_URL = "https://sprgtxprod02.stni.seagate.com/~gtx/wafer/proc_LIV/data/byProdLot/QC/"

# Seconds to wait for the server. Without a timeout, requests.get can block the
# notebook indefinitely if the host stops responding.
GTX_REQUEST_TIMEOUT_S = 30

# Fetch the directory listing.
# NOTE(review): verify=False disables TLS certificate verification; prefer
# pointing `verify` at the appropriate CA bundle if one is available.
response = requests.get(QC_GTX_URL, verify=False, timeout=GTX_REQUEST_TIMEOUT_S)
# Fail loudly on an HTTP error instead of parsing an error page into an empty result
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Find all links in the directory listing
links = soup.find_all("a")

# Keep the links whose href contains any of the requested wafer codes
subdirectory_urls = []
for link in links:
    href = link.get("href")
    if href and any(wafer_code in href for wafer_code in wafer_codes):
        subdirectory_urls.append(QC_GTX_URL + href)

# Look inside each matching subdirectory for the RAW COD250 CSV files.
# When several files match, only the one with the latest timestamp is kept.
file_urls = []
for subdirectory_url in subdirectory_urls:
    response = requests.get(subdirectory_url, verify=False, timeout=GTX_REQUEST_TIMEOUT_S)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    links = soup.find_all("a")
    latest_file = None
    latest_time = ""
    for link in links:
        href = link.get("href")
        if href and "RAW" in href and "COD250" in href:
            # Assumes the name ends in a 14-digit timestamp plus a 4-character
            # extension (e.g. ...RAW20250312115454.CSV). Equal-width digit
            # strings compare chronologically, so a plain string comparison works.
            time_str = href[-18:-4]
            if time_str > latest_time:
                latest_time = time_str
                latest_file = subdirectory_url + href
    if latest_file:
        file_urls.append(latest_file)


print(file_urls)

# DEBUG: INPUT LINKS TO OTHER GTX FILES HERE
# file_urls = [
#     "https://sprgtxprod02.stni.seagate.com/~gtx/wafer/proc_LIV/data/byProdLot/QC/QCHWQ/LIV_53_QCHWQ_DNS-LIVTKCOD_LCRVCOD250-DNS_RAW20250227044906.CSV",
#     "https://sprgtxprod02.stni.seagate.com/~gtx/wafer/proc_LIV/data/byProdLot/QC/QCHWQ/LIV_53_QCHWQ_LIVBLTKCOD_COD250-DNS_RAW20250228082707.CSV",
#     "https://sprgtxprod02.stni.seagate.com/~gtx/wafer/proc_LIV/data/byProdLot/QC/QCHWQ/LIV_53_QCHWQ_LIVBLTKCOD_COD250-DNS_RAW20250311164324.CSV",
# ]
# print(file_urls)

['https://sprgtxprod02.stni.seagate.com/~gtx/wafer/proc_LIV/data/byProdLot/QC/QCHY7/LIV_54_QCHY7_DNS-LIVTKCOD_LCRVCOD250-DNS_RAW20250312115454.CSV']


# Transform Data to Desired Raw Sweep Format

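- selects the required sweep columns (`Vf` and `PD`)
- transposes
- stacks the data in tall format (one row per measurement)
- adds device coordinates from the RAW file, then `TE_LABEL` and `TYPE` from the decoder file
- loops over every chosen CSV file and stores the resulting raw-sweep DataFrames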

In [3]:
def transform_raw_file(file_url, decoder_file_path):
    """Reshape one RAW sweep CSV into a tall table with one row per measurement.

    The file is read twice: once skipping the first 19 lines to get the sweep
    table, and once from the top to pick the wafer id out of the leading lines.
    Columns whose name contains "Vf" or "PD" (but not "Vf@" / "PD@") are
    flattened device by device, so each device contributes ``n_meas``
    consecutive rows.

    Parameters
    ----------
    file_url : str or path-like
        Location of the RAW CSV; anything ``pandas.read_csv`` accepts.
    decoder_file_path : pathlib.Path or str
        Location of the decoder CSV. It is merged only when it exists and has
        the columns ``YMIN``, ``XMIN``, ``TE_LABEL`` and ``TYPE``.

    Returns
    -------
    pandas.DataFrame
        Columns ``WAFER_ID``, ``Vf``, ``TOUCHDOWN``, ``PD``, then ``X_UM`` /
        ``Y_UM`` when the RAW file has coordinate columns, then ``TE_LABEL`` /
        ``TYPE`` when the decoder could be merged, and finally ``LDI_mA``.
        ``LDI_mA`` is the 1-based position of the measurement within its sweep
        (the name suggests a current in mA — presumably 1 mA steps, confirm).

    Notes
    -----
    A missing input (coordinate columns, decoder file, decoder columns) is
    reported with ``print`` and the corresponding step is skipped; the function
    still returns the table built so far.
    """
    # The sweep table's header is on line 20 of the file; the 19 lines above it are skipped.
    df = pd.read_csv(file_url, skiprows=19)
    # Re-read only the top of the file: the wafer id is the second column of the
    # second data row when the first line is treated as the header.
    header_df = pd.read_csv(file_url, nrows=2)
    wafer_id = header_df.iloc[1, 1]
    print(f"Wafer ID: {wafer_id}")

    # Sweep columns contain "Vf" or "PD"; columns containing "Vf@" / "PD@" are excluded.
    sweep_cols = [
        col
        for col in df.columns
        if ("Vf" in col or "PD" in col) and "Vf@" not in col and "PD@" not in col
    ]
    vf_cols = [col for col in sweep_cols if "Vf" in col]
    pd_cols = [col for col in sweep_cols if "PD" in col]

    n_meas = len(vf_cols)
    print(f"Number of Current Measurements per Device: {n_meas}")
    n_devices = len(df)
    print(f"Number of Devices: {n_devices}")

    # Flatten row by row: each RAW row is one device, so this yields all of
    # device 1's measurements, then device 2's, and so on.
    vf_tall = pd.Series(df[vf_cols].to_numpy().ravel(), name="Vf")
    pd_tall = pd.Series(df[pd_cols].to_numpy().ravel(), name="PD")

    df_raw_sweeps = vf_tall.to_frame()
    # TOUCHDOWN is the 1-based ordinal of the device row in the RAW file.
    # NOTE(review): the coordinate merge below matches this ordinal against the
    # RAW file's own TOUCHDOWN column, which is only correct if that column
    # numbers the rows 1..N in file order — confirm.
    df_raw_sweeps["TOUCHDOWN"] = [i // n_meas + 1 for i in range(n_meas * n_devices)]
    # Column-wise concatenation (aligned on the positional index), not a cross join.
    df_raw_sweeps = pd.concat([df_raw_sweeps, pd_tall], axis=1)

    # Number the measurements *before* any merge: a merge that matches more than
    # one right-hand row duplicates left rows, which would corrupt a sequence
    # generated afterwards from the row position.
    df_raw_sweeps["LDI_mA"] = [i % n_meas + 1 for i in range(len(df_raw_sweeps))]

    # Add device coordinates from the RAW file.
    coord_cols = ["TOUCHDOWN", "STX_WAFER_X_UM", "STX_WAFER_Y_UM"]
    has_coords = all(col in df.columns for col in coord_cols)
    if has_coords:
        df_raw_sweeps = df_raw_sweeps.merge(df[coord_cols], on="TOUCHDOWN", how="left")
    else:
        print("Required columns for merging device coordinates are missing in the original RAW file.")

    # Add TE_LABEL / TYPE from the decoder, keyed on the device coordinates.
    decoder_path = Path(decoder_file_path)  # accept plain strings as well as Path objects
    if not decoder_path.exists():
        print(f"Decoder file not found at {decoder_file_path}")
    elif not has_coords:
        # The decoder is keyed on the coordinates, so there is nothing to join on.
        print("Skipping decoder merge because device coordinates are unavailable.")
    else:
        df_decoder = pd.read_csv(decoder_path)
        decoder_cols = ["YMIN", "XMIN", "TE_LABEL", "TYPE"]
        # Check every column that is selected below, not only the join keys.
        if all(col in df_decoder.columns for col in decoder_cols):
            df_raw_sweeps = df_raw_sweeps.merge(
                df_decoder[decoder_cols],
                left_on=["STX_WAFER_Y_UM", "STX_WAFER_X_UM"],
                right_on=["YMIN", "XMIN"],
                how="left",
            ).drop(columns=["YMIN", "XMIN"])
        else:
            print("Required columns for merging decoder data are missing in the decoder file.")

    # Shorten the coordinate column names (a no-op when they are absent).
    df_raw_sweeps = df_raw_sweeps.rename(columns={"STX_WAFER_X_UM": "X_UM", "STX_WAFER_Y_UM": "Y_UM"})
    # Keep the established column order: LDI_mA last, WAFER_ID first.
    df_raw_sweeps["LDI_mA"] = df_raw_sweeps.pop("LDI_mA")
    df_raw_sweeps.insert(0, "WAFER_ID", wafer_id)
    return df_raw_sweeps


# One tall raw-sweep table per RAW file, in the same order as file_urls
raw_sweeps_tables = []

warnings.filterwarnings("ignore")

# CALLING THE CODE
for file_url in file_urls:
    df_raw_sweeps = transform_raw_file(file_url, decoder_file_path)
    raw_sweeps_tables.append(df_raw_sweeps)

# Display the first 10 rows of the first raw_sweeps table.
# Guarded because file_urls can be empty, in which case indexing [0] would
# raise IndexError.
if raw_sweeps_tables:
    print(raw_sweeps_tables[0].head(10))
else:
    print("No RAW files were transformed (file_urls is empty); nothing to display.")

Wafer ID: QCHY7
Number of Current Measurements per Device: 250
Number of Devices: 1288
  WAFER_ID        Vf  TOUCHDOWN        PD   X_UM   Y_UM TE_LABEL      TYPE  \
0    QCHY7  1.322787          1  0.020999 -32339 -48416    64N6V  BL LASER   
1    QCHY7  1.409667          1  0.022642 -32339 -48416    64N6V  BL LASER   
2    QCHY7  1.448417          1  0.027571 -32339 -48416    64N6V  BL LASER   
3    QCHY7  1.476043          1  0.019356 -32339 -48416    64N6V  BL LASER   
4    QCHY7  1.497334          1  0.007854 -32339 -48416    64N6V  BL LASER   
5    QCHY7  1.513642          1  0.007854 -32339 -48416    64N6V  BL LASER   
6    QCHY7  1.529409          1  0.016070 -32339 -48416    64N6V  BL LASER   
7    QCHY7  1.542743          1  0.020999 -32339 -48416    64N6V  BL LASER   
8    QCHY7  1.556540          1  0.019356 -32339 -48416    64N6V  BL LASER   
9    QCHY7  1.566436          1  0.016070 -32339 -48416    64N6V  BL LASER   

   LDI_mA  
0       1  
1       2  
2       3  
3     