# **MODELISATION OF CPUs**

### **GOAL :** Clean jupyter notebook for modelisation of CPUs and CPUs encapsulation materials

Imports and libraries

In [1]:
import pandas as pd
import requests
import os
from urllib.parse import urlparse
import camelot
import re
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import seaborn as sns
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from plotly.express import get_trendline_results


### **Dataset :** MPU datasheets from STMicroelectronics and MPU datasheets from Infineon

1. LGA

https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/0c/c1/60/5e/77/68/41/05/DM00013099/files/MD_O7%20SO%2008%20.15%20JEDEC_E7O7%20CPU016J.pdf/jcr:content/translations/en.MD_O7%20SO%2008%20.15%20JEDEC_E7O7%20CPU016J.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/18/9a/ee/7c/2e/72/4b/97/DM00825583/files/MD_A08N%20VFLGA2.5X3X.86%2014L%20P.5%20L.475X.25_22AA%20AU36ABC_vers2_signed.pdf/jcr:content/translations/en.MD_A08N%20VFLGA2.5X3X.86%2014L%20P.5%20L.475X.25_22AA%20AU36ABC_vers2_signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/5d/8a/58/6b/86/12/48/d6/DM01104733/files/MD_A06R%20LGA%209.6X7.5%2018LD%20CERAMIC%20CAVITY_A2XZ%20AU23CDF_ver2-signed.pdf/jcr:content/translations/en.MD_A06R%20LGA%209.6X7.5%2018LD%20CERAMIC%20CAVITY_A2XZ%20AU23CDF_ver2-signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/98/a1/e0/01/27/36/4c/9c/DM01100634/files/MD_3N%20LLGA%203X3X1.0%2016L%20%20FOR%20SENSOR_D53N%20MT78ADA_ver2-signed.pdf/jcr:content/translations/en.MD_3N%20LLGA%203X3X1.0%2016L%20%20FOR%20SENSOR_D53N%20MT78ADA_ver2-signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/e5/55/aa/da/5a/a3/40/33/DM00076551/files/MD_0E%20HLGA%204X3X1%204LD_B50EMT69AAA%20(MP34DTW01TR)_signed.pdf/jcr:content/translations/en.MD_0E%20HLGA%204X3X1%204LD_B50EMT69AAA%20(MP34DTW01TR)_signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/e5/55/aa/da/5a/a3/40/33/DM00076551/files/MD_0E%20HLGA%204X3X1%204LD_B50EMT69AAA%20(MP34DTW01TR)_signed.pdf/jcr:content/translations/en.MD_0E%20HLGA%204X3X1%204LD_B50EMT69AAA%20(MP34DTW01TR)_signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/9f/55/1f/91/15/47/47/88/DM00150211/files/MD_A0VT_FCLGA_4X4X0.8_WRAVAA4_signed.pdf/jcr:content/translations/en.MD_A0VT_FCLGA_4X4X0.8_WRAVAA4_signed.pdf
https://www.infineon.com/row/public/documents/24/75/infineon-mcds-pasco2v15-ma006046957-materialcontentdatasheet-en.pdf

2. FCLGA

https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/9f/55/1f/91/15/47/47/88/DM00150211/files/MD_A0VT_FCLGA_4X4X0.8_WRAVAA4_signed.pdf/jcr:content/translations/en.MD_A0VT_FCLGA_4X4X0.8_WRAVAA4_signed.pdf

3. BGA

https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/98/a1/e0/01/27/36/4c/9c/DM01100634/files/MD_3N%20LLGA%203X3X1.0%2016L%20%20FOR%20SENSOR_D53N%20MT78ADA_ver2-signed.pdf/jcr:content/translations/en.MD_3N%20LLGA%203X3X1.0%2016L%20%20FOR%20SENSOR_D53N%20MT78ADA_ver2-signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/e5/55/aa/da/5a/a3/40/33/DM00076551/files/MD_0E%20HLGA%204X3X1%204LD_B50EMT69AAA%20(MP34DTW01TR)_signed.pdf/jcr:content/translations/en.MD_0E%20HLGA%204X3X1%204LD_B50EMT69AAA%20(MP34DTW01TR)_signed.pdf
https://www.st.com/content/ccc/resource/quality_and_reliability/quality_certificate/material_declaration/group3/2c/24/58/7c/0b/69/45/93/DM00089983/files/MD_A0SZ%20LGA%206.5X4X1%2028%20L%20PITCH%200.6%20MM_21DFMV0HCCB%20(LSM330DS)%20WCP%20ver2_signed.pdf/jcr:content/translations/en.MD_A0SZ%20LGA%206.5X4X1%2028%20L%20PITCH%200.6%20MM_21DFMV0HCCB%20(LSM330DS)%20WCP%20ver2_signed.pdf
https://www.infineon.com/row/public/documents/10/71/infineon-ma001226480-materialcontentsheet-en.pdf
https://www.infineon.com/row/public/documents/10/71/infineon-ma001426600-materialcontentsheet-en.pdf

(C:\Users\Arthur Resilio\Desktop\Resilio\Coding\metaboli_cpu_gpu_data\PGA_BGA_BOM.ods)


Data Loading and cleaning

In [2]:
# load_dfs.py

# Folder containing Excel files
folder_path = r"C:\Users\Arthur Resilio\Desktop\Excel_cleaned"

# List all Excel files
all_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".xlsx")]

dfs = []

for file in all_files:
    try:
        df = pd.read_excel(file)

        # --- 1. Clean column names ---
        df.columns = (
            df.columns.astype(str)
            .str.strip()
            .str.replace('\n', ' ', regex=False)
            .str.replace('\r', '', regex=False)
            .str.replace('\xa0', ' ', regex=False)
        )

        # --- 2. Identify date columns (keep as datetime) ---
        date_columns = [col for col in df.columns if 'date' in col.lower()]
        for col in date_columns:
            try:
                df[col] = pd.to_datetime(df[col], errors='ignore')
            except:
                pass

        # --- 3. Convert mass/weight columns to numeric ---
        mass_columns = ['Mass [mg]', 'Weight [mg]', 'Mass\n[mg]', 'Weight\n[mg]']
        for col in mass_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

        dfs.append(df)

    except Exception as e:
        print(f"Error loading {file}: {e}")

print(f"✅ Total datasets successfully loaded: {len(dfs)}")

# Optionally save dfs to a pickle for use in the next script
pd.to_pickle(dfs, r"C:\Users\Arthur Resilio\Desktop\dfs_loaded.pkl")

# clean_dfs.py
# Load previously saved dfs
dfs = pd.read_pickle(r"C:\Users\Arthur Resilio\Desktop\dfs_loaded.pkl")

def clean_name(name):
    """Standardize text: lowercase, strip spaces, remove parentheses contents."""
    if not isinstance(name, str):
        return name
    name = name.strip()
    name = re.sub(r'\s+', ' ', name)  # collapse multiple spaces
    name = name.lower()
    name = re.sub(r'\s*\(.*?\)\s*', '', name)  # remove parentheses and contents
    return name

def compute_surface_from_first_two(size_series):
    """
    Compute surface using the first two valid numeric Size [mm] values for a given CPU.
    If only one value exists, assume square.
    """
    valid_sizes = pd.to_numeric(size_series, errors="coerce").dropna().values
    if len(valid_sizes) >= 2:
        return valid_sizes[0] * valid_sizes[1]
    elif len(valid_sizes) == 1:
        return valid_sizes[0] ** 2
    else:
        return None

def clean_bom_df(df):
    """
    Cleans a single CPU BOM DataFrame:
    - Forward fill CPU metadata (Name, Socket, Size, Date)
    - Standardize names and substances
    - Compute total mass and surface area per CPU
    """
    df = df.copy()

    # Forward fill CPU metadata
    for col in ['Name', 'Socket', 'Size [mm]', 'Date']:
        if col in df.columns:
            df[col] = df[col].ffill()

    # Standardize text columns
    for col in ['Homogeneous Material', 'Substances']:
        if col in df.columns:
            df[col] = df[col].apply(clean_name)

    # Convert mass column to numeric
    if 'Mass [mg]' in df.columns:
        df['Mass [mg]'] = pd.to_numeric(df['Mass [mg]'], errors='coerce').fillna(0)
    else:
        df['Mass [mg]'] = 0

    # ✅ Compute total CPU mass per Name
    df['CPU Total Mass [mg]'] = df.groupby('Name')['Mass [mg]'].transform('sum')

    # ✅ Compute surface per CPU from the first two valid size values
    df['Surface [mm²]'] = df.groupby('Name')['Size [mm]'].transform(compute_surface_from_first_two)

    return df

# Clean all DataFrames
dfs_cleaned = [clean_bom_df(df) for df in dfs]

# Save cleaned DataFrames
output_path = r"C:\Users\Arthur Resilio\Desktop\dfs_cleaned.pkl"
pd.to_pickle(dfs_cleaned, output_path)

print(f"Cleaned DataFrames saved with consistent total mass and 2-value-based surface computation → {output_path}")



  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')


✅ Total datasets successfully loaded: 10
Cleaned DataFrames saved with consistent total mass and 2-value-based surface computation → C:\Users\Arthur Resilio\Desktop\dfs_cleaned.pkl


  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')


Definition of high_level material categories, and surface and weight total

In [3]:
hm_set = set()
for df in dfs_cleaned:
    if 'Homogeneous Material' in df.columns:
        hm_set.update(df['Homogeneous Material'].astype(str).str.strip().str.lower())

# Convert to sorted list
all_hm = sorted(hm_set)

# --- Initialize sets ---
hm_lga = set()
hm_bga = set()

# --- Iterate through each dataframe ---
for df in dfs_cleaned:
    if 'Homogeneous Material' in df.columns and 'Socket' in df.columns:
        # Clean strings
        df["Homogeneous Material"] = df["Homogeneous Material"].astype(str).str.strip().str.lower()
        df["Socket"] = df["Socket"].astype(str).str.strip().str.upper()

        # Separate by socket type
        hm_lga.update(df.loc[df["Socket"] == "LGA", "Homogeneous Material"])
        hm_bga.update(df.loc[df["Socket"] == "BGA", "Homogeneous Material"])

# --- Convert to sorted lists ---
hm_lga = sorted(hm_lga)
hm_bga = sorted(hm_bga)

# --- Print results ---
print("Homogeneous Materials for LGA CPUs:")
print(", ".join(hm_lga))
print("\nHomogeneous Materials for BGA CPUs:")
print(", ".join(hm_bga))

def group_homogeneous_material(dfs_cleaned):
    # Define mapping groups
    hm_mapping = {
        'die': ['chip_1', 'chip_2', 'die', 'die attach'],
        'carrier': ['cavity', 'leadfinish', 'leadframe', 'leadframe coating', 'substrate', 'substrate metal', 'substrate plastic',],
        'encapsulation': ['encapsulation', 'lid', 'lid platings', 'metal lid', 'mold compound', 'plating'],
        'connection': ['bump', 'wire', 'underfill', 'solder paste', 'solder resists', 'solderball','underfill'],
        'finishing': ['finishing', 'glue', 'coating', '']
    }

    # Create a flat mapping dictionary (value -> high level group)
    material_to_group = {}
    for group, materials in hm_mapping.items():
        for material in materials:
            material_to_group[material] = group

    dfs_high_level = []

    for df in dfs_cleaned:
        df_copy = df.copy()

        # Clean and map homogeneous material to high level group
        df_copy['High Level Material'] = (
            df_copy['Homogeneous Material']
            .astype(str).str.lower().str.strip()
            .map(material_to_group)  # Map to high level
            .fillna('other')  # For anything not in mapping
        )

        dfs_high_level.append(df_copy)

    return dfs_high_level

Homogeneous Materials for LGA CPUs:
, bump, chip_1, chip_2, coating, die, die attach, encapsulation, finishing, glue, leadfinish, lid, lid platings, metal lid, mold compound, plating, solder paste, solder resists, substrate, substrate metal, substrate plastic, underfill, wire

Homogeneous Materials for BGA CPUs:
cavity, die, die attach, encapsulation, glue, mold compound, plating, solder paste, solderball, substrate, underfill, wire


In [4]:
dfs_high_level = group_homogeneous_material(dfs_cleaned)
dfs_high_level[1].head()

Unnamed: 0,Name,Socket,Size [mm],Nb,Date,Homogeneous Material,Material Group,Substances,CAS,Mass [mg],Average Mass [%],Sum [%],Average Mass [ppm],Sum [ppm],CPU Total Mass [mg],Surface [mm²],High Level Material
0,MA001426600,BGA,27.0,,2019-09-08,die,inorganic material,silicon,7440-21-3,54.869,1.73,1.73,17263,17263.0,3178.487,729.0,die
1,MA001426600,BGA,27.0,,2019-09-08,wire,non noble metal,copper,7440-50-8,4.217,0.13,0.13,1327,1327.0,3178.487,729.0,connection
2,MA001426600,BGA,1.9,,2019-09-08,encapsulation,organic material,carbon black,1333-86-4,2.38,0.07,,749,,3178.487,729.0,encapsulation
3,MA001426600,BGA,1.9,,2019-09-08,encapsulation,plastics,epoxy resin,-,146.346,4.6,,46042,,3178.487,729.0,encapsulation
4,MA001426600,BGA,1.9,,2019-09-08,encapsulation,inorganic material,silicon dioxide,60676-86-0,1041.077,32.76,37.43,327538,374329.0,3178.487,729.0,encapsulation


## **Parametric modelisation**

### **GOAL :** Create a parametric CPU model for the materials for various types of CPUs

- Socket type
- High level / homogeneous materials
- Surface
- Total Mass

In [5]:
def plot_cpu_category_relation(
    df_list,
    category,
    category_level='high',       # 'high' or 'homogeneous'
    socket_type='all',           # 'BGA', 'LGA', or 'all'
    y_value_type='mass',         # 'mass' or 'percentage'
    x_axis='mass'                # 'mass' or 'surface'
):
    """
    Plots CPU-level relationship between total mass/surface and mass (or %) of a given material category.
    Each point = one CPU. The y-axis value is the SUM of all substance masses in the chosen category for that CPU.
    Also returns the trendline formula if available.
    """

    # Combine all datasets
    df = pd.concat(df_list, ignore_index=True)

    # Optional: filter socket
    if socket_type.lower() != 'all':
        df = df[df['Socket'].str.lower() == socket_type.lower()]

    # Choose the right category column
    cat_col = 'High Level Material' if category_level == 'high' else 'Homogeneous Material'
    if cat_col not in df.columns:
        raise ValueError(f"Column '{cat_col}' not found in the DataFrame.")

    # Drop missing category rows
    df = df.dropna(subset=[cat_col, 'Mass [mg]', 'CPU Total Mass [mg]'])

    # --- STEP 1: aggregate substance mass per CPU × category ---
    cat_mass = (
        df.groupby(['Name', cat_col], as_index=False)['Mass [mg]']
        .sum()
        .rename(columns={'Mass [mg]': 'Category Mass [mg]'})
    )

    # --- STEP 2: total CPU mass & surface ---
    cpu_totals = (
        df.groupby('Name', as_index=False)[['CPU Total Mass [mg]', 'Surface [mm²]']]
        .mean()
    )

    # --- STEP 3: merge totals ---
    merged = pd.merge(cat_mass, cpu_totals, on='Name', how='left')

    # --- STEP 4: filter the chosen category ---
    subset = merged[merged[cat_col].str.lower() == category.lower()]
    if subset.empty:
        raise ValueError(f"No data found for category '{category}' at level '{category_level}'.")

    # --- STEP 5: compute y-value ---
    if y_value_type == 'percentage':
        subset['Category Value'] = subset['Category Mass [mg]'] / subset['CPU Total Mass [mg]'] * 100
        y_label = f"{category.capitalize()} mass [% of CPU]"
    else:
        subset['Category Value'] = subset['Category Mass [mg]']
        y_label = f"{category.capitalize()} mass [mg]"

    # --- STEP 6: choose x-axis variable ---
    if x_axis == 'mass':
        x_col, x_label = 'CPU Total Mass [mg]', 'CPU Total Mass [mg]'
    elif x_axis == 'surface':
        x_col, x_label = 'Surface [mm²]', 'CPU Surface [mm²]'
    else:
        raise ValueError("x_axis must be 'mass' or 'surface'.")

    # --- STEP 7: plot interactive scatter with trendline ---
    fig = px.scatter(
        subset,
        x=x_col,
        y='Category Value',
        hover_name='Name',
        title=f"{category.capitalize()} vs CPU {x_axis.capitalize()} ({y_value_type})",
        labels={'Category Value': y_label, x_col: x_label},
        color=x_col,
        color_continuous_scale='Viridis',
        trendline='ols'
    )

    # Extract trendline formula
    results = get_trendline_results(fig)
    formula = None

    if not results.empty:
        model = results.iloc[0]["px_fit_results"]
        params = model.params

        # Case 1: statsmodels Series with named params
        if hasattr(params, "index"):
            intercept = params.get("Intercept", params.iloc[0])
            slope_candidates = [k for k in params.index if k != "Intercept"]
            slope = params[slope_candidates[0]] if slope_candidates else params.iloc[1]
        # Case 2: plain NumPy array (no names)
        elif isinstance(params, (list, tuple)) or hasattr(params, "__len__"):
            intercept = params[0]
            slope = params[1] if len(params) > 1 else 0
        else:
            intercept, slope = None, None

        # Try to get R² if available
        r2 = getattr(model, "rsquared", None)
        if r2 is not None:
            formula = f"{y_label} = {intercept:.3f} + {slope:.3f} × {x_label}  (R² = {r2:.3f})"
        else:
            formula = f"{y_label} = {intercept:.3f} + {slope:.3f} × {x_label}"

    # Update figure layout
    fig.update_traces(marker=dict(size=10, opacity=0.8))
    fig.update_layout(height=600, margin=dict(l=80, r=60, t=80, b=60))

    # Show the figure
    fig.show()

    return fig, formula

In [6]:
plot_cpu_category_relation(
    dfs_high_level,
    category='carrier',
    category_level='high',
    socket_type='LGA',
    y_value_type='mass',
    x_axis='surface'
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset['Category Value'] = subset['Category Mass [mg]']


(Figure({
     'data': [{'hovertemplate': ('<b>%{hovertext}</b><br><br>CPU' ... ' mass [mg]=%{y}<extra></extra>'),
               'hovertext': array(['A2XZ*AU23CDF ', 'D53N*MT78ADA ', 'MA006046957*PASCO2V15',
                                   'SAVT*WRAVAA4'], dtype=object),
               'legendgroup': '',
               'marker': {'color': {'bdata': 'AAAAAAAAUkAAAAAAAAAiQGdmZmZmJmhAAAAAAAAAMEA=', 'dtype': 'f8'},
                          'coloraxis': 'coloraxis',
                          'opacity': 0.8,
                          'size': 10,
                          'symbol': 'circle'},
               'mode': 'markers',
               'name': '',
               'orientation': 'v',
               'showlegend': False,
               'type': 'scatter',
               'x': {'bdata': 'AAAAAAAAUkAAAAAAAAAiQGdmZmZmJmhAAAAAAAAAMEA=', 'dtype': 'f8'},
               'xaxis': 'x',
               'y': {'bdata': 'Urgehes5b0DJdr6fGi8FQJZDi2znu05AQmDl0CLbGkA=', 'dtype': 'f8'},
               'yax

The only high level category for which we have enough points of data to fit a regression is the carrier

## **Model**

Parameters in TechPowerUp: size, socket

In [7]:
def create_cpu_model(
    dfs_cleaned,
    dfs_high_level,
    selected_materials=None,  # dict: {high_level: [sub_materials]} or list of high-level materials
    high_level=True,
    show_BGA=True,
    show_LGA=True
):
    """
    Build a CPU model using either dfs_cleaned (all homogeneous materials)
    or dfs_high_level (5 high-level categories) depending on high_level argument.
    
    selected_materials:
        - If high_level=True: dict keys are high-level categories, values are sub-materials (optional)
        - If high_level=False: dict values are homogeneous materials
    """

    # Select dataset
    dfs = dfs_high_level if high_level else dfs_cleaned

    # Merge
    df_all = pd.concat(dfs, ignore_index=True)

    # Clean numeric and string columns
    df_all["Mass [mg]"] = pd.to_numeric(df_all["Mass [mg]"], errors="coerce")
    df_all = df_all.dropna(subset=["Mass [mg]", "Substances", "Homogeneous Material"])
    df_all["Socket"] = df_all["Socket"].astype(str).str.upper().str.strip()

    if high_level:
        material_col = "High Level Material"
        df_all[material_col] = df_all[material_col].astype(str).str.lower().str.strip()
    else:
        material_col = "Homogeneous Material"
        df_all[material_col] = df_all[material_col].astype(str).str.lower().str.strip()

    # Clean homogeneous material
    df_all["Homogeneous Material"] = df_all["Homogeneous Material"].astype(str).str.lower().str.strip()
    df_all["Substances"] = df_all["Substances"].astype(str).str.lower().str.strip()

    # Filter by socket
    sockets_to_keep = []
    if show_BGA: sockets_to_keep.append("BGA")
    if show_LGA: sockets_to_keep.append("LGA")
    if not sockets_to_keep:
        print("⚠ No sockets selected")
        return pd.DataFrame()
    df_all = df_all[df_all["Socket"].isin(sockets_to_keep)]

    # Filter by selected materials
    if selected_materials:
        mask = pd.Series([False]*len(df_all))
        for hl_cat, subs in selected_materials.items():
            hl_cat = hl_cat.lower().strip()
            subs = [s.lower().strip() for s in subs]
            if high_level:
                # Filter High-Level Material and optionally sub-materials
                if subs:
                    mask = mask | ((df_all[material_col] == hl_cat) &
                                   (df_all["Homogeneous Material"].isin(subs)))
                else:
                    mask = mask | (df_all[material_col] == hl_cat)
            else:
                # Filter Homogeneous Material only
                mask = mask | (df_all["Homogeneous Material"].isin(subs))
        df_all = df_all[mask]

    if df_all.empty:
        print("⚠ No data matches the selected materials and sockets.")
        return pd.DataFrame()

    # Compute mean per (High-Level Material, Substance) or (Homogeneous Material, Substance)
    df_model = df_all.groupby([material_col, "Substances"], as_index=False)["Mass [mg]"].mean()

    # Normalize to 100%
    total_mass = df_model["Mass [mg]"].sum()
    df_model["Mass %"] = df_model["Mass [mg]"] / total_mass * 100 if total_mass > 0 else 0

    return df_model.sort_values(by=material_col)

In [8]:
def plot_cpu_model(cpu_df, material_col="High Level Material"):
    if cpu_df.empty:
        print("⚠ No data to plot (empty CPU model).")
        return

    # --- Absolute mass plot ---
    fig_mass = px.bar(
        cpu_df,
        x=material_col,
        y="Mass [mg]",
        color="Substances",
        title="CPU Composition – Absolute Mass (mg)",
        barmode="stack",
        hover_data=["Mass %"]
    )
    fig_mass.update_layout(yaxis_title="Mass [mg]", hovermode="x unified")
    fig_mass.show()

    # --- Percentage plot (always 100%) ---
    fig_percent = px.bar(
        cpu_df,
        x=material_col,
        y="Mass %",
        color="Substances",
        title="CPU Composition – Percentage (%)",
        barmode="stack",
        hover_data=["Mass [mg]"]
    )
    fig_percent.update_layout(yaxis_title="Mass [%]", yaxis_range=[0, 100], hovermode="x unified")
    fig_percent.show()

Standard CPU model

In [9]:
cpu_standard = create_cpu_model(
    dfs_cleaned=dfs_cleaned,
    dfs_high_level=dfs_high_level,
    selected_materials=None,  # Use all materials
    high_level=True,          # High-level categories
    show_BGA=True,
    show_LGA=True
)
plot_cpu_model(cpu_standard)

Flip Chip Land Grid Array CPU

In [10]:
flip_chip_materials = {
    "connection": ["solder paste", "solder resists"],
    "die": ["die", "chip_1", "chip_2"],
    "encapsulation": ["metal lid", "lid plating", "lid", "plating", "underfill", "bump"],
    "carrier": ["substrate", "substrate metal", "substrate plastic"],
    "finishing": ["leadfinish", "finishing"]
}

In [11]:
cpu_fclga = create_cpu_model(
    dfs_cleaned=dfs_cleaned,
    dfs_high_level=dfs_high_level,
    selected_materials=flip_chip_materials,
    high_level=False,    # Use homogeneous materials
    show_BGA=False,
    show_LGA=True
)

# Use correct material column for plotting
plot_cpu_model(cpu_fclga, material_col="Homogeneous Material")


Boolean Series key will be reindexed to match DataFrame index.



### Adding the parameters 
- Surface for LGA using relation from above

In [12]:
def create_cpu_model(
    dfs_cleaned,
    dfs_high_level,
    selected_materials=None,  # dict: {high_level: [sub_materials]} or list of high-level materials
    high_level=True,
    show_BGA=True,
    show_LGA=True,
    surface=None  # NEW: CPU surface area [mm²] to scale carrier mass (LGA only)
):
    """
    Build a CPU material model.

    If `surface` is provided, scales the total carrier mass according to:
        Carrier mass [mg] = 57.753 + 0.309 * CPU Surface [mm²]
    but **only for LGA sockets**.
    The internal carrier substance ratios are kept constant.
    """

    # Select dataset
    dfs = dfs_high_level if high_level else dfs_cleaned
    df_all = pd.concat(dfs, ignore_index=True)

    # Clean numeric and string columns
    df_all["Mass [mg]"] = pd.to_numeric(df_all["Mass [mg]"], errors="coerce")
    df_all = df_all.dropna(subset=["Mass [mg]", "Substances", "Homogeneous Material"])
    df_all["Socket"] = df_all["Socket"].astype(str).str.upper().str.strip()

    # Normalize material column names
    if high_level:
        material_col = "High Level Material"
        df_all[material_col] = df_all[material_col].astype(str).str.lower().str.strip()
    else:
        material_col = "Homogeneous Material"
        df_all[material_col] = df_all[material_col].astype(str).str.lower().str.strip()

    df_all["Homogeneous Material"] = df_all["Homogeneous Material"].astype(str).str.lower().str.strip()
    df_all["Substances"] = df_all["Substances"].astype(str).str.lower().str.strip()

    # Filter by socket
    sockets_to_keep = []
    if show_BGA:
        sockets_to_keep.append("BGA")
    if show_LGA:
        sockets_to_keep.append("LGA")

    if not sockets_to_keep:
        print("⚠ No sockets selected")
        return pd.DataFrame()

    df_all = df_all[df_all["Socket"].isin(sockets_to_keep)]

    # Filter by selected materials
    if selected_materials:
        mask = pd.Series([False]*len(df_all))
        for hl_cat, subs in selected_materials.items():
            hl_cat = hl_cat.lower().strip()
            subs = [s.lower().strip() for s in subs]
            if high_level:
                if subs:
                    mask = mask | ((df_all[material_col] == hl_cat) &
                                   (df_all["Homogeneous Material"].isin(subs)))
                else:
                    mask = mask | (df_all[material_col] == hl_cat)
            else:
                mask = mask | (df_all["Homogeneous Material"].isin(subs))
        df_all = df_all[mask]

    if df_all.empty:
        print("⚠ No data matches the selected materials and sockets.")
        return pd.DataFrame()

    # Compute mean per material/substance
    df_model = df_all.groupby([material_col, "Substances", "Socket"], as_index=False)["Mass [mg]"].mean()

    # --- NEW: Apply carrier scaling only for LGA ---
    if surface is not None and "LGA" in sockets_to_keep:
        # Compute desired total carrier mass for LGA
        target_carrier_mass = 57.753 + 0.309 * surface

        # Identify LGA rows for carrier materials
        carrier_mask = (
            (df_model["Socket"] == "LGA") &
            (df_model[material_col].str.contains("carrier|substrate", case=False, regex=True))
        )

        current_carrier_mass = df_model.loc[carrier_mask, "Mass [mg]"].sum()

        if current_carrier_mass > 0:
            scale_factor = target_carrier_mass / current_carrier_mass
            df_model.loc[carrier_mask, "Mass [mg]"] *= scale_factor
        else:
            print("⚠ No carrier material found for LGA to scale.")

    # Drop socket column after scaling
    df_model = df_model.drop(columns=["Socket"], errors="ignore")

    # Normalize to 100%
    total_mass = df_model["Mass [mg]"].sum()
    df_model["Mass %"] = df_model["Mass [mg]"] / total_mass * 100 if total_mass > 0 else 0

    return df_model.sort_values(by=material_col)


In [13]:
def plot_cpu_model(cpu_df, material_col="High Level Material"):
    if cpu_df.empty:
        print("⚠ No data to plot (empty CPU model).")
        return

    # Common layout options
    layout_common = dict(
        hovermode="x unified",
        legend=dict(
            title="Substances",
            font=dict(size=12),
            bgcolor="rgba(255,255,255,0.8)",
            bordercolor="rgba(0,0,0,0.2)",
            borderwidth=1
        ),
        margin=dict(l=80, r=80, t=100, b=80),
        font=dict(size=14),
    )

    # --- Absolute mass plot ---
    fig_mass = px.bar(
        cpu_df,
        x=material_col,
        y="Mass [mg]",
        color="Substances",
        title="CPU Composition – Absolute Mass (mg)",
        barmode="stack",
        hover_data={
            "Mass [mg]": True,
            "Mass %": True,
            material_col: True
        },
        width=1200,   # Larger plot width
        height=700    # Larger plot height
    )
    fig_mass.update_layout(yaxis_title="Mass [mg]", **layout_common)
    fig_mass.show()

    # --- Percentage plot (always 100%) ---
    fig_percent = px.bar(
        cpu_df,
        x=material_col,
        y="Mass %",
        color="Substances",
        title="CPU Composition – Percentage (%)",
        barmode="stack",
        hover_data={
            "Mass [mg]": True,
            "Mass %": True,
            material_col: True
        },
        width=1200,
        height=800
    )
    fig_percent.update_layout(
        yaxis_title="Mass [%]",
        yaxis_range=[0, 100],
        **layout_common
    )
    fig_percent.show()

In [14]:
cpu_fclga = create_cpu_model(
    dfs_cleaned=dfs_cleaned,
    dfs_high_level=dfs_high_level,
    selected_materials=flip_chip_materials,
    high_level=False,
    show_BGA=False,
    show_LGA=True,
    surface=300  # CPU surface in mm²
)

plot_cpu_model(cpu_fclga, material_col="Homogeneous Material")


Boolean Series key will be reindexed to match DataFrame index.



In [15]:
def list_substances(cpu_df, material_name):
    """Print the unique substances for a given homogeneous material."""
    if cpu_df.empty:
        print("⚠ No data available.")
        return

    # Filter for the given material
    subset = cpu_df[cpu_df["Homogeneous Material"] == material_name]

    if subset.empty:
        print(f"⚠ No data found for material: {material_name}")
        return

    # Get unique substances
    substances = subset["Substances"].unique()

    print(f"Substances in '{material_name}':")
    for s in substances:
        print(f" - {s}")

# Example usage:
list_substances(cpu_fclga, "bump")

Substances in 'bump':
 - copper
 - silver
 - tin


In [16]:
def list_masses(cpu_df, material_name):
    """Print only the mass values (mg) for a given homogeneous material."""
    subset = cpu_df[cpu_df["Homogeneous Material"] == material_name]

    if subset.empty:
        print(f"⚠ No data found for material: {material_name}")
        return

    for mass in subset["Mass [mg]"]:
        print(mass)

list_masses(cpu_fclga, "encapsulation")

⚠ No data found for material: encapsulation
