In [1]:
import pandas as pd
import requests
import os
from urllib.parse import urlparse
import camelot
import re
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import seaborn as sns
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from plotly.express import get_trendline_results


In [2]:
# load_dfs.py

# Folder containing Excel files
folder_path = r"C:\Users\Arthur Resilio\Desktop\Excel_cleaned"

# List all Excel files
all_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".xlsx")]

dfs = []

for file in all_files:
    try:
        df = pd.read_excel(file)

        # --- 1. Clean column names ---
        df.columns = (
            df.columns.astype(str)
            .str.strip()
            .str.replace('\n', ' ', regex=False)
            .str.replace('\r', '', regex=False)
            .str.replace('\xa0', ' ', regex=False)
        )

        # --- 2. Identify date columns (keep as datetime) ---
        date_columns = [col for col in df.columns if 'date' in col.lower()]
        for col in date_columns:
            try:
                df[col] = pd.to_datetime(df[col], errors='ignore')
            except:
                pass

        # --- 3. Convert mass/weight columns to numeric ---
        mass_columns = ['Mass [mg]', 'Weight [mg]', 'Mass\n[mg]', 'Weight\n[mg]']
        for col in mass_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

        dfs.append(df)

    except Exception as e:
        print(f"Error loading {file}: {e}")

print(f"✅ Total datasets successfully loaded: {len(dfs)}")

# Optionally save dfs to a pickle for use in the next script
pd.to_pickle(dfs, r"C:\Users\Arthur Resilio\Desktop\dfs_loaded.pkl")

# clean_dfs.py
# Load previously saved dfs
dfs = pd.read_pickle(r"C:\Users\Arthur Resilio\Desktop\dfs_loaded.pkl")

def clean_name(name):
    """Standardize text: lowercase, strip spaces, remove parentheses contents."""
    if not isinstance(name, str):
        return name
    name = name.strip()
    name = re.sub(r'\s+', ' ', name)  # collapse multiple spaces
    name = name.lower()
    name = re.sub(r'\s*\(.*?\)\s*', '', name)  # remove parentheses and contents
    return name

def compute_surface_from_first_two(size_series):
    """
    Compute surface using the first two valid numeric Size [mm] values for a given CPU.
    If only one value exists, assume square.
    """
    valid_sizes = pd.to_numeric(size_series, errors="coerce").dropna().values
    if len(valid_sizes) >= 2:
        return valid_sizes[0] * valid_sizes[1]
    elif len(valid_sizes) == 1:
        return valid_sizes[0] ** 2
    else:
        return None

def clean_bom_df(df):
    """
    Cleans a single CPU BOM DataFrame:
    - Forward fill CPU metadata (Name, Socket, Size, Date)
    - Standardize names and substances
    - Compute total mass and surface area per CPU
    """
    df = df.copy()

    # Forward fill CPU metadata
    for col in ['Name', 'Socket', 'Size [mm]', 'Date']:
        if col in df.columns:
            df[col] = df[col].ffill()

    # Standardize text columns
    for col in ['Homogeneous Material', 'Substances']:
        if col in df.columns:
            df[col] = df[col].apply(clean_name)

    # Convert mass column to numeric
    if 'Mass [mg]' in df.columns:
        df['Mass [mg]'] = pd.to_numeric(df['Mass [mg]'], errors='coerce').fillna(0)
    else:
        df['Mass [mg]'] = 0

    # ✅ Compute total CPU mass per Name
    df['CPU Total Mass [mg]'] = df.groupby('Name')['Mass [mg]'].transform('sum')

    # ✅ Compute surface per CPU from the first two valid size values
    df['Surface [mm²]'] = df.groupby('Name')['Size [mm]'].transform(compute_surface_from_first_two)

    return df

# Clean all DataFrames
dfs_high_level = [clean_bom_df(df) for df in dfs]

# Save cleaned DataFrames
output_path = r"C:\Users\Arthur Resilio\Desktop\dfs_cleaned.pkl"
pd.to_pickle(dfs_high_level, output_path)

# print(f"Cleaned DataFrames saved with consistent total mass and 2-value-based surface computation → {output_path}")


  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')


✅ Total datasets successfully loaded: 10


  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')
  df[col] = pd.to_datetime(df[col], errors='ignore')


In [3]:
hm_set = set()
for df in dfs_high_level:
    if 'Homogeneous Material' in df.columns:
        hm_set.update(df['Homogeneous Material'].astype(str).str.strip().str.lower())

# Convert to sorted list
all_hm = sorted(hm_set)

# --- Initialize sets ---
hm_lga = set()
hm_bga = set()

# --- Iterate through each dataframe ---
for df in dfs_high_level:
    if 'Homogeneous Material' in df.columns and 'Socket' in df.columns:
        # Clean strings
        df["Homogeneous Material"] = df["Homogeneous Material"].astype(str).str.strip().str.lower()
        df["Socket"] = df["Socket"].astype(str).str.strip().str.upper()

        # Separate by socket type
        hm_lga.update(df.loc[df["Socket"] == "LGA", "Homogeneous Material"])
        hm_bga.update(df.loc[df["Socket"] == "BGA", "Homogeneous Material"])

# --- Convert to sorted lists ---
hm_lga = sorted(hm_lga)
hm_bga = sorted(hm_bga)

# --- Print results ---
print("Homogeneous Materials for LGA CPUs:")
print(", ".join(hm_lga))
print("\nHomogeneous Materials for BGA CPUs:")
print(", ".join(hm_bga))

def group_homogeneous_material(dfs_cleaned):
    # Define mapping groups
    hm_mapping = {
        'die': ['chip_1', 'chip_2', 'die', 'die attach'],
        'carrier': ['cavity', 'leadfinish', 'leadframe', 'leadframe coating', 'substrate', 'substrate metal', 'substrate plastic',],
        'encapsulation': ['encapsulation', 'lid', 'lid platings', 'metal lid', 'mold compound', 'plating'],
        'connection': ['bump', 'wire', 'underfill', 'solder paste', 'solder resists', 'solderball','underfill'],
        'finishing': ['finishing', 'glue', 'coating', '']
    }

    # Create a flat mapping dictionary (value -> high level group)
    material_to_group = {}
    for group, materials in hm_mapping.items():
        for material in materials:
            material_to_group[material] = group

    dfs_high_level = []

    for df in dfs_cleaned:
        df_copy = df.copy()

        # Clean and map homogeneous material to high level group
        df_copy['High Level Material'] = (
            df_copy['Homogeneous Material']
            .astype(str).str.lower().str.strip()
            .map(material_to_group)  # Map to high level
            .fillna('other')  # For anything not in mapping
        )

        dfs_high_level.append(df_copy)

    return dfs_high_level

Homogeneous Materials for LGA CPUs:
, bump, chip_1, chip_2, coating, die, die attach, encapsulation, finishing, glue, leadfinish, lid, lid platings, metal lid, mold compound, plating, solder paste, solder resists, substrate, substrate metal, substrate plastic, underfill, wire

Homogeneous Materials for BGA CPUs:
cavity, die, die attach, encapsulation, glue, mold compound, plating, solder paste, solderball, substrate, underfill, wire


In [4]:
dfs_high_level = group_homogeneous_material(dfs_high_level)
dfs_high_level[8].head()

Unnamed: 0,Name,Socket,Size [mm],Nb,Date,Homogeneous Material,Material Group,Component mass [mg],Level,Substance Category,Substances,CAS,Mass [mg],Concentration in homogeneous material (ppm),Concentration in product (ppm),CPU Total Mass [mg],Surface [mm²],High Level Material
0,SAVT*WRAVAA4,LGA,4.0,24.0,2014-11-25,die,Other inorganic materials,7.757,supplier,die,silicon,7440-21-3,0.8,103133,27221,29.389,16.0,die
1,SAVT*WRAVAA4,LGA,4.0,,2014-11-25,die,,,supplier,metallisation,aluminium,7429-90-5,0.003,387,102,29.389,16.0,die
2,SAVT*WRAVAA4,LGA,0.8,,2014-11-25,die,,,supplier,metallisation,copper,7440-50-8,0.061,7864,2076,29.389,16.0,die
3,SAVT*WRAVAA4,LGA,0.8,,2014-11-25,die,,,supplier,metallisation,tantalum,7440-25-7,0.003,387,102,29.389,16.0,die
4,SAVT*WRAVAA4,LGA,0.8,,2014-11-25,die,,,supplier,metallisation,titanium,7440-32-6,0.001,129,34,29.389,16.0,die


In [5]:
def plot_cpu_category_relation(
    df_list,
    category,
    category_level='high',       # 'high' or 'homogeneous'
    socket_type='all',           # 'BGA', 'LGA', or 'all'
    y_value_type='mass',         # 'mass' or 'percentage'
    x_axis='mass'                # 'mass' or 'surface'
):
    """
    Plots CPU-level relationship between total mass/surface and mass (or %) of a given material category.
    Each point = one CPU. The y-axis value is the SUM of all substance masses in the chosen category for that CPU.
    Also returns the trendline formula if available.
    """

    # Combine all datasets
    df = pd.concat(df_list, ignore_index=True)

    # Optional: filter socket
    if socket_type.lower() != 'all':
        df = df[df['Socket'].str.lower() == socket_type.lower()]

    # Choose the right category column
    cat_col = 'High Level Material' if category_level == 'high' else 'Homogeneous Material'
    if cat_col not in df.columns:
        raise ValueError(f"Column '{cat_col}' not found in the DataFrame.")

    # Drop missing category rows
    df = df.dropna(subset=[cat_col, 'Mass [mg]', 'CPU Total Mass [mg]'])

    # --- STEP 1: aggregate substance mass per CPU × category ---
    cat_mass = (
        df.groupby(['Name', cat_col], as_index=False)['Mass [mg]']
        .sum()
        .rename(columns={'Mass [mg]': 'Category Mass [mg]'})
    )

    # --- STEP 2: total CPU mass & surface ---
    cpu_totals = (
        df.groupby('Name', as_index=False)[['CPU Total Mass [mg]', 'Surface [mm²]']]
        .mean()
    )

    # --- STEP 3: merge totals ---
    merged = pd.merge(cat_mass, cpu_totals, on='Name', how='left')

    # --- STEP 4: filter the chosen category ---
    subset = merged[merged[cat_col].str.lower() == category.lower()]
    if subset.empty:
        raise ValueError(f"No data found for category '{category}' at level '{category_level}'.")

    # --- STEP 5: compute y-value ---
    if y_value_type == 'percentage':
        subset['Category Value'] = subset['Category Mass [mg]'] / subset['CPU Total Mass [mg]'] * 100
        y_label = f"{category.capitalize()} mass [% of CPU]"
    else:
        subset['Category Value'] = subset['Category Mass [mg]']
        y_label = f"{category.capitalize()} mass [mg]"

    # --- STEP 6: choose x-axis variable ---
    if x_axis == 'mass':
        x_col, x_label = 'CPU Total Mass [mg]', 'CPU Total Mass [mg]'
    elif x_axis == 'surface':
        x_col, x_label = 'Surface [mm²]', 'CPU Surface [mm²]'
    else:
        raise ValueError("x_axis must be 'mass' or 'surface'.")

    # --- STEP 7: plot interactive scatter with trendline ---
    fig = px.scatter(
        subset,
        x=x_col,
        y='Category Value',
        hover_name='Name',
        title=f"{category.capitalize()} vs CPU {x_axis.capitalize()} ({y_value_type})",
        labels={'Category Value': y_label, x_col: x_label},
        color=x_col,
        color_continuous_scale='Viridis',
        trendline='ols'
    )

    # Extract trendline formula
    results = get_trendline_results(fig)
    formula = None

    if not results.empty:
        model = results.iloc[0]["px_fit_results"]
        params = model.params

        # Case 1: statsmodels Series with named params
        if hasattr(params, "index"):
            intercept = params.get("Intercept", params.iloc[0])
            slope_candidates = [k for k in params.index if k != "Intercept"]
            slope = params[slope_candidates[0]] if slope_candidates else params.iloc[1]
        # Case 2: plain NumPy array (no names)
        elif isinstance(params, (list, tuple)) or hasattr(params, "__len__"):
            intercept = params[0]
            slope = params[1] if len(params) > 1 else 0
        else:
            intercept, slope = None, None

        # Try to get R² if available
        r2 = getattr(model, "rsquared", None)
        if r2 is not None:
            formula = f"{y_label} = {intercept:.3f} + {slope:.3f} × {x_label}  (R² = {r2:.3f})"
        else:
            formula = f"{y_label} = {intercept:.3f} + {slope:.3f} × {x_label}"

    # Update figure layout
    fig.update_traces(marker=dict(size=10, opacity=0.8))
    fig.update_layout(height=600, margin=dict(l=80, r=60, t=80, b=60))

    # Show the figure
    fig.show()

    return fig, formula

In [6]:
plot_cpu_category_relation(
    dfs_high_level,
    category='carrier',
    category_level='high',
    socket_type='LGA',
    y_value_type='mass',
    x_axis='surface'
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset['Category Value'] = subset['Category Mass [mg]']


(Figure({
     'data': [{'hovertemplate': ('<b>%{hovertext}</b><br><br>CPU' ... ' mass [mg]=%{y}<extra></extra>'),
               'hovertext': array(['A2XZ*AU23CDF ', 'D53N*MT78ADA ', 'MA006046957*PASCO2V15',
                                   'SAVT*WRAVAA4'], dtype=object),
               'legendgroup': '',
               'marker': {'color': {'bdata': 'AAAAAAAAUkAAAAAAAAAiQGdmZmZmJmhAAAAAAAAAMEA=', 'dtype': 'f8'},
                          'coloraxis': 'coloraxis',
                          'opacity': 0.8,
                          'size': 10,
                          'symbol': 'circle'},
               'mode': 'markers',
               'name': '',
               'orientation': 'v',
               'showlegend': False,
               'type': 'scatter',
               'x': {'bdata': 'AAAAAAAAUkAAAAAAAAAiQGdmZmZmJmhAAAAAAAAAMEA=', 'dtype': 'f8'},
               'xaxis': 'x',
               'y': {'bdata': 'Urgehes5b0DJdr6fGi8FQJZDi2znu05AQmDl0CLbGkA=', 'dtype': 'f8'},
               'yax

In [7]:
import numpy as np
import pandas as pd

def calculate_alpha_values_with_substrate(dfs_high_level, socket_type="LGA", surface_mm2=100.0, cpu_weight=200.0):
    """
    Calculate alpha values for high-level components considering the substrate mass.

    Parameters:
    - dfs_high_level: List of DataFrames containing the high-level material data.
    - socket_type: Type of socket ("LGA" or "BGA").
    - surface_mm2: Surface area in square millimeters.
    - cpu_weight: Total CPU weight in milligrams.

    Returns:
    - Dictionary of alpha values for each high-level component.
    """

    # Combine all input dataframes
    df = pd.concat(dfs_high_level, ignore_index=True)

    # Filter by socket type
    df = df[df['Socket'].str.upper() == socket_type.upper()]

    # Calculate substrate mass
    if socket_type == "LGA":
        substrate_mass = 57.753 + 0.309 * surface_mm2
    elif socket_type == "BGA":
        substrate_mass = -34.658 + 1.895 * surface_mm2
    else:
        raise ValueError("Socket type must be 'LGA' or 'BGA'")

    # Calculate remaining mass
    remaining_mass = cpu_weight - substrate_mass

    # Group by 'High Level Material' and sum the 'Mass [mg]'
    grouped = df.groupby('High Level Material', as_index=False)['Mass [mg]'].sum()

    # Exclude substrate from the grouped data to calculate proportions for other components
    other_components = grouped[grouped['High Level Material'] != 'carrier']

    # Calculate total mass of other components
    total_other_mass = other_components['Mass [mg]'].sum()

    # Calculate proportions for other components
    other_components['proportion'] = other_components['Mass [mg]'] / total_other_mass

    # Allocate remaining mass to other components based on their proportions
    other_components['allocated_mass'] = other_components['proportion'] * remaining_mass

    # Combine substrate mass with allocated masses for other components
    alpha_dict = {'carrier': substrate_mass / cpu_weight}
    for _, row in other_components.iterrows():
        alpha_dict[row['High Level Material']] = row['allocated_mass'] / cpu_weight

    return alpha_dict

def calculate_element_fractions(dfs_high_level, socket_type="LGA"):
    """
    Calculate element fractions for each high-level component based on the datasets.

    Parameters:
    - dfs_high_level: List of DataFrames containing the high-level material data.
    - socket_type: Type of socket ("LGA" or "BGA").

    Returns:
    - Dictionary of element fractions for each high-level component.
    """

    # Combine all input dataframes
    df = pd.concat(dfs_high_level, ignore_index=True)

    # Filter by socket type
    df = df[df['Socket'].str.upper() == socket_type.upper()]

    # Group by 'High Level Material' and 'Substances', then sum the 'Mass [mg]'
    grouped = df.groupby(['High Level Material', 'Substances'], as_index=False)['Mass [mg]'].sum()

    # Calculate total mass for each 'High Level Material'
    total_mass_per_component = grouped.groupby('High Level Material')['Mass [mg]'].transform('sum')

    # Calculate element fractions
    grouped['fraction'] = grouped['Mass [mg]'] / total_mass_per_component

    # Convert to a nested dictionary
    element_fractions = {}
    for component in grouped['High Level Material'].unique():
        component_data = grouped[grouped['High Level Material'] == component]
        element_fractions[component] = component_data.set_index('Substances')['fraction'].to_dict()

    return element_fractions

def f(surface_mm2, socket_type):
    """
    Calculate substrate mass based on surface area and socket type.

    Parameters:
    - surface_mm2: Surface area in square millimeters.
    - socket_type: Type of socket ("LGA" or "BGA").

    Returns:
    - Substrate mass in milligrams.
    """
    if socket_type == "LGA":
        substrate_mass = 57.753 + 0.309 * surface_mm2
    elif socket_type == "BGA":
        substrate_mass = -34.658 + 1.895 * surface_mm2
    else:
        raise ValueError("Socket type must be 'LGA' or 'BGA'")
    return substrate_mass

def g(substrate_mass, cpu_weight, alpha_dict):
    """
    Calculate masses of high-level components based on substrate mass and CPU weight.

    Parameters:
    - substrate_mass: Mass of the substrate in milligrams.
    - cpu_weight: Total CPU weight in milligrams.
    - alpha_dict: Dictionary of alpha values for each high-level component.

    Returns:
    - Dictionary of masses for each high-level component.
    """
    component_masses = {}
    for component, alpha in alpha_dict.items():
        component_masses[component] = alpha * cpu_weight

    return component_masses

def h(component_masses, element_fractions):
    """
    Calculate masses of elements within each high-level component.

    Parameters:
    - component_masses: Dictionary of masses for each high-level component.
    - element_fractions: Dictionary of element fractions for each high-level component.

    Returns:
    - Dictionary of element masses for each high-level component.
    """
    element_masses = {}
    for component, mass in component_masses.items():
        if component in element_fractions:
            element_masses[component] = {}
            for element, fraction in element_fractions[component].items():
                element_masses[component][element] = fraction * mass

    return element_masses

def calculate_uncertainty_matrices(dfs_high_level, socket_type="LGA"):
    """
    Calculate uncertainty matrices for the functions f, g, and h.

    Parameters:
    - dfs_high_level: List of DataFrames containing the high-level material data.
    - socket_type: Type of socket ("LGA" or "BGA").

    Returns:
    - Uncertainty matrices for f, g, and h.
    """
    # Placeholder for uncertainty matrices
    sigma_f = np.array([[0.1, 0.01], [0.01, 0.1]])  # Example covariance matrix for f
    sigma_g = np.diag([0.01, 0.01, 0.01, 0.01])  # Example covariance matrix for g
    sigma_h = {
        'carrier': np.diag([0.01, 0.01, 0.01]),
        'connection': np.diag([0.01, 0.01]),
        'die': np.diag([0.01, 0.01]),
        'encapsulation': np.diag([0.01, 0.01]),
        'finishing': np.diag([0.01, 0.01])
    }  # Example covariance matrices for h

    return sigma_f, sigma_g, sigma_h


In [8]:
# Example usage
surface_mm2 = 4256
socket_type = "LGA"
cpu_weight = 2800

# Calculate alpha values
alpha_values = calculate_alpha_values_with_substrate(dfs_high_level, socket_type, surface_mm2, cpu_weight)
print("Alpha Values:", alpha_values)

# Calculate element fractions
element_fractions = calculate_element_fractions(dfs_high_level, socket_type)
print("Element Fractions:", element_fractions)

# Calculate substrate mass
substrate_mass = f(surface_mm2, socket_type)
print("Substrate Mass:", substrate_mass)

# Calculate component masses
component_masses = g(substrate_mass, cpu_weight, alpha_values)
print("Component Masses:", component_masses)

# Calculate element masses
element_masses = h(component_masses, element_fractions)
print("Element Masses:", element_masses)

# Calculate uncertainty matrices
sigma_f, sigma_g, sigma_h = calculate_uncertainty_matrices(dfs_high_level, socket_type)
print("Sigma f:", sigma_f)
print("Sigma g:", sigma_g)
print("Sigma h:", sigma_h)

Alpha Values: {'carrier': 0.4903060714285714, 'connection': 0.0024898125006616407, 'die': 0.16771939025239727, 'encapsulation': 0.33327530265227784, 'finishing': 0.006209423166091834}
Element Fractions: {'carrier': {'alumina': 0.10055202095808381, 'aluminium hydroxide': 0.0014938872255489022, 'aluminiumhydroxide': 0.002863023952095808, 'aluminum oxide': 0.6448478043912175, 'amorphous silica': 0.004107410179640718, 'barium sulfate': 0.0020053642714570855, 'bismaleimide polymer': 0.0007859281437125748, 'bismaleimide triazine resin': 0.002601047904191616, 'bismethane': 0.0009605788423153692, 'bisphenol f type epoxy resin': 0.0006175149700598802, 'boron': 3.1187624750499e-06, 'calcium oxide': 0.004194735528942115, 'calcium sulfate': 2.8068862275449095e-05, 'calciumoxide': 0.00033370758483033925, 'chromium oxide': 0.03356412175648702, 'chromiumoxide': 0.0043226047904191605, 'copper': 0.02561439620758483, 'copper phthalocyanine bule': 9.356287425149699e-06, 'cu': 0.00498378243512974, 'fiber 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [9]:
def element_masses_to_api_text(element_masses):
    """
    Convert element masses to a formatted text message for API usage.

    Parameters:
    - element_masses: Dictionary of element masses for each high-level component.

    Returns:
    - Formatted text message for API usage.
    """
    DIRECT_MAPPING = {
        "gold": "Gold()",
        "nickel": "Nickel()",
        "alumina": "AluminiumOxide()",
        "aluminium hydroxide": "AluminiumHydroxide()",
        "aluminiumhydroxide": "AluminiumHydroxide()",
        "aluminum oxide": "AluminiumOxide()",
        "amorphous silica": "Silica()",
        "barium sulfate": "BariumSulfide()",
        "bismaleimide polymer": "EpoxyResin()",
        "bismethane": "Pentane()",
        "bisphenol f type epoxy resin": "BisphenolAEpoxy()",
        "bisphenol a diglycidyl ether resin": "BisphenolAEpoxy()",
        "boron": "BoronCarbide()",
        "calcium oxide": "HydratedLime()",
        "calcium sulfate": "HydratedLime()",
        "calciumoxide": "HydratedLime()",
        "chromium oxide": "Chromium()",
        "chromiumoxide": "Chromium()",
        "copper": "Copper()",
        "copper phthalocyanine bule": "Copper()",
        "cu": "Copper()",
        "dimethyl siloxane": "SiliconeTetrachloride()",
        "fiber glass": "GlassFibre()",
        "glass fiber": "GlassFibre()",
        "glass fibre": "GlassFibre()",
        "magnesium oxide": "MagnesiumOxide()",
        "magnesiumoxide": "MagnesiumOxide()",
        "methoxymethylethoxy propanol": "MethoxyPropanol()",
        "molybdenum": "Molybdenum()",
        "other aluminium compounds": "AluminiumIngot()",
        "other inoranic filler": "InorganicChemicals()",
        "phenolic polymer resin, epikote 155": "PhenolicResin()",
        "phosphorus": "PhosphorousChloride()",
        "polymerized biphenyl resin": "EpoxyResin()",
        "proprietary material-cured resin": "EpoxyResin()",
        "silicon dioxide": "Silicone()",
        "talc containing no asbestiform fibers": "MagnesiumOxide()",
        "talc4)": "MagnesiumOxide()",
        "thermosetting resin": "EpoxyResin()",
        "titandioxide": "TitaniumDioxide()",
        "titanium oxide": "TitaniumDioxide()",
        "triazine": "Triazine()",
        "tungsten": "Tungsten()",
        "zinc hydroxide": "ZincOxide()",
        "zinc": "ZincOxide()",
        "bismaleimide triazine resin": "Triazine()",
        "silver": "Silver()",
        "tin": "Tin()",
        "1,6-bisnaphthalene": "EpoxyResin()",
        "3-glycidoxypropyltrimethoxysilane": "EpoxyResin()",
        "additives": "PhthalicAnhydride()",
        "amine type hardener": "Piperidine()",
        "bisphenol a liquid epoxy resin": "BisphenolAEpoxy()",
        "bisphenol f liquid epoxy resin": "BisphenolAEpoxy()",
        "carbon black": "CarbonBlack()",
        "silicondioxide": "Silica()",
        "silicone resin": "Silicone()",
        "silicone": "Silicone()",
        "carbon": "CarbonBlack()",
        "chromium": "Chromium()",
        "iron": "PigIron()",
        "manganese": "Manganese()",
        "methyl methacrylate crosspolymer": "MethylMethacrylate()",
        "silicon": "Silicone()",
        "sulfur": "Sulfur()",
        "dimer acid diglycidyl ester": "Adhesive()",
        "epon resin 828": "EpoxyResin()",
        "epoxy resin": "EpoxyResin()",
        "metal hydroxide": "AluminiumHydroxide()",
        "polytetrafluoroethylene": "PolytetraFluoroEthylene()",
        "proprietary material-other epoxy resins": "EpoxyResin()",
        "proprietary material-other phenolic resins": "PhenolicResin()",
        "silica vitreous 60676-86-0": "Silica()",
        "silica, vitreous [ fused silica ]": "Silica()",
        "silicon dioxide cas 7631-86-9": "Silicone()",
        "silicondioxide": "Silicone()",
        "sumitomo-molding-epoxy resin a-ip": "EpoxyResin()",
        "sumitomo-molding-phenol resin-ip": "EpoxyResin()",
        "cobalt": "Cobalt()",
        "dicyandiamide": "Nitrile()",
        "oxirane": "EthyleneOxide()",
        "filler": "CalciumCarbonate()",
        "palladium": "Palladium()",
        "silicone tetrachloride": "SiliconeTetrachloride()",
    }

    result = ""
    for component, elements in element_masses.items():
        result += f"{component.upper()}: list[Exchange] = [\n"
        for element, mass in elements.items():
            terminal_class = DIRECT_MAPPING.get(element.lower())
            if terminal_class:
                mass_str = f"{mass:.15g}"
                result += f"    Exchange({mass_str} * 1e-6, {terminal_class}),\n"
        result += "]\n\n"
    return result


api_text = element_masses_to_api_text(element_masses)
print(api_text)


CARRIER: list[Exchange] = [
    Exchange(138.043545836452 * 1e-6, AluminiumOxide()),
    Exchange(2.05089353480539 * 1e-6, AluminiumHydroxide()),
    Exchange(3.93052247380239 * 1e-6, AluminiumHydroxide()),
    Exchange(885.283822193114 * 1e-6, AluminiumOxide()),
    Exchange(5.63888681699102 * 1e-6, Silica()),
    Exchange(2.75307837761976 * 1e-6, BariumSulfide()),
    Exchange(1.07896695359281 * 1e-6, EpoxyResin()),
    Exchange(3.57086682260479 * 1e-6, Triazine()),
    Exchange(1.31873738772455 * 1e-6, Pentane()),
    Exchange(0.847759749251497 * 1e-6, BisphenolAEpoxy()),
    Exchange(0.00428161489520958 * 1e-6, BoronCarbide()),
    Exchange(5.75877203405688 * 1e-6, HydratedLime()),
    Exchange(0.0385345340568862 * 1e-6, HydratedLime()),
    Exchange(0.458132793787425 * 1e-6, HydratedLime()),
    Exchange(46.0787395022455 * 1e-6, Chromium()),
    Exchange(5.93431824476048 * 1e-6, Chromium()),
    Exchange(35.1649031343563 * 1e-6, Copper()),
    Exchange(0.0128448446856287 * 1e-6, C

In [10]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

def calculate_f_uncertainties(dfs_high_level):
    """
    Calculate the uncertainties for the substrate mass function (f).
    """
    df = pd.concat(dfs_high_level, ignore_index=True)

    # Filter data for LGA and BGA
    lga_data = df[df['Socket'].str.upper() == 'LGA']
    bga_data = df[df['Socket'].str.upper() == 'BGA']

    # Calculate uncertainties for LGA
    if not lga_data.empty:
        # Group by 'Name' and calculate the mean surface area and total mass
        grouped_lga = lga_data.groupby('Name').agg({'Size [mm]': 'mean', 'Mass [mg]': 'sum'}).reset_index()
        X_lga = grouped_lga['Size [mm]'].values.reshape(-1, 1)
        y_lga = grouped_lga['Mass [mg]'].values.reshape(-1, 1)

        model_lga = LinearRegression(fit_intercept=True)
        model_lga.fit(X_lga, y_lga)
        coef_lga = model_lga.coef_[0]
        intercept_lga = model_lga.intercept_

        # Calculate residuals
        residuals_lga = y_lga - model_lga.predict(X_lga)
        residual_variance_lga = np.var(residuals_lga, ddof=2)

        # Covariance matrix for LGA
        X_design_lga = np.column_stack([np.ones_like(X_lga.reshape(-1)), X_lga.reshape(-1)])
        cov_lga = residual_variance_lga * np.linalg.inv(X_design_lga.T @ X_design_lga)
    else:
        coef_lga, intercept_lga, cov_lga = None, None, None

    # Calculate uncertainties for BGA
    if not bga_data.empty:
        # Group by 'Name' and calculate the mean surface area and total mass
        grouped_bga = bga_data.groupby('Name').agg({'Size [mm]': 'mean', 'Mass [mg]': 'sum'}).reset_index()
        X_bga = grouped_bga['Size [mm]'].values.reshape(-1, 1)
        y_bga = grouped_bga['Mass [mg]'].values.reshape(-1, 1)

        model_bga = LinearRegression(fit_intercept=True)
        model_bga.fit(X_bga, y_bga)
        coef_bga = model_bga.coef_[0]
        intercept_bga = model_bga.intercept_

        # Calculate residuals
        residuals_bga = y_bga - model_bga.predict(X_bga)
        residual_variance_bga = np.var(residuals_bga, ddof=2)

        # Covariance matrix for BGA
        X_design_bga = np.column_stack([np.ones_like(X_bga.reshape(-1)), X_bga.reshape(-1)])
        cov_bga = residual_variance_bga * np.linalg.inv(X_design_bga.T @ X_design_bga)
    else:
        coef_bga, intercept_bga, cov_bga = None, None, None

    return {
        'LGA': {'intercept': intercept_lga, 'coef': coef_lga, 'cov': cov_lga},
        'BGA': {'intercept': intercept_bga, 'coef': coef_bga, 'cov': cov_bga}
    }

def calculate_g_uncertainties(dfs_high_level, socket_type="LGA"):
    """
    Calculate the uncertainties for the component masses function (g).
    """
    df = pd.concat(dfs_high_level, ignore_index=True)
    df = df[df['Socket'].str.upper() == socket_type.upper()]

    # Group by 'High Level Material' and sum the 'Mass [mg]'
    grouped = df.groupby('High Level Material', as_index=False)['Mass [mg]'].sum()

    # Calculate proportions for each component
    total_mass = grouped['Mass [mg]'].sum()
    grouped['proportion'] = grouped['Mass [mg]'] / total_mass

    # Calculate covariance matrix for proportions
    proportions = grouped['proportion'].values
    cov_proportions = np.cov(proportions, rowvar=False)

    return cov_proportions

def calculate_h_uncertainties(dfs_high_level, socket_type="LGA"):
    """
    Calculate the uncertainties for the element masses function (h).
    """
    df = pd.concat(dfs_high_level, ignore_index=True)
    df = df[df['Socket'].str.upper() == socket_type.upper()]

    # Group by 'High Level Material' and 'Substances', then sum the 'Mass [mg]'
    grouped = df.groupby(['High Level Material', 'Substances'], as_index=False)['Mass [mg]'].sum()

    # Calculate total mass for each 'High Level Material'
    total_mass_per_component = grouped.groupby('High Level Material')['Mass [mg]'].transform('sum')

    # Calculate element fractions
    grouped['fraction'] = grouped['Mass [mg]'] / total_mass_per_component

    # Calculate covariance matrix for each component's element fractions
    cov_matrices = {}
    for component in grouped['High Level Material'].unique():
        component_data = grouped[grouped['High Level Material'] == component]
        fractions = component_data['fraction'].values
        cov_matrices[component] = np.cov(fractions, rowvar=False)

    return cov_matrices

def modelisation_cpu_with_uncertainties(dfs_high_level, surface_mm2=4256, socket_type="LGA", cpu_weight=2800):
    """
    Main function to calculate alpha values, element fractions, component masses, and element masses,
    along with their uncertainties.
    """
    # Calculate uncertainties for f
    f_uncertainties = calculate_f_uncertainties(dfs_high_level)

    # Calculate uncertainties for g
    g_uncertainties = calculate_g_uncertainties(dfs_high_level, socket_type)

    # Calculate uncertainties for h
    h_uncertainties = calculate_h_uncertainties(dfs_high_level, socket_type)

    # Calculate alpha values
    alpha_values = calculate_alpha_values_with_substrate(dfs_high_level, socket_type, surface_mm2, cpu_weight)

    # Calculate element fractions
    element_fractions = calculate_element_fractions(dfs_high_level, socket_type)

    # Calculate substrate mass
    substrate_mass = f(surface_mm2, socket_type)

    # Calculate component masses
    component_masses = g(substrate_mass, cpu_weight, alpha_values)

    # Calculate element masses
    element_masses = h(component_masses, element_fractions)

    # Convert element masses to API text
    api_text = element_masses_to_api_text(element_masses)

    return api_text, component_masses, element_masses, f_uncertainties, g_uncertainties, h_uncertainties

def calculate_alpha_values_with_substrate(dfs_high_level, socket_type="LGA", surface_mm2=100.0, cpu_weight=200.0):
    """
    Calculate alpha values for high-level components considering the substrate mass.
    """
    df = pd.concat(dfs_high_level, ignore_index=True)
    df = df[df['Socket'].str.upper() == socket_type.upper()]

    if socket_type == "LGA":
        substrate_mass = 57.753 + 0.309 * surface_mm2
    elif socket_type == "BGA":
        substrate_mass = -34.658 + 1.895 * surface_mm2
    else:
        raise ValueError("Socket type must be 'LGA' or 'BGA'")

    remaining_mass = cpu_weight - substrate_mass
    grouped = df.groupby('High Level Material', as_index=False)['Mass [mg]'].sum()
    other_components = grouped[grouped['High Level Material'] != 'carrier']
    total_other_mass = other_components['Mass [mg]'].sum()
    other_components['proportion'] = other_components['Mass [mg]'] / total_other_mass
    other_components['allocated_mass'] = other_components['proportion'] * remaining_mass

    alpha_dict = {'carrier': substrate_mass / cpu_weight}
    for _, row in other_components.iterrows():
        alpha_dict[row['High Level Material']] = row['allocated_mass'] / cpu_weight

    return alpha_dict

def calculate_element_fractions(dfs_high_level, socket_type="LGA"):
    """
    Calculate element fractions for each high-level component.
    """
    df = pd.concat(dfs_high_level, ignore_index=True)
    df = df[df['Socket'].str.upper() == socket_type.upper()]

    grouped = df.groupby(['High Level Material', 'Substances'], as_index=False)['Mass [mg]'].sum()
    total_mass_per_component = grouped.groupby('High Level Material')['Mass [mg]'].transform('sum')
    grouped['fraction'] = grouped['Mass [mg]'] / total_mass_per_component

    element_fractions = {}
    for component in grouped['High Level Material'].unique():
        component_data = grouped[grouped['High Level Material'] == component]
        element_fractions[component] = component_data.set_index('Substances')['fraction'].to_dict()

    return element_fractions

def f(surface_mm2, socket_type):
    """
    Calculate substrate mass based on surface area and socket type.
    """
    if socket_type == "LGA":
        substrate_mass = 57.753 + 0.309 * surface_mm2
    elif socket_type == "BGA":
        substrate_mass = -34.658 + 1.895 * surface_mm2
    else:
        raise ValueError("Socket type must be 'LGA' or 'BGA'")
    return substrate_mass

def g(substrate_mass, cpu_weight, alpha_dict):
    """
    Calculate masses of high-level components.
    """
    component_masses = {}
    for component, alpha in alpha_dict.items():
        component_masses[component] = alpha * cpu_weight
    return component_masses

def h(component_masses, element_fractions):
    """
    Calculate masses of elements within each high-level component.
    """
    element_masses = {}
    for component, mass in component_masses.items():
        if component in element_fractions:
            element_masses[component] = {}
            for element, fraction in element_fractions[component].items():
                element_masses[component][element] = fraction * mass
    return element_masses

def element_masses_to_api_text(element_masses):
    """
    Convert element masses to a formatted text message for API usage.
    """
    DIRECT_MAPPING = {
        "gold": "Gold()",
        "nickel": "Nickel()",
        "alumina": "AluminiumOxide()",
        "aluminium hydroxide": "AluminiumHydroxide()",
        "aluminiumhydroxide": "AluminiumHydroxide()",
        "aluminum oxide": "AluminiumOxide()",
        "amorphous silica": "Silica()",
        "barium sulfate": "BariumSulfide()",
        "bismaleimide polymer": "EpoxyResin()",
        "bismethane": "Pentane()",
        "bisphenol f type epoxy resin": "BisphenolAEpoxy()",
        "bisphenol a diglycidyl ether resin": "BisphenolAEpoxy()",
        "boron": "BoronCarbide()",
        "calcium oxide": "HydratedLime()",
        "calcium sulfate": "HydratedLime()",
        "calciumoxide": "HydratedLime()",
        "chromium oxide": "Chromium()",
        "chromiumoxide": "Chromium()",
        "copper": "Copper()",
        "copper phthalocyanine bule": "Copper()",
        "cu": "Copper()",
        "dimethyl siloxane": "SiliconeTetrachloride()",
        "fiber glass": "GlassFibre()",
        "glass fiber": "GlassFibre()",
        "glass fibre": "GlassFibre()",
        "magnesium oxide": "MagnesiumOxide()",
        "magnesiumoxide": "MagnesiumOxide()",
        "methoxymethylethoxy propanol": "MethoxyPropanol()",
        "molybdenum": "Molybdenum()",
        "other aluminium compounds": "AluminiumIngot()",
        "other inoranic filler": "InorganicChemicals()",
        "phenolic polymer resin, epikote 155": "PhenolicResin()",
        "phosphorus": "PhosphorousChloride()",
        "polymerized biphenyl resin": "EpoxyResin()",
        "proprietary material-cured resin": "EpoxyResin()",
        "silicon dioxide": "Silicone()",
        "talc containing no asbestiform fibers": "MagnesiumOxide()",
        "talc4)": "MagnesiumOxide()",
        "thermosetting resin": "EpoxyResin()",
        "titandioxide": "TitaniumDioxide()",
        "titanium oxide": "TitaniumDioxide()",
        "triazine": "Triazine()",
        "tungsten": "Tungsten()",
        "zinc hydroxide": "ZincOxide()",
        "zinc": "ZincOxide()",
        "bismaleimide triazine resin": "Triazine()",
        "silver": "Silver()",
        "tin": "Tin()",
        "1,6-bisnaphthalene": "EpoxyResin()",
        "3-glycidoxypropyltrimethoxysilane": "EpoxyResin()",
        "additives": "PhthalicAnhydride()",
        "amine type hardener": "Piperidine()",
        "bisphenol a liquid epoxy resin": "BisphenolAEpoxy()",
        "bisphenol f liquid epoxy resin": "BisphenolAEpoxy()",
        "carbon black": "CarbonBlack()",
        "silicondioxide": "Silica()",
        "silicone resin": "Silicone()",
        "silicone": "Silicone()",
        "carbon": "CarbonBlack()",
        "chromium": "Chromium()",
        "iron": "PigIron()",
        "manganese": "Manganese()",
        "methyl methacrylate crosspolymer": "MethylMethacrylate()",
        "silicon": "Silicone()",
        "sulfur": "Sulfur()",
        "dimer acid diglycidyl ester": "Adhesive()",
        "epon resin 828": "EpoxyResin()",
        "epoxy resin": "EpoxyResin()",
        "metal hydroxide": "AluminiumHydroxide()",
        "polytetrafluoroethylene": "PolytetraFluoroEthylene()",
        "proprietary material-other epoxy resins": "EpoxyResin()",
        "proprietary material-other phenolic resins": "PhenolicResin()",
        "silica vitreous 60676-86-0": "Silica()",
        "silica, vitreous [ fused silica ]": "Silica()",
        "silicon dioxide cas 7631-86-9": "Silicone()",
        "silicondioxide": "Silicone()",
        "sumitomo-molding-epoxy resin a-ip": "EpoxyResin()",
        "sumitomo-molding-phenol resin-ip": "EpoxyResin()",
        "cobalt": "Cobalt()",
        "dicyandiamide": "Nitrile()",
        "oxirane": "EthyleneOxide()",
        "filler": "CalciumCarbonate()",
        "palladium": "Palladium()",
        "silicone tetrachloride": "SiliconeTetrachloride()",
    }

    result = ""
    for component, elements in element_masses.items():
        result += f"{component.upper()}: list[Exchange] = [\n"
        for element, mass in elements.items():
            terminal_class = DIRECT_MAPPING.get(element.lower())
            if terminal_class:
                mass_str = f"{mass:.15g}"
                result += f"    Exchange({mass_str} * 1e-6, {terminal_class}),\n"
        result += "]\n\n"
    return result

# Example usage
# Assuming dfs_high_level is a list of DataFrames containing the high-level material data
# api_text, component_masses, element_masses, f_uncertainties, g_uncertainties, h_uncertainties = modelisation_cpu_with_uncertainties(dfs_high_level, 4256, "LGA", 2800)
# print(api_text)
# print("f uncertainties:", f_uncertainties)
# print("g uncertainties:", g_uncertainties)
# print("h uncertainties:", h_uncertainties)


In [11]:
api_text, component_masses, element_masses, f_uncertainties, g_uncertainties, h_uncertainties = modelisation_cpu_with_uncertainties(dfs_high_level, 4256, "LGA", 2800)
print(api_text)
print("f uncertainties:", f_uncertainties)
print("g uncertainties:", g_uncertainties)
print("h uncertainties:", h_uncertainties)

CARRIER: list[Exchange] = [
    Exchange(138.043545836452 * 1e-6, AluminiumOxide()),
    Exchange(2.05089353480539 * 1e-6, AluminiumHydroxide()),
    Exchange(3.93052247380239 * 1e-6, AluminiumHydroxide()),
    Exchange(885.283822193114 * 1e-6, AluminiumOxide()),
    Exchange(5.63888681699102 * 1e-6, Silica()),
    Exchange(2.75307837761976 * 1e-6, BariumSulfide()),
    Exchange(1.07896695359281 * 1e-6, EpoxyResin()),
    Exchange(3.57086682260479 * 1e-6, Triazine()),
    Exchange(1.31873738772455 * 1e-6, Pentane()),
    Exchange(0.847759749251497 * 1e-6, BisphenolAEpoxy()),
    Exchange(0.00428161489520958 * 1e-6, BoronCarbide()),
    Exchange(5.75877203405688 * 1e-6, HydratedLime()),
    Exchange(0.0385345340568862 * 1e-6, HydratedLime()),
    Exchange(0.458132793787425 * 1e-6, HydratedLime()),
    Exchange(46.0787395022455 * 1e-6, Chromium()),
    Exchange(5.93431824476048 * 1e-6, Chromium()),
    Exchange(35.1649031343563 * 1e-6, Copper()),
    Exchange(0.0128448446856287 * 1e-6, C



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

