# **Composite Material Property Prediction Using an Artificial Neural Network (ANN) Model**
*By Baren Kumar Baidya*

Steps:
1. Import Libraries
2. Load the CSV File
3. Feature and Target Selection
4. Data Preprocessing Pipeline
5. Model Definition and Training
6. Evaluation
7. Example Prediction (Demonstrating Surrogate Use)

In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error

In [14]:
# Read the composite-materials dataset directly from the public GitHub repository.
url = (
    "https://raw.githubusercontent.com/baren-kumar/ML-Material-Science-/"
    "refs/heads/main/Material_Composition.csv"
)
df = pd.read_csv(url)

# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,ID,Material Composition,Fiber Type,Matrix Type,Fiber Volume Fraction (%),Matrix Volume Fraction (%),Manufacturing Process,Process Parameters,Tensile Strength (MPa),Flexural Strength (MPa),Thermal Conductivity (W/m·K),Glass Transition Temperature (°C),Impact Resistance (J),Source
0,1,CFRP/Epoxy,Carbon,Epoxy,60,40,Autoclave Curing,"120∘C,0.7MPa",1250,1800,1.8,130,45,Synthetic-1
1,2,GFRP/Polyester,Glass,Polyester,45,55,Resin Infusion (RTM),"80∘C,0.5MPa",550,850,0.45,95,30,Synthetic-2
2,3,AFRP/Epoxy,Aramid,Epoxy,55,45,Prepreg Layup,"135∘C,0.8MPa",820,1100,0.1,140,60,Synthetic-3
3,4,CFRP/PEEK,Carbon,PEEK,65,35,Compression Molding,"380∘C,1.2MPa",1600,2200,2.5,180,35,Synthetic-4
4,5,BasaltF/PP,Basalt,Polypropylene (PP),30,70,Extrusion,"200∘C,0.2MPa",300,450,0.35,165,40,Synthetic-5


In [3]:
# Inspect the last three rows of the dataset.
df.tail(n=3)

Unnamed: 0,ID,Material Composition,Fiber Type,Matrix Type,Fiber Volume Fraction (%),Matrix Volume Fraction (%),Manufacturing Process,Process Parameters,Tensile Strength (MPa),Flexural Strength (MPa),Thermal Conductivity (W/m·K),Glass Transition Temperature (°C),Impact Resistance (J),Source
97,98,SiCF/Ceramic,SiC,Ceramic,37,63,Hot Pressing,"1480∘C,19.6MPa",180,280,14.7,1480,13,Synthetic-98
98,99,BoronF/Epoxy,Boron,Epoxy,59,41,Filament Winding,"106∘C,0.56MPa",1360,1960,1.86,121,48,Synthetic-99
99,100,GFRP/Polyester,Glass,Polyester,44,56,Spray Layup,"27∘C,Ambient",390,540,0.34,69,18,Synthetic-100


In [5]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Unnamed: 0,0
ID,0
Material Composition,0
Fiber Type,0
Matrix Type,0
Fiber Volume Fraction (%),0
Matrix Volume Fraction (%),0
Manufacturing Process,0
Process Parameters,0
Tensile Strength (MPa),0
Flexural Strength (MPa),0


In [6]:
# --- 2. Feature Engineering: Parsing Process Parameters ---

def parse_process_parameters(param_str):
    """
    Parse a 'Process Parameters' string into (curing temperature, applied pressure).

    Example input: "120∘C,0.7MPa" -> (120.0, 0.7)

    Parameters
    ----------
    param_str : str or None
        Raw parameter text. The temperature is a non-negative number followed by
        a degree mark ("∘" U+2218 as used in the dataset, or "°" U+00B0) and "C";
        the pressure is a non-negative number followed by "MPa". Non-string
        values are converted with str(); None is treated as an empty string.

    Returns
    -------
    tuple of (float, float)
        (temperature in °C, pressure in MPa). A component that cannot be found
        (e.g. "Ambient" or a missing value) falls back to 25.0 °C (room
        temperature) and 0.1 MPa (ambient pressure) respectively.
    """
    text = "" if param_str is None else str(param_str)

    # Capture the whole number including an optional decimal part, so that
    # "120.5∘C" yields 120.5 rather than only the digits adjacent to the symbol.
    # Both the ring operator (∘) and the real degree sign (°) are accepted.
    temp_match = re.search(r'(\d+(?:\.\d+)?)\s*[∘°]\s*C', text)
    pressure_match = re.search(r'(\d+\.?\d*)\s*MPa', text)

    # Use 25°C (Room Temp) and 0.1MPa (Ambient Pressure) as defaults for 'Ambient' or missing values
    temp = float(temp_match.group(1)) if temp_match else 25.0
    pressure = float(pressure_match.group(1)) if pressure_match else 0.1

    return temp, pressure

# Derive the two numeric processing columns from the raw parameter strings.
process_columns = ['Curing Temperature (°C)', 'Applied Pressure (MPa)']


def _parsed_row(raw_value):
    """Return the parsed (temperature, pressure) pair for one cell as a Series."""
    return pd.Series(parse_process_parameters(str(raw_value)))


df[process_columns] = df['Process Parameters'].apply(_parsed_row)

In [7]:
# Confirm the two parsed columns were appended to the frame.
df.head(n=3)

Unnamed: 0,ID,Material Composition,Fiber Type,Matrix Type,Fiber Volume Fraction (%),Matrix Volume Fraction (%),Manufacturing Process,Process Parameters,Tensile Strength (MPa),Flexural Strength (MPa),Thermal Conductivity (W/m·K),Glass Transition Temperature (°C),Impact Resistance (J),Source,Curing Temperature (°C),Applied Pressure (MPa)
0,1,CFRP/Epoxy,Carbon,Epoxy,60,40,Autoclave Curing,"120∘C,0.7MPa",1250,1800,1.8,130,45,Synthetic-1,120.0,0.7
1,2,GFRP/Polyester,Glass,Polyester,45,55,Resin Infusion (RTM),"80∘C,0.5MPa",550,850,0.45,95,30,Synthetic-2,80.0,0.5
2,3,AFRP/Epoxy,Aramid,Epoxy,55,45,Prepreg Layup,"135∘C,0.8MPa",820,1100,0.1,140,60,Synthetic-3,135.0,0.8


In [8]:
# --- 3. Feature and Target Selection ---

# Inputs (X): constituent descriptors (categorical) and processing settings
# (numerical), mapping to the Constituent and Processing Features from the .md file
categorical_features = ['Fiber Type', 'Matrix Type', 'Manufacturing Process']
numerical_features = [
    'Fiber Volume Fraction (%)',
    'Curing Temperature (°C)',
    'Applied Pressure (MPa)',
]

# Output (Y): Tensile Strength (MPa) is the property being predicted
TARGET_COLUMN = 'Tensile Strength (MPa)'

feature_columns = categorical_features + numerical_features
X = df[feature_columns]
Y = df[TARGET_COLUMN].values.ravel()

# If any feature or target value is missing, drop the affected rows from df
# (in place) and rebuild X and Y from the cleaned frame.
if X.isnull().any().any() or pd.Series(Y).isnull().any():
    print("Warning: Missing values found. Dropping rows with NaN for a clean run.")
    df.dropna(subset=feature_columns + [TARGET_COLUMN], inplace=True)
    X = df[feature_columns]
    Y = df[TARGET_COLUMN].values.ravel()

In [9]:
# --- 4. Data Preprocessing Pipeline ---

# Numerical branch: standardize the numerical inputs.
numerical_transformer = Pipeline([('scaler', StandardScaler())])

# Categorical branch: one-hot encode. handle_unknown='ignore' lets the encoder
# process categories at predict time that were not present when it was fitted.
categorical_transformer = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each group of columns to its branch; any column not listed is dropped.
column_branches = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_branches, remainder='drop')

In [10]:
# --- 5. Model Definition and Training ---

# Hold out 20% of the samples for testing (80% train); the fixed seed keeps
# the split reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# MLP regressor (a simple artificial neural network) with three hidden layers
# of 50 neurons each — the multiple hidden layers mentioned in the document.
mlp_model = MLPRegressor(
    hidden_layer_sizes=(50, 50, 50),
    activation='relu',    # Rectified Linear Unit activation function
    solver='adam',        # Optimization algorithm
    max_iter=500,         # Number of epochs
    early_stopping=True,  # Stop training if validation score doesn't improve
    random_state=42,
)

# Full pipeline: preprocessing followed by the regressor, so the same
# transformations are applied at fit and predict time.
model_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', mlp_model),
])

print("\n--- Training the MLP Surrogate Model ---")
model_pipeline.fit(X_train, Y_train)
print("Training complete.")


--- Training the MLP Surrogate Model ---
Training complete.




In [11]:
# --- 6. Evaluation ---

# Predict on the held-out test split and score against the true values.
Y_pred = model_pipeline.predict(X_test)
r2 = r2_score(Y_test, Y_pred)
mae = mean_absolute_error(Y_test, Y_pred)

# Report the metrics, one line each.
report_lines = [
    "\n--- Model Performance on Test Data ---",
    f"Target Property: {TARGET_COLUMN}",
    f"R-squared (R²): {r2:.4f} (Closer to 1 is better)",
    f"Mean Absolute Error (MAE): {mae:.2f} MPa (Average prediction error)",
]
for report_line in report_lines:
    print(report_line)


--- Model Performance on Test Data ---
Target Property: Tensile Strength (MPa)
R-squared (R²): 0.9736 (Closer to 1 is better)
Mean Absolute Error (MAE): 53.84 MPa (Average prediction error)


In [13]:
# --- 7. Example Prediction (Demonstrating Surrogate Use) ---

# Define a hypothetical composite recipe once, as a plain dict, so the model
# input and the printed description are built from the same values.
# (This simulates the 'Surrogate Evaluation' step in the Inverse Design Workflow)
recipe = {
    'Fiber Type': 'Carbon',
    'Matrix Type': 'Epoxy',
    'Manufacturing Process': 'Autoclave Curing',
    'Fiber Volume Fraction (%)': 65,  # High volume fraction
    'Curing Temperature (°C)': 130,
    'Applied Pressure (MPa)': 0.8,
}

# One-row frame with the same column names the pipeline was fitted on.
new_composite = pd.DataFrame([recipe])

# predict() returns one value per input row; take the single prediction.
predicted_strength = model_pipeline.predict(new_composite)[0]

print("\n--- Surrogate Model Prediction ---")
print(
    f"Recipe: {recipe['Fiber Type']}/{recipe['Matrix Type']} "
    f"(Vf={recipe['Fiber Volume Fraction (%)']}%), "
    f"Cured at {recipe['Curing Temperature (°C)']}°C/{recipe['Applied Pressure (MPa)']}MPa"
)
print(f"Predicted Tensile Strength: {predicted_strength:.2f} MPa")


--- Surrogate Model Prediction ---
Recipe: Carbon/Epoxy (Vf=65%), Cured at 130°C/0.8MPa
Predicted Tensile Strength: 1235.22 MPa
