In [29]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, r2_score, mean_absolute_error, mean_squared_error
import joblib


In [30]:
# Load the dataset (replace 'Data.csv' with your file path)
file_path = "material.csv"
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print("Dataset Loaded Successfully!")
data.head()


Dataset Loaded Successfully!


Unnamed: 0,Material,Su,Sy,E,G,mu,Ro,Use
0,ANSI Steel SAE 1015 as-rolled,421,314,207000,79000,0.3,7860,True
1,ANSI Steel SAE 1015 normalized,424,324,207000,79000,0.3,7860,True
2,ANSI Steel SAE 1015 annealed,386,284,207000,79000,0.3,7860,True
3,ANSI Steel SAE 1020 as-rolled,448,331,207000,79000,0.3,7860,True
4,ANSI Steel SAE 1020 normalized,441,346,207000,79000,0.3,7860,True


In [31]:
# Select relevant columns
selected_columns = [ 'Su', 'Sy', 'E', 'G', 'mu', 'Ro', 'Use']
df = data[selected_columns]

# Rename columns for better readability
df.columns = [ 'Ultimate_Tensile_Strength', 'Yield_Strength', 
              'Elastic_Modulus', 'Shear_Modulus', 
              'Poissons_Ratio', 'Density', 'Use']

# Display summary of selected data
print(f"Selected {len(selected_columns)} columns.")
df.head()


Selected 7 columns.


Unnamed: 0,Ultimate_Tensile_Strength,Yield_Strength,Elastic_Modulus,Shear_Modulus,Poissons_Ratio,Density,Use
0,421,314,207000,79000,0.3,7860,True
1,424,324,207000,79000,0.3,7860,True
2,386,284,207000,79000,0.3,7860,True
3,448,331,207000,79000,0.3,7860,True
4,441,346,207000,79000,0.3,7860,True


In [32]:
# Check for missing values
missing_values = df.isnull().sum()
print("Missing Values per Column:")
print(missing_values)

# Drop rows with missing values (or use imputation if necessary)
df_cleaned = df.dropna()

print(f"Rows before cleaning: {len(df)}")
print(f"Rows after cleaning: {len(df_cleaned)}")


Missing Values per Column:
Ultimate_Tensile_Strength    0
Yield_Strength               0
Elastic_Modulus              0
Shear_Modulus                0
Poissons_Ratio               0
Density                      0
Use                          0
dtype: int64
Rows before cleaning: 1552
Rows after cleaning: 1552


In [33]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Encode target variable ('Use') into binary format (Yes=1, No=0)
label_encoder = LabelEncoder()
df_cleaned['Use'] = label_encoder.fit_transform(df_cleaned['Use'])

# Scale numerical features using StandardScaler
scaler = StandardScaler()
scaled_features = scaler.fit_transform(df_cleaned[numerical_features])

# Create final preprocessed DataFrame
df_preprocessed = pd.DataFrame(scaled_features, columns=numerical_features)
df_preprocessed['Use'] = df_cleaned['Use'].values

print("Preprocessing Complete!")
df_preprocessed.head()


Preprocessing Complete!


Unnamed: 0,Ultimate_Tensile_Strength,Yield_Strength,Elastic_Modulus,Shear_Modulus,Poissons_Ratio,Density,Use
0,-0.464461,-0.252291,0.756067,-0.05267,-0.121096,0.439896,1
1,-0.455279,-0.217736,0.756067,-0.05267,-0.121096,0.439896,1
2,-0.571583,-0.355958,0.756067,-0.05267,-0.121096,0.439896,1
3,-0.381824,-0.193547,0.756067,-0.05267,-0.121096,0.439896,1
4,-0.403248,-0.141713,0.756067,-0.05267,-0.121096,0.439896,1


In [34]:
data = df_preprocessed

In [35]:
print("\n--- Task 1: Material Suitability Classification ---")
features_suitability = ['Ultimate_Tensile_Strength', 'Yield_Strength', 'Elastic_Modulus',
                        'Shear_Modulus', 'Poissons_Ratio', 'Density']
target_suitability = 'Use'

X_suitability = data[features_suitability]
y_suitability = data[target_suitability].apply(lambda x: 1 if x else 0)

X_train_suitability, X_test_suitability, y_train_suitability, y_test_suitability = train_test_split(
    X_suitability, y_suitability, test_size=0.3, random_state=42)

# Build a pipeline with StandardScaler and GradientBoostingClassifier
pipeline_suitability = Pipeline([
    ('scaler', StandardScaler()),
    ('model', GradientBoostingClassifier(random_state=42))
])
pipeline_suitability.fit(X_train_suitability, y_train_suitability)

# Evaluate Task 1 model
y_pred_suitability = pipeline_suitability.predict(X_test_suitability)
accuracy = accuracy_score(y_test_suitability, y_pred_suitability)
precision = precision_score(y_test_suitability, y_pred_suitability)
recall = recall_score(y_test_suitability, y_pred_suitability)
f1 = f1_score(y_test_suitability, y_pred_suitability)

print("\nTask 1: Material Suitability Classification Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Save the scaler and model for Task 1
joblib.dump(pipeline_suitability.named_steps['scaler'], 'suitability_scaler.pkl')
joblib.dump(pipeline_suitability.named_steps['model'], 'suitability_model.pkl')
print("Suitability model and scaler saved.")



--- Task 1: Material Suitability Classification ---

Task 1: Material Suitability Classification Metrics:
Accuracy: 0.9978540772532188
Precision: 1.0
Recall: 0.9666666666666667
F1 Score: 0.9830508474576272
Suitability model and scaler saved.


In [36]:
print("\n--- Task 2: Ultimate Tensile Strength Prediction ---")
features_strength = ['Yield_Strength', 'Elastic_Modulus', 'Shear_Modulus', 'Poissons_Ratio', 'Density']
target_strength = 'Ultimate_Tensile_Strength'

X_strength = data[features_strength]
y_strength = data[target_strength]

X_train_strength, X_test_strength, y_train_strength, y_test_strength = train_test_split(
    X_strength, y_strength, test_size=0.3, random_state=42)

# Build a pipeline with standard scaler and GradientBoostingRegressor
pipeline_strength = Pipeline([
    ('scaler', StandardScaler()),
    ('model', GradientBoostingRegressor(random_state=42))
])
pipeline_strength.fit(X_train_strength, y_train_strength)

# Evaluate Task 2 model
y_pred_strength = pipeline_strength.predict(X_test_strength)
r2_strength = r2_score(y_test_strength, y_pred_strength)
mae_strength = mean_absolute_error(y_test_strength, y_pred_strength)
rmse_strength = np.sqrt(mean_squared_error(y_test_strength, y_pred_strength))

print("\nTask 2: Ultimate Tensile Strength Prediction Metrics:")
print("R² Score:", r2_strength)
print("MAE:", mae_strength)
print("RMSE:", rmse_strength)

# Save the scaler and model for Task 2
joblib.dump(pipeline_strength.named_steps['scaler'], 'tensile_strength_scaler.pkl')
joblib.dump(pipeline_strength.named_steps['model'], 'tensile_strength_model.pkl')
print("Tensile strength model and scaler saved.")



--- Task 2: Ultimate Tensile Strength Prediction ---

Task 2: Ultimate Tensile Strength Prediction Metrics:
R² Score: 0.9655705865476607
MAE: 0.14076473198889536
RMSE: 0.18806241622503772
Tensile strength model and scaler saved.


In [37]:
print("\n--- Task 3: Shear Modulus Prediction ---")
features_modulus = ['Ultimate_Tensile_Strength', 'Yield_Strength', 'Elastic_Modulus', 'Poissons_Ratio', 'Density']
target_modulus = 'Shear_Modulus'

X_modulus = data[features_modulus]
y_modulus = data[target_modulus]

X_train_modulus, X_test_modulus, y_train_modulus, y_test_modulus = train_test_split(
    X_modulus, y_modulus, test_size=0.3, random_state=42)

# Build a pipeline with standard scaler and GradientBoostingRegressor
pipeline_modulus = Pipeline([
    ('scaler', StandardScaler()),
    ('model', GradientBoostingRegressor(random_state=42))
])
pipeline_modulus.fit(X_train_modulus, y_train_modulus)

# Evaluate Task 3 model
y_pred_modulus = pipeline_modulus.predict(X_test_modulus)
r2_modulus = r2_score(y_test_modulus, y_pred_modulus)
mae_modulus = mean_absolute_error(y_test_modulus, y_pred_modulus)
rmse_modulus = np.sqrt(mean_squared_error(y_test_modulus, y_pred_modulus))

print("\nTask 3: Shear Modulus Prediction Metrics:")
print("R² Score:", r2_modulus)
print("MAE:", mae_modulus)
print("RMSE:", rmse_modulus)

# Save the scaler and model for Task 3
joblib.dump(pipeline_modulus.named_steps['scaler'], 'shear_modulus_scaler.pkl')
joblib.dump(pipeline_modulus.named_steps['model'], 'shear_modulus_model.pkl')
print("Shear modulus model and scaler saved.")



--- Task 3: Shear Modulus Prediction ---

Task 3: Shear Modulus Prediction Metrics:
R² Score: 0.804478904510688
MAE: 0.024169209479351295
RMSE: 0.3618026977702215
Shear modulus model and scaler saved.


In [38]:
print("\n--- Task 4: Density Prediction ---")
features_density = ['Ultimate_Tensile_Strength', 'Yield_Strength', 'Elastic_Modulus', 'Shear_Modulus', 'Poissons_Ratio']
target_density = 'Density'

X_density = data[features_density]
y_density = data[target_density]

X_train_density, X_test_density, y_train_density, y_test_density = train_test_split(
    X_density, y_density, test_size=0.3, random_state=42)

# Build a pipeline with standard scaler and GradientBoostingRegressor
pipeline_density = Pipeline([
    ('scaler', StandardScaler()),
    ('model', GradientBoostingRegressor(random_state=42))
])
pipeline_density.fit(X_train_density, y_train_density)

# Evaluate Task 4 model
y_pred_density = pipeline_density.predict(X_test_density)
r2_density = r2_score(y_test_density, y_pred_density)
mae_density = mean_absolute_error(y_test_density, y_pred_density)
rmse_density = np.sqrt(mean_squared_error(y_test_density, y_pred_density))

print("\nTask 4: Density Prediction Metrics:")
print("R² Score:", r2_density)
print("MAE:", mae_density)
print("RMSE:", rmse_density)

# Save the scaler and model for Task 4
joblib.dump(pipeline_density.named_steps['scaler'], 'density_scaler.pkl')
joblib.dump(pipeline_density.named_steps['model'], 'density_model.pkl')
print("Density model and scaler saved.")



--- Task 4: Density Prediction ---

Task 4: Density Prediction Metrics:
R² Score: 0.9944078335616982
MAE: 0.029048171734952517
RMSE: 0.07848856413284191
Density model and scaler saved.
