In [47]:
# Import necessary libraries
import pickle

import joblib
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import (
    GradientBoostingClassifier,
    GradientBoostingRegressor,
    RandomForestClassifier,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
)
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


In [None]:
# Load the raw materials table from CSV (edit `file_path` to point at your copy).
# The raw frame gets its own name so that `data` only ever refers to the
# cleaned, encoded frame that the modelling cells below consume.
file_path = "material.csv"
raw_data = pd.read_csv(file_path)
print("Dataset Loaded Successfully!")

# Source column name -> readable name used by every later cell.
column_renames = {
    'Su': 'Ultimate_Tensile_Strength',
    'Sy': 'Yield_Strength',
    'E': 'Elastic_Modulus',
    'G': 'Shear_Modulus',
    'mu': 'Poissons_Ratio',
    'Ro': 'Density',
    'Use': 'Use',
}
selected_columns = list(column_renames)

# Keep only the relevant columns; `rename` returns a new frame, so `raw_data`
# is left untouched.
df = raw_data[selected_columns].rename(columns=column_renames)
print(f"Selected {len(selected_columns)} columns.")

# Report missing values per column before dropping anything.
missing_values = df.isnull().sum()
print("Missing Values per Column:")
print(missing_values)

# Drop rows with missing values (or use imputation if necessary).
# The explicit .copy() makes df_cleaned an independent frame, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
df_cleaned = df.dropna().copy()

print(f"Rows before cleaning: {len(df)}")
print(f"Rows after cleaning: {len(df_cleaned)}")

# Encode the target column 'Use' as integer codes.
# NOTE(review): LabelEncoder numbers the distinct values in sorted order; the
# original note stated "Yes=1, No=0" — confirm via `label_encoder.classes_`.
label_encoder = LabelEncoder()
df_cleaned['Use'] = label_encoder.fit_transform(df_cleaned['Use'])

# Cleaned, encoded frame consumed by the task cells below.
data = df_cleaned

In [46]:

### Task 1: Material Suitability Classification ###
# Fit a gradient-boosted classifier that predicts the encoded 'Use' flag from
# the six mechanical/physical property columns, report hold-out metrics and
# pickle the fitted model.
print("\n--- Task 1: Material Suitability Classification ---")

target_suitability = 'Use'
features_suitability = [
    'Ultimate_Tensile_Strength',
    'Yield_Strength',
    'Elastic_Modulus',
    'Shear_Modulus',
    'Poissons_Ratio',
    'Density',
]

# Feature matrix, plus a 0/1 label: any truthy encoded value becomes 1.
X_suitability = data.loc[:, features_suitability]
y_suitability = data[target_suitability].map(lambda flag: int(bool(flag)))

# 70/30 train/test split with a fixed seed.
(
    X_train_suitability,
    X_test_suitability,
    y_train_suitability,
    y_test_suitability,
) = train_test_split(X_suitability, y_suitability, test_size=0.3, random_state=42)

# `fit` returns the estimator itself, so construction and training chain.
best_classification_model = GradientBoostingClassifier(random_state=42).fit(
    X_train_suitability, y_train_suitability
)

# Hold-out predictions.
y_pred = best_classification_model.predict(X_test_suitability)

# All four scorers share the (y_true, y_pred) signature.
accuracy, precision, recall, f1 = (
    scorer(y_test_suitability, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# One print call, one metric per line.
print(
    "Gradient Boosting Classifier Metrics:",
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-Score: {f1:.4f}",
    sep="\n",
)

# Persist the fitted classifier.
with open("suitability_model.pkl", "wb") as model_file:
    pickle.dump(best_classification_model, model_file)

print("Model saved as suitability_model.pkl")



--- Task 1: Material Suitability Classification ---
Gradient Boosting Classifier Metrics:
Accuracy: 0.9979
Precision: 1.0000
Recall: 0.9667
F1-Score: 0.9831
Model saved as suitability_model.pkl


In [48]:
### Task 2: Ultimate Tensile Strength Prediction ###
# Fit a gradient-boosted regressor for ultimate tensile strength using the
# remaining five property columns, report hold-out metrics and save the model.
print("\n--- Task 2: Ultimate Tensile Strength Prediction ---")

target_strength = 'Ultimate_Tensile_Strength'
features_strength = [
    'Yield_Strength',
    'Elastic_Modulus',
    'Shear_Modulus',
    'Poissons_Ratio',
    'Density',
]

X_strength, y_strength = data[features_strength], data[target_strength]

# 70/30 train/test split with a fixed seed.
(
    X_train_strength,
    X_test_strength,
    y_train_strength,
    y_test_strength,
) = train_test_split(X_strength, y_strength, test_size=0.3, random_state=42)

# `fit` returns the estimator itself, so construction and training chain.
best_regression_model = GradientBoostingRegressor(random_state=42).fit(
    X_train_strength, y_train_strength
)

# Hold-out evaluation; RMSE is the square root of the mean squared error.
y_pred = best_regression_model.predict(X_test_strength)
r2, mae, rmse = (
    r2_score(y_test_strength, y_pred),
    mean_absolute_error(y_test_strength, y_pred),
    np.sqrt(mean_squared_error(y_test_strength, y_pred)),
)

# Header line followed by the single summary line.
print(
    "Regression Metrics for Ultimate Tensile Strength:",
    f"Gradient Boosting Regressor: R² Score = {r2}, MAE = {mae}, RMSE = {rmse}",
    sep="\n",
)

# Persist the fitted regressor; left as the cell's last expression so the
# return value of joblib.dump is shown as the cell output.
joblib.dump(best_regression_model, "tensile_strength_model.pkl")



--- Task 2: Ultimate Tensile Strength Prediction ---
Regression Metrics for Ultimate Tensile Strength:
Gradient Boosting Regressor: R² Score = 0.9655574959253143, MAE = 46.01712944199703, RMSE = 61.45724103154477


['tensile_strength_model.pkl']

In [50]:
### Task 3: Shear Modulus Prediction ###
# Fit a gradient-boosted regressor for shear modulus using the remaining five
# property columns, report hold-out metrics and pickle the fitted model.
print("\n--- Task 3: Shear Modulus Prediction ---")

# Target column and predictor columns
target_modulus = 'Shear_Modulus'
features_modulus = [
    'Ultimate_Tensile_Strength',
    'Yield_Strength',
    'Elastic_Modulus',
    'Poissons_Ratio',
    'Density',
]

X_modulus, y_modulus = data[features_modulus], data[target_modulus]

# 70/30 train/test split with a fixed seed
(
    X_train_modulus,
    X_test_modulus,
    y_train_modulus,
    y_test_modulus,
) = train_test_split(X_modulus, y_modulus, test_size=0.3, random_state=42)

# `fit` returns the estimator itself, so construction and training chain
gbr_modulus = GradientBoostingRegressor(random_state=42).fit(
    X_train_modulus, y_train_modulus
)

# Hold-out predictions
y_pred_modulus = gbr_modulus.predict(X_test_modulus)

# Hold-out metrics; RMSE is the square root of the mean squared error
r2, mae, rmse = (
    r2_score(y_test_modulus, y_pred_modulus),
    mean_absolute_error(y_test_modulus, y_pred_modulus),
    np.sqrt(mean_squared_error(y_test_modulus, y_pred_modulus)),
)

print(
    f"R² Score: {r2:.4f}",
    f"Mean Absolute Error (MAE): {mae:.4f}",
    f"Root Mean Squared Error (RMSE): {rmse:.4f}",
    sep="\n",
)

# Persist the fitted regressor
with open("shear_modulus_model.pkl", "wb") as model_file:
    pickle.dump(gbr_modulus, model_file)

print("Model saved as 'shear_modulus_model.pkl'.")



--- Task 3: Shear Modulus Prediction ---
R² Score: 0.8045
Mean Absolute Error (MAE): 3028.4583
Root Mean Squared Error (RMSE): 45328.9870
Model saved as 'shear_modulus_model.pkl'.


In [51]:
### Task 4: Density Prediction ###
# Fit a gradient-boosted regressor for density using the remaining five
# property columns, report hold-out metrics and pickle the fitted model.
print("\n--- Task 4: Density Prediction ---")

# Target column and predictor columns
target_density = 'Density'
features_density = [
    'Ultimate_Tensile_Strength',
    'Yield_Strength',
    'Elastic_Modulus',
    'Shear_Modulus',
    'Poissons_Ratio',
]

X_density, y_density = data[features_density], data[target_density]

# 70/30 train/test split with a fixed seed
(
    X_train_density,
    X_test_density,
    y_train_density,
    y_test_density,
) = train_test_split(X_density, y_density, test_size=0.3, random_state=42)

# `fit` returns the estimator itself, so construction and training chain
gbr_density = GradientBoostingRegressor(random_state=42).fit(
    X_train_density, y_train_density
)

# Hold-out predictions
y_pred_density = gbr_density.predict(X_test_density)

# Hold-out metrics; RMSE is the square root of the mean squared error
r2, mae, rmse = (
    r2_score(y_test_density, y_pred_density),
    mean_absolute_error(y_test_density, y_pred_density),
    np.sqrt(mean_squared_error(y_test_density, y_pred_density)),
)

print(
    f"R² Score: {r2:.4f}",
    f"Mean Absolute Error (MAE): {mae:.4f}",
    f"Root Mean Squared Error (RMSE): {rmse:.4f}",
    sep="\n",
)

# Persist the fitted regressor
with open("density_model.pkl", "wb") as model_file:
    pickle.dump(gbr_density, model_file)

print("Model saved as 'density_model.pkl'.")



--- Task 4: Density Prediction ---
R² Score: 0.9944
Mean Absolute Error (MAE): 61.4221
Root Mean Squared Error (RMSE): 165.4364
Model saved as 'density_model.pkl'.
