In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Loading the dataset
data = pd.read_excel('synthetic_rubber_concrete.xlsx')

# Handling negative values in strength columns (assuming they are anomalies)
strength_cols = ['compressive_strength_MPa', 'flexural_strength_MPa', 'tensile_strength_MPa', 'modulus_of_elasticity_GPa']
for col in strength_cols:
    data = data[data[col] >= 0]  # Remove rows with negative values

# Defining input and output features
X = data[['cement_kg_m3', 'fine_aggregate_kg_m3', 'coarse_aggregate_kg_m3', 'rubber_pct', 
          'water_pct', 'w_c_ratio', 'rubber_shape', 'rubber_size', 'curing_days']]
y = data[['compressive_strength_MPa', 'flexural_strength_MPa', 'tensile_strength_MPa', 'modulus_of_elasticity_GPa']]

# Defining categorical and numerical features
categorical_features = ['rubber_shape', 'rubber_size']
numerical_features = ['cement_kg_m3', 'fine_aggregate_kg_m3', 'coarse_aggregate_kg_m3', 
                      'rubber_pct', 'water_pct', 'w_c_ratio', 'curing_days']

# Creating a preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features),
        ('num', 'passthrough', numerical_features)
    ])

# Creating the model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training the model
model.fit(X_train, y_train)

# Evaluating the model
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Training R² Score: {train_score:.4f}")
print(f"Testing R² Score: {test_score:.4f}")

# Saving the model and preprocessor
joblib.dump(model, 'concrete_model.pkl')

Training R² Score: 0.8814
Testing R² Score: 0.1364


['concrete_model.pkl']