# Concrete Compressive Strength - Regression & Classification

<img src="https://ascentfuturetech.com/wp-content/uploads/2016/05/ascent-futuretech-pages-banner.png" alt="Concrete Compressive Strength - Regand Cls" />

In [4]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, classification_report, accuracy_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import make_pipeline
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

In [5]:
# Load data
df = pd.read_excel('Concrete_Data.xls')

# Rename columns
df.columns = ['Cement', 'Slag', 'FlyAsh', 'Water', 'Plasticizer', 'CoarseAgg', 'FineAgg', 'Age', 'Strength']

In [6]:
#prepare x and y
X = df[['Cement', 'Slag', 'FlyAsh', 'Water', 'Plasticizer', 'CoarseAgg', 'FineAgg', 'Age']]

y = df['Strength']

In [7]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Cement       1030 non-null   float64
 1   Slag         1030 non-null   float64
 2   FlyAsh       1030 non-null   float64
 3   Water        1030 non-null   float64
 4   Plasticizer  1030 non-null   float64
 5   CoarseAgg    1030 non-null   float64
 6   FineAgg      1030 non-null   float64
 7   Age          1030 non-null   int64  
dtypes: float64(7), int64(1)
memory usage: 64.5 KB


# Problem 1 - Regression

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [10]:
# Define models
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
    'SVR': SVR(),
    'KNN': KNeighborsRegressor(),
    'Polynomial Regression': make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
}

In [11]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

## Deep Learning Model

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Define deep learning model
model_dl = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)
])

model_dl.compile(optimizer='adam', loss='mean_squared_error')

# Train deep learning model
model_dl.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate deep learning model
y_pred_dl = model_dl.predict(X_test).flatten()
mse_dl = mean_squared_error(y_test, y_pred_dl)
print(f'Deep Learning Model Mean Squared Error: {mse_dl:.2f}')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - loss: 1709.9232 - val_loss: 344.9902
Epoch 2/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 322.8960 - val_loss: 152.8686
Epoch 3/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 160.1140 - val_loss: 118.0806
Epoch 4/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 120.5515 - val_loss: 110.1887
Epoch 5/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 114.9667 - val_loss: 98.3359
Epoch 6/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 97.1287 - val_loss: 93.5356
Epoch 7/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 86.6749 - val_loss: 90.0204
Epoch 8/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 86.2668 - val_loss: 87.2229
Epoch 9/50
[1m21/21[0m [32

# Proble 2-Classification

In [15]:
def green_cls(s): 
    if (s.Slag + s.FlyAsh < 150.0) and (s.Plasticizer < 10.0):
        return "n/a"
    else:
        return "green"

def strength_cls(x):
    if x < 17.0:
        return "non-structural"
    elif x < 28.0:
        return "residential"
    elif x < 70.0:
        return "commercial"
    else:
        return "high-strength"

df["Green"] = df.apply(green_cls, axis=1)
df["ConcreteClass"] = df.Strength.apply(strength_cls)
print(df)
df.Plasticizer = df.Plasticizer.apply(lambda x: "yes" if x > 0 else "no")
df.drop("Strength", axis=1, inplace=True)


# Convert categorical features to numeric
df = pd.get_dummies(df, columns=['Plasticizer', 'Green'], drop_first=True)

# Features and target
X_class = df.drop('ConcreteClass', axis=1)
y_class = df['ConcreteClass']

# Split data
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)


      Cement   Slag  FlyAsh  Water  Plasticizer  CoarseAgg  FineAgg  Age  \
0      540.0    0.0     0.0  162.0          2.5     1040.0    676.0   28   
1      540.0    0.0     0.0  162.0          2.5     1055.0    676.0   28   
2      332.5  142.5     0.0  228.0          0.0      932.0    594.0  270   
3      332.5  142.5     0.0  228.0          0.0      932.0    594.0  365   
4      198.6  132.4     0.0  192.0          0.0      978.4    825.5  360   
...      ...    ...     ...    ...          ...        ...      ...  ...   
1025   276.4  116.0    90.3  179.6          8.9      870.1    768.3   28   
1026   322.2    0.0   115.6  196.0         10.4      817.9    813.4   28   
1027   148.5  139.4   108.6  192.7          6.1      892.4    780.0   28   
1028   159.1  186.7     0.0  175.6         11.3      989.6    788.9   28   
1029   260.9  100.5    78.3  200.6          8.6      864.5    761.5   28   

       Strength  Green  ConcreteClass  
0     79.986111    n/a  high-strength  
1     6

In [16]:
#Train and Evaluate Classification Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Define models
models_class = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC(),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

# Train and evaluate models
for name, model in models_class.items():
    model.fit(X_train_class, y_train_class)
    y_pred_class = model.predict(X_test_class)
    accuracy = accuracy_score(y_test_class, y_pred_class)
    print(f'{name} Accuracy: {accuracy:.2f}')
    print(classification_report(y_test_class, y_pred_class))


Logistic Regression Accuracy: 0.80
                precision    recall  f1-score   support

    commercial       0.88      0.90      0.89       133
 high-strength       1.00      0.33      0.50         6
non-structural       0.74      0.81      0.78        32
   residential       0.53      0.49      0.51        35

      accuracy                           0.80       206
     macro avg       0.79      0.63      0.67       206
  weighted avg       0.80      0.80      0.80       206

Decision Tree Accuracy: 0.84
                precision    recall  f1-score   support

    commercial       0.92      0.90      0.91       133
 high-strength       1.00      0.50      0.67         6
non-structural       0.92      0.72      0.81        32
   residential       0.58      0.80      0.67        35

      accuracy                           0.84       206
     macro avg       0.86      0.73      0.77       206
  weighted avg       0.87      0.84      0.85       206



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Random Forest Accuracy: 0.89
                precision    recall  f1-score   support

    commercial       0.93      0.95      0.94       133
 high-strength       1.00      0.33      0.50         6
non-structural       0.93      0.84      0.89        32
   residential       0.71      0.77      0.74        35

      accuracy                           0.89       206
     macro avg       0.89      0.73      0.77       206
  weighted avg       0.89      0.89      0.89       206

SVM Accuracy: 0.65
                precision    recall  f1-score   support

    commercial       0.65      1.00      0.78       133
 high-strength       0.00      0.00      0.00         6
non-structural       0.00      0.00      0.00        32
   residential       0.00      0.00      0.00        35

      accuracy                           0.65       206
     macro avg       0.16      0.25      0.20       206
  weighted avg       0.42      0.65      0.51       206

KNN Accuracy: 0.73
                precision    re

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Değerlendirme (Estimate) 
## Görülen sonuçlara göre Ramdom Forest en iyi genel performansı gösteriyor, yüksek doğruluk, precision ve recall değerleri ile en dengeli sonuçları sağlıyor.
## According to the results, Random Forest shows the best overall performance, providing the most balanced results with high accuracy, precision and recall values.