In [4]:
#import data manipulation libraries
import pandas as pd
import numpy as np

#import data visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

#import filter warning libraries
import warnings
warnings.filterwarnings('ignore')

#Import loggings
import logging
logging.basicConfig(level = logging.INFO,
                    format = '%(asctime)s - %(levelname)s - %(message)s',
                    filemode = 'w',
                    filename = 'model.log', force = True)

In [5]:
logging.info('Import Dataset...')

In [6]:
url = 'https://raw.githubusercontent.com/anirudhakolay/CementPrediction_Model/refs/heads/main/Concrete_Data%20(1).csv'
df = pd.read_csv(url)
df.sample(frac=1)

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
155,362.6,189.0,0.0,164.9,11.6,944.7,755.8,56,77.30
501,491.0,26.0,123.0,210.0,3.9,882.0,699.0,3,25.61
347,213.7,0.0,174.7,154.8,10.2,1053.5,776.4,56,46.64
299,290.4,0.0,96.2,168.1,9.4,961.2,865.0,3,22.50
403,356.0,119.0,0.0,160.0,9.0,1061.0,657.0,28,59.00
...,...,...,...,...,...,...,...,...,...
411,173.5,50.1,173.5,164.8,6.5,1006.2,793.5,3,23.08
979,289.0,133.7,0.0,194.9,5.5,924.1,760.1,28,46.25
728,331.0,0.0,0.0,192.0,0.0,1025.0,821.0,28,31.74
816,525.0,0.0,0.0,189.0,0.0,1125.0,613.0,7,42.42


In [7]:
#OLS Regression(Ordinary Least Square)
!pip install statsmodels


Defaulting to user installation because normal site-packages is not writeable


In [12]:
import statsmodels.api as sm
X = df.drop(columns ='Concrete compressive strength(MPa, megapascals) ', axis = 1)
y = df['Concrete compressive strength(MPa, megapascals) ']

In [13]:
X = sm.add_constant(X)

In [14]:
model = sm.OLS(y,X).fit()
model.summary()

0,1,2,3
Dep. Variable:,"Concrete compressive strength(MPa, megapascals)",R-squared:,0.616
Model:,OLS,Adj. R-squared:,0.613
Method:,Least Squares,F-statistic:,204.3
Date:,"Wed, 28 May 2025",Prob (F-statistic):,6.29e-206
Time:,23:40:03,Log-Likelihood:,-3869.0
No. Observations:,1030,AIC:,7756.0
Df Residuals:,1021,BIC:,7800.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-23.3312,26.586,-0.878,0.380,-75.500,28.837
Cement (component 1)(kg in a m^3 mixture),0.1198,0.008,14.113,0.000,0.103,0.136
Blast Furnace Slag (component 2)(kg in a m^3 mixture),0.1039,0.010,10.247,0.000,0.084,0.124
Fly Ash (component 3)(kg in a m^3 mixture),0.0879,0.013,6.988,0.000,0.063,0.113
Water (component 4)(kg in a m^3 mixture),-0.1499,0.040,-3.731,0.000,-0.229,-0.071
Superplasticizer (component 5)(kg in a m^3 mixture),0.2922,0.093,3.128,0.002,0.109,0.476
Coarse Aggregate (component 6)(kg in a m^3 mixture),0.0181,0.009,1.926,0.054,-0.000,0.037
Fine Aggregate (component 7)(kg in a m^3 mixture),0.0202,0.011,1.887,0.059,-0.001,0.041
Age (day),0.1142,0.005,21.046,0.000,0.104,0.125

0,1,2,3
Omnibus:,5.378,Durbin-Watson:,1.282
Prob(Omnibus):,0.068,Jarque-Bera (JB):,5.304
Skew:,-0.174,Prob(JB):,0.0705
Kurtosis:,3.045,Cond. No.,106000.0


In [15]:
#Checking Dataset info
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   Cement (component 1)(kg in a m^3 mixture)              1030 non-null   float64
 1   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  1030 non-null   float64
 2   Fly Ash (component 3)(kg in a m^3 mixture)             1030 non-null   float64
 3   Water  (component 4)(kg in a m^3 mixture)              1030 non-null   float64
 4   Superplasticizer (component 5)(kg in a m^3 mixture)    1030 non-null   float64
 5   Coarse Aggregate  (component 6)(kg in a m^3 mixture)   1030 non-null   float64
 6   Fine Aggregate (component 7)(kg in a m^3 mixture)      1030 non-null   float64
 7   Age (day)                                              1030 non-null   int64  
 8   Concrete compressive strength(MPa, megapascals)  

In [16]:
#Checking Descriptive Stats: EDA
#Univariate Analysis
from collections import OrderedDict

stats = []
for i in df.columns:
  numerical_stats = OrderedDict({
      'Feature':i,
      'Mean':df[i].mean(),
      'Median':df[i].median(),
      'Q1':df[i].quantile(0.25),
      'Q3':df[i].quantile(0.75),
      'IQR':df[i].quantile(0.75)-df[i].quantile(0.25),
      'Standard Deviation':df[i].std(),
      'Variance':df[i].var(),
      'Skewness':df[i].skew(),
      'Kurtosis':df[i].kurt()
  })
  stats.append(numerical_stats)
  report = pd.DataFrame(stats)

report 

Unnamed: 0,Feature,Mean,Median,Q1,Q3,IQR,Standard Deviation,Variance,Skewness,Kurtosis
0,Cement (component 1)(kg in a m^3 mixture),281.167864,272.9,192.375,350.0,157.625,104.506364,10921.58022,0.509481,-0.520652
1,Blast Furnace Slag (component 2)(kg in a m^3 m...,73.895825,22.0,0.0,142.95,142.95,86.279342,7444.124812,0.800717,-0.508175
2,Fly Ash (component 3)(kg in a m^3 mixture),54.18835,0.0,0.0,118.3,118.3,63.997004,4095.616541,0.537354,-1.328746
3,Water (component 4)(kg in a m^3 mixture),181.567282,185.0,164.9,192.0,27.1,21.354219,456.002651,0.074628,0.122082
4,Superplasticizer (component 5)(kg in a m^3 mix...,6.20466,6.4,0.0,10.2,10.2,5.973841,35.686781,0.907203,1.411269
5,Coarse Aggregate (component 6)(kg in a m^3 mi...,972.918932,968.0,932.0,1029.4,97.4,77.753954,6045.677357,-0.04022,-0.599016
6,Fine Aggregate (component 7)(kg in a m^3 mixture),773.580485,779.5,730.95,824.0,93.05,80.17598,6428.187792,-0.25301,-0.102177
7,Age (day),45.662136,28.0,7.0,56.0,49.0,63.169912,3990.437729,3.269177,12.168989
8,"Concrete compressive strength(MPa, megapascals)",35.817961,34.445,23.71,46.135,22.425,16.705742,279.081814,0.416977,-0.313725


In [17]:
# Postive or -ve skewness never impute with 'Mean', impute with 'Median'