In [70]:
import warnings

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.optimize import differential_evolution

### Data Loading

In [None]:
# Read the concrete dataset from the CSV file (path is relative to the
# notebook's working directory) and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="../database/concrete_data.csv")
df.head(n=5)

Features

- Cement (kg)
- Blast Furnace Slag (kg) 
- Fly Ash (kg)
- Water (kg) 
- Superplasticizer (kg) 
- Coarse Aggregate (kg)
- Fine Aggregate (kg) 
- Age (days)

Label
- Concrete compressive strength (MPa)

### Summarize Data

In [None]:
# Structural overview of the loaded frame: per-column dtypes, per-column
# missing-value counts, then summary statistics as the cell's displayed output.
print("-----------Checking for Data Types-----------", df.dtypes, sep="\n")
print("-----------Checking for Null Values-----------", df.isna().sum(), sep="\n")
print("-----------Describing Data-----------")
df.describe()

### Checking for Outliers

In [None]:
# Show the column labels of the loaded frame; the plotting loop below iterates
# over these, and the correlation cell looks one of them up by name.
print(df.columns)

In [None]:
# Draw one figure per column: a translucent violin (distribution shape) with a
# box plot overlaid on the same axes (median line, quartile box, flier markers).
#
# Warnings are silenced only while these figures are drawn. The previous
# notebook-wide `warnings.filterwarnings("ignore")` also hid warnings raised by
# every later cell for the rest of the kernel session.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i in df.columns:
        # Explicit figure/axes so both seaborn layers target the same axes.
        fig, ax = plt.subplots(figsize=(5, 3))
        sns.violinplot(
            data=df[i],
            inner=None,
            color="#3258a8",
            alpha=0.6,
            ax=ax,
        )
        sns.boxplot(
            data=df[i],
            color="#a3d9d6",
            medianprops={"color": "#d791eb", "linewidth": 2},
            flierprops={"marker": "o", "markersize": 5, "markerfacecolor": "#db5e90"},
            showfliers=True,
            ax=ax,
        )
        ax.set_title(f'Boxplot for {i}')
        ax.grid(True)
        plt.show()
        # Release the figure so a frame with many columns does not accumulate
        # open figures on backends that do not close them after show().
        plt.close(fig)

### Correlation Study

In [None]:
# Pairwise correlation between all columns (pandas' default method).
df_corr = df.corr()
# Correlation of every column with the target, largest value first.
# This still contains the target's own entry (its correlation with itself).
target_corr = df_corr['concrete_compressive_strength'].sort_values(ascending=False)

# Heatmap of the full correlation matrix, each cell annotated to two decimals.
fig, ax = plt.subplots(figsize=(5, 3))
sns.heatmap(df_corr, annot=True, cmap="coolwarm", linewidths=0.5, fmt=".2f", ax=ax)
ax.set_title("Correlation Heatmap of Compressive Strength", fontsize=16)
plt.show()

# Bar chart of each column's correlation with the target.
# x and y are passed explicitly so there is one bar per column regardless of
# how the installed seaborn version interprets a bare Series argument.
fig, ax = plt.subplots(figsize=(5, 3))
sns.barplot(x=target_corr.index, y=target_corr.values, ax=ax)
# The title previously called this bar chart a "Boxplot".
ax.set_title("Correlation Bar Plot of Compressive Strength", fontsize=16)
ax.set_xlabel("Column")
ax.set_ylabel("Correlation with target")
plt.xticks(rotation=90)
plt.show()

#plt.figure(figsize=(5, 3))
#sns.pairplot(df)
#plt.show()


#for i in df.columns:
#    plt.figure(figsize=(4, 2))
#    sns.scatterplot(x=df[i],y=df['concrete_compressive_strength'])
#    plt.title(f"Line Plot of {i} against the Target Variable")
#    plt.xlabel(f"{i}")
#    plt.ylabel("Concrete Strength")
#    plt.show()
