In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

from imblearn.over_sampling import SMOTE  # <-- NEW

In [2]:
# Path to your CSV containing labels
label_csv_path = "/Users/berkeayyildizli/ENS_Graduation_Project/labeled_thermal_features.csv"

# Read the CSV
label_df = pd.read_csv(label_csv_path)

# Just to confirm
print(label_df.head())


            Frame  Min_Temp  Max_Temp  Mean_Temp   Std_Temp  Median_Temp  \
0     Frame_1.csv    1351.0    1875.0  1517.2217   69.05588       1510.0   
1    Frame_10.csv    1201.0    1719.0  1336.9498   90.49550       1301.0   
2   Frame_100.csv    1096.0    1690.0  1274.6260  101.33485       1238.0   
3  Frame_1000.csv    1034.0    1631.0  1185.3427   90.55383       1151.0   
4  Frame_1001.csv    1226.0    2107.0  1360.5409  116.82404       1330.0   

       Q1      Q3    IQR  Skewness   Kurtosis  High_Temp_Pixels  \
0  1475.0  1526.0   51.0  2.292498   6.778652              2950   
1  1285.0  1330.0   45.0  2.086964   4.205517              3732   
2  1213.0  1315.0  102.0  1.584581   2.368822              3642   
3  1139.0  1217.0   78.0  1.923829   4.081294              3339   
4  1313.0  1360.0   47.0  3.668308  14.856552              2495   

   Porosity Label  
0               1  
1               1  
2               0  
3               0  
4               1  


In [3]:
image_folder = "/Users/berkeayyildizli/ENS_Graduation_Project/data/images"  # Folder with Frame_*.csv

all_data = []
all_labels = []

for index, row in label_df.iterrows():
    frame_filename = row["Frame"]            # e.g. "Frame_1.csv"
    porosity_label = row["Porosity Label"]   # e.g. 0 or 1
    
    # Full path to that frame’s CSV
    csv_path = os.path.join(image_folder, frame_filename)
    
    # Read the 200×200 CSV as a NumPy array. Make sure header=None if each CSV has no header row:
    frame_data = pd.read_csv(csv_path, header=None).values  # shape (200, 200)
    
    all_data.append(frame_data)
    all_labels.append(porosity_label)

# Convert to NumPy arrays
all_data = np.array(all_data, dtype=np.float32)   # shape will be (N, 200, 200)
all_labels = np.array(all_labels, dtype=np.int32) # shape will be (N,)
print("all_data shape:", all_data.shape)
print("all_labels shape:", all_labels.shape)


all_data shape: (1564, 200, 201)
all_labels shape: (1564,)


In [4]:
all_data[all_data == 0] = np.nan
N, H, W = all_data.shape
all_data_flat = all_data.reshape((N, -1))  # (N, 40000)

imputer = SimpleImputer(strategy="mean")
all_data_flat_imputed = imputer.fit_transform(all_data_flat)
scaler = MinMaxScaler()
all_data_flat_scaled = scaler.fit_transform(all_data_flat_imputed)
all_data_processed = all_data_flat_scaled.reshape((N, H, W, 1))
print("all_data_processed shape:", all_data_processed.shape)
# For example: (1564, 200, 200, 1)


all_data_processed shape: (1564, 200, 201, 1)


In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    all_data_processed,
    all_labels,
    test_size=0.2,
    random_state=42,
    stratify=all_labels
)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)


X_train shape: (1251, 200, 201, 1)
y_train shape: (1251,)
X_test shape:  (313, 200, 201, 1)
y_test shape:  (313,)


In [6]:
# ---------------------------
# SMOTE OVERSAMPLING SECTION
# ---------------------------
# <-- NEW

# Flatten X_train for SMOTE
Ntrain, H, W, C = X_train.shape
X_train_flat = X_train.reshape((Ntrain, H * W * C))

smote = SMOTE(random_state=42)  # You can adjust sampling_strategy if needed
X_train_sm, y_train_sm = smote.fit_resample(X_train_flat, y_train)

# Reshape oversampled training data back to 4D
Ntrain_sm = X_train_sm.shape[0]
X_train_sm = X_train_sm.reshape((Ntrain_sm, H, W, C))

print("After SMOTE:")
print("X_train_sm shape:", X_train_sm.shape)
print("y_train_sm shape:", y_train_sm.shape)
# <-- NEW

After SMOTE:
X_train_sm shape: (2388, 200, 201, 1)
y_train_sm shape: (2388,)


In [7]:
model = Sequential([
    Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(H, W, 1)),
    MaxPooling2D(pool_size=(2,2)),
    
    Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    
    Flatten(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
epochs = 10
batch_size = 16

history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,   # or use a separate validation set
    epochs=epochs,
    batch_size=batch_size,
    verbose=1
)


Epoch 1/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 81ms/step - accuracy: 0.9054 - loss: 0.4125 - val_accuracy: 0.9482 - val_loss: 0.1172
Epoch 2/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - accuracy: 0.9531 - loss: 0.0841 - val_accuracy: 0.9880 - val_loss: 0.0503
Epoch 3/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 81ms/step - accuracy: 0.9821 - loss: 0.0451 - val_accuracy: 0.9681 - val_loss: 0.0724
Epoch 4/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 78ms/step - accuracy: 0.9677 - loss: 0.0699 - val_accuracy: 0.9801 - val_loss: 0.0373
Epoch 5/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 80ms/step - accuracy: 0.9821 - loss: 0.0408 - val_accuracy: 0.9761 - val_loss: 0.0427
Epoch 6/10
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 79ms/step - accuracy: 0.9651 - loss: 0.1081 - val_accuracy: 0.9721 - val_loss: 0.0555
Epoch 7/10
[1m63/63[0m [32m━━━━

In [9]:
y_pred = model.predict(X_test).flatten()
y_pred_classes = (y_pred > 0.5).astype(int)

acc = accuracy_score(y_test, y_pred_classes)
cm = confusion_matrix(y_test, y_pred_classes)
clf_report = classification_report(y_test, y_pred_classes)

print("Test Accuracy:", acc)
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", clf_report)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Test Accuracy: 0.9904153354632588
Confusion Matrix:
 [[298   1]
 [  2  12]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       299
           1       0.92      0.86      0.89        14

    accuracy                           0.99       313
   macro avg       0.96      0.93      0.94       313
weighted avg       0.99      0.99      0.99       313

