# In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.callbacks import EarlyStopping

# Part 1 - Data Preparation and Exploration

# Define the path to the dataset directory.
# '/path/to/dataset' is only a placeholder: either edit it or set the
# DATASET_DIR environment variable to the folder that contains train.csv
# and the train_images/ directory.
dataset_dir = os.environ.get('DATASET_DIR', '/path/to/dataset')

# Load the training dataset. Check for the file first so a wrong dataset_dir
# produces a message that says how to fix it.
train_csv_path = os.path.join(dataset_dir, 'train.csv')
if not os.path.isfile(train_csv_path):
    raise FileNotFoundError(
        'Training CSV not found at {!r}; point dataset_dir (or the DATASET_DIR '
        'environment variable) at the dataset directory.'.format(train_csv_path)
    )
train_df = pd.read_csv(train_csv_path)

# Define the image and mask dimensions (in pixels).
# NOTE(review): these are later used as target_size=(image_height, image_width),
# i.e. 2100 rows x 1400 columns -- confirm that width and height are not swapped
# relative to the images on disk.
image_width = 1400
image_height = 2100
mask_width = 350
mask_height = 525
# Define a function to convert the encoded pixels to a binary mask
def rle_to_mask(rle, width, height):
    """Decode a run-length-encoded string into a binary mask.

    Args:
        rle: Space-separated integers in "start length start length ..." order.
            ``None`` or NaN (what pandas yields for an empty CSV cell) is
            treated as "no runs".
        width: Number of columns in the returned mask.
        height: Number of rows in the returned mask.

    Returns:
        A ``uint8`` array of shape ``(height, width)`` holding 1 inside every
        run and 0 elsewhere.
    """
    mask = np.zeros(width * height, dtype=np.uint8)

    # A missing annotation is not a string, so calling .split() on it would
    # raise AttributeError; return the empty mask instead.
    if rle is None or (isinstance(rle, float) and np.isnan(rle)):
        return mask.reshape((height, width))

    values = [int(token) for token in rle.split()]

    # Each start is used as a 0-based offset into the flattened, row-major mask.
    # Runs that extend past the end of the mask are clipped by the slice.
    # NOTE(review): some RLE conventions are 1-based and column-major, and are
    # expressed at the full image resolution -- confirm against the data source.
    for start, length in zip(values[::2], values[1::2]):
        mask[start:start + length] = 1
    return mask.reshape((height, width))

# Create a new column 'mask' with the binary masks.
# Empty 'EncodedPixels' cells are read by pandas as NaN; replace them with an
# empty run-length string so they decode to an all-zero mask instead of failing
# on rle.split(). The 'EncodedPixels' column itself is left untouched.
# NOTE(review): masks are decoded at mask_width x mask_height -- confirm that the
# run offsets in train.csv are expressed at that resolution.
train_df['mask'] = train_df['EncodedPixels'].fillna('').map(
    lambda rle: rle_to_mask(rle, mask_width, mask_height)
)

# Display sample images (top row) with their masks (bottom row)
sample_count = min(4, len(train_df))  # the 2x4 grid holds at most four samples
plt.figure(figsize=(10, 10))
for i in range(sample_count):
    # Positional access, so this does not depend on the frame's index labels.
    image_label = train_df['Image_Label'].iloc[i]

    # The image file name is the part of the label before the first underscore.
    image_path = os.path.join(dataset_dir, 'train_images', image_label.split('_')[0])
    img = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError('Could not read image: {!r}'.format(image_path))

    ax = plt.subplot(2, 4, i+1)
    ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # OpenCV loads BGR; matplotlib expects RGB
    ax.set_title(image_label)
    ax.axis('off')

    ax = plt.subplot(2, 4, i+5)
    ax.imshow(train_df['mask'].iloc[i], cmap='gray')
    ax.axis('off')
plt.tight_layout()
plt.show()

# Derive the two columns flow_from_dataframe needs before splitting, so both
# splits carry them:
#   * 'filename' -- the image file name, i.e. the part of 'Image_Label' before
#     the first underscore ('Image_Label' itself is not a file on disk).
#   * 'has_mask' -- '1' when the row has a non-empty 'EncodedPixels' value,
#     '0' otherwise. class_mode='binary' requires string labels; a column of
#     mask arrays cannot be used as y_col.
# NOTE(review): if one image file appears in several rows (one per label
# suffix), the same pixels get different targets and may fall on both sides of
# the split -- confirm whether rows should be aggregated per file first.
train_df['filename'] = train_df['Image_Label'].map(lambda label: label.split('_')[0])
train_df['has_mask'] = (
    train_df['EncodedPixels'].fillna('').astype(str).str.strip().ne('')
    .astype(int).astype(str)
)

# Split the data into training and validation sets
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Define the image generator (pixel values rescaled to [0, 1]).
# No validation_split here: the frames are already split above, and combining
# it with subset=... would discard part of each frame.
datagen = ImageDataGenerator(rescale=1./255)

# Create the training and validation data generators.
# classes=['0', '1'] pins the label order so index 0 is "no mask" and index 1
# is "mask" regardless of which values occur in a given split.
train_generator = datagen.flow_from_dataframe(
    train_df,
    directory=os.path.join(dataset_dir, 'train_images'),
    x_col='filename',
    y_col='has_mask',
    classes=['0', '1'],
    target_size=(image_height, image_width),
    batch_size=32,
    class_mode='binary'
)
val_generator = datagen.flow_from_dataframe(
    val_df,
    directory=os.path.join(dataset_dir, 'train_images'),
    x_col='filename',
    y_col='has_mask',
    classes=['0', '1'],
    target_size=(image_height, image_width),
    batch_size=32,
    class_mode='binary',
    # Keep sample order fixed so predictions line up with val_generator.labels.
    shuffle=False
)

# Part 2 - Multi-Class Classification

# Define the model architecture: three convolution + max-pooling stages,
# then a dense head ending in a single sigmoid unit.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(image_height, image_width, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Define early stopping callback: stop after 3 epochs without a better
# validation loss and roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train the model
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=[early_stopping],
)

# Plot the training and validation accuracy and loss curves, one panel each.
# Each entry: (training key, validation key, training legend, validation legend, y-axis label)
curve_specs = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss'),
)

plt.figure(figsize=(12, 4))
for panel, (train_key, val_key, train_legend, val_legend, y_label) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
plt.tight_layout()
plt.show()

# Make predictions on the validation set.
# Labels and predictions are gathered batch by batch from the same pass over
# the generator, so they stay aligned even if the generator shuffles its
# samples. Pairing model.predict(val_generator) with val_generator.labels is
# only correct for an unshuffled generator.
val_label_batches = []
val_probability_batches = []
for batch_index in range(len(val_generator)):
    batch_images, batch_labels = val_generator[batch_index]
    val_probability_batches.append(model.predict_on_batch(batch_images))
    val_label_batches.append(batch_labels)

# Flatten to 1-D: the sigmoid head yields shape (N, 1), the metrics expect (N,).
val_labels = np.concatenate(val_label_batches).astype(int).ravel()
val_probabilities = np.concatenate(val_probability_batches).ravel()

# Convert predictions to binary labels
val_predictions = (val_probabilities > 0.5).astype(int)

# Calculate f1 score and classification report
f1 = f1_score(val_labels, val_predictions)
report = classification_report(val_labels, val_predictions, target_names=['No Mask', 'Mask'])

# Print the f1 score and classification report
print('F1 Score:', f1)
print('Classification Report:')
print(report)


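# Recorded output of the cell above:
# FileNotFoundError: [Errno 2] No such file or directory: '/path/to/dataset\\train.csv'
# (raised because dataset_dir still holds the placeholder path)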