In [None]:
import os
import json
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout


In [None]:
# Load data
def load_train_data(path,train_data_num=0):
    """Load labelled training images from ``path``.

    A sample is a pair of files ``<stem>.jpg`` and ``<stem>.json`` inside
    ``path``. The JSON document must contain a ``colonies_number`` entry; the
    label is 0 when that value equals 0 and 1 otherwise.

    Only ``.jpg`` / ``.json`` entries are considered, so unrelated files and
    sub-directories in ``path`` are ignored. Samples are visited in sorted
    stem order so that repeated runs (and ``train_data_num`` subsets) see the
    same data in the same order.

    Args:
        path: Directory holding the ``.jpg`` / ``.json`` pairs.
        train_data_num: Maximum number of samples to load. 0 (the default)
            loads every sample.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Every image is converted
        to RGB, resized to 128x128 and scaled by 1/255, so ``images`` has
        shape ``(n, 128, 128, 3)`` when at least one sample is loaded. Both
        arrays are empty when no sample is found.

    Raises:
        FileNotFoundError: If one half of a ``.jpg`` / ``.json`` pair is missing.
        KeyError: If a JSON file has no ``colonies_number`` entry.
    """
    # Derive sample stems with splitext so names containing extra dots
    # (e.g. "plate.01.jpg") keep their full stem; dot-prefixed entries are
    # hidden/system files and never samples.
    stems = set()
    for entry in os.listdir(path):
        stem, ext = os.path.splitext(entry)
        if ext.lower() in ('.jpg', '.json') and not entry.startswith('.'):
            stems.add(stem)

    images = []
    labels = []

    # sorted(): a set has no stable iteration order across interpreter runs.
    for count, stem in enumerate(sorted(stems)):
        if train_data_num != 0 and count == train_data_num:
            break

        # Read the label from the JSON side-car file.
        with open(os.path.join(path, stem + '.json')) as f:
            colonies_count = json.load(f)['colonies_number']
        labels.append(0 if colonies_count == 0 else 1)

        # Load and preprocess the image. convert('RGB') guarantees three
        # channels even for grayscale/RGBA files, so all samples stack into
        # one array; the context manager releases the file handle.
        with Image.open(os.path.join(path, stem + '.jpg')) as raw:
            rgb = raw.convert('RGB').resize((128, 128))
        images.append(np.array(rgb) / 255.0)  # Normalize pixel values

    return np.array(images), np.array(labels)

In [None]:
# Build the model
# Assemble the binary classifier layer by layer.
model = Sequential()

# Feature extractor: two convolution + max-pooling stages on 128x128 RGB input.
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))

# Classifier head: flatten, one hidden dense layer with dropout, and a single
# sigmoid unit whose output is thresholded downstream.
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# Train model function
def train(images,labels):
    """Fit the module-level ``model`` and print its hold-out accuracy.

    The data is split 80/20 with a fixed ``random_state`` of 42. The 20% part
    is passed to ``fit`` as validation data and is also the set on which the
    final accuracy is evaluated and printed.

    Args:
        images: Input samples, passed to ``train_test_split`` unchanged.
        labels: Target values aligned with ``images``.
    """
    # Hold back 20% of the samples; the seed keeps the split repeatable.
    fit_x, holdout_x, fit_y, holdout_y = train_test_split(
        images,
        labels,
        test_size=0.2,
        random_state=42,
    )

    model.fit(
        fit_x,
        fit_y,
        epochs=10,
        validation_data=(holdout_x, holdout_y),
    )

    # evaluate() yields (loss, accuracy) for the metrics set at compile time;
    # only the accuracy is reported.
    _, test_acc = model.evaluate(holdout_x, holdout_y)
    print(f"Test Accuracy: {test_acc*100:.2f}%")

# Read every labelled sample from the training folder, then fit and evaluate
# the classifier on it.
images, labels = load_train_data(path='train_data')
train(images=images, labels=labels)

In [None]:
def load_test_data(path,test_data_num=0):
    """Load unlabelled images from ``path`` for prediction.

    Every regular, non-hidden file in ``path`` is opened as an image.
    Sub-directories and dot-prefixed entries (e.g. ``.ipynb_checkpoints``,
    ``.DS_Store``) are skipped. Files are visited in sorted name order so the
    returned rows are reproducible between runs.

    Args:
        path: Directory containing the image files.
        test_data_num: Maximum number of images to load. 0 (the default)
            loads every image.

    Returns:
        Tuple ``(images, IDs)``. ``images`` is a NumPy array in which every
        image has been converted to RGB, resized to 128x128 and scaled by
        1/255 (shape ``(n, 128, 128, 3)`` when at least one image is loaded).
        ``IDs`` is a list with the file name of each image minus its final
        extension, in the same order as ``images``.
    """
    file_names = sorted(
        name
        for name in os.listdir(path)
        if not name.startswith('.') and os.path.isfile(os.path.join(path, name))
    )

    images = []
    IDs = []

    for count, name in enumerate(file_names):
        if test_data_num != 0 and count == test_data_num:
            break

        # Load and preprocess the image. convert('RGB') guarantees three
        # channels even for grayscale/RGBA files; the context manager
        # releases the file handle.
        with Image.open(os.path.join(path, name)) as raw:
            rgb = raw.convert('RGB').resize((128, 128))
        images.append(np.array(rgb) / 255.0)  # Normalize pixel values

        # splitext drops only the last extension, so IDs that themselves
        # contain dots are not truncated.
        IDs.append(os.path.splitext(name)[0])

    return np.array(images),IDs

# Score every test image with the trained model.
test_images, IDs = load_test_data(path='test_data')
predictions = model.predict(test_images)

# Threshold the sigmoid outputs at 0.5 and keep the single output column as a
# flat list of 0/1 integers, one per test image.
predicted_labels = list((predictions > 0.5).astype(int)[:, 0])

In [None]:
import pandas as pd

# Pair each test image ID with its predicted class and write the result as a
# comma-separated UTF-8 file without the DataFrame index.
data = dict(ID=IDs, TARGET=predicted_labels)

df = pd.DataFrame(data)
df.to_csv(
    "output.csv",
    sep=',',
    index=False,
    encoding='utf-8',
)