In [None]:
import os, shutil, sys, warnings, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from zipfile import ZipFile
from tqdm import trange
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score

In [None]:
# Notebook-wide settings; edit the two folder paths to point at your copy of the data.
CONFIG = {
    # Folder with the unlabeled competition images.
    "TEST_FOLDER" : "/kaggle/input/padhai-text-non-text-classification-level-2/kaggle_level_2",
    # Folder containing the 'background' and per-language training sub-folders.
    "TRAIN_FOLDER" : "/kaggle/input/padhai-text-non-text-classification-level-2/level_2/",
    # 16 * 16 = 256, the input width the network below is built for.
    "IMAGE_SIZE": (16, 16),
    "THRESHOLD": 0.7
}

In [None]:
def read_all(folder_path, key_prefix=""):
    """Load every image in a folder as a flattened grayscale pixel vector.

    Parameters
    ----------
    folder_path : str
        Directory whose entries are all opened as images.
    key_prefix : str, optional
        Text prepended to each file stem to form the dictionary key.

    Returns
    -------
    dict
        Maps ``key_prefix + <file name without extension>`` to a 1-D NumPy
        array holding the image's pixels after conversion to mode "L".
    """
    print("Reading:")
    images = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split made later) the same on every run.
    for filename in tqdm(sorted(os.listdir(folder_path))):
        # splitext copes with extensions of any length (".jpg", ".jpeg", ...)
        # instead of blindly chopping the last four characters.
        stem = os.path.splitext(filename)[0]
        # The context manager closes the file even if decoding fails.
        with Image.open(os.path.join(folder_path, filename)) as image:
            images[key_prefix + stem] = np.array(image.convert("L")).flatten()
    return images

In [None]:
languages = ['ta', 'hi', 'en']  # Tamil, Hindi, English

# Background (non-text) images get a 'bgr_' key prefix so they can be told apart later.
images_train = read_all(os.path.join(CONFIG["TRAIN_FOLDER"], "background"), key_prefix="bgr_")
# Text images of each language get '<language>_' as their key prefix.
for language in languages:
    images_train.update(
        read_all(os.path.join(CONFIG["TRAIN_FOLDER"], language), key_prefix=language + "_")
    )
print(len(images_train))

# Test images keep the bare file stem as their key.
images_test = read_all(CONFIG["TEST_FOLDER"], key_prefix='')
print(len(images_test))

# Peek at a few test keys. Printed explicitly: a bare expression that is not the
# last statement of a cell is evaluated and silently discarded.
print(list(images_test.keys())[:5])

# Feature / label containers used by the following cell.
X = []
Y = []

In [None]:
# Build the training matrix and labels: keys starting with "bgr_" are background
# images (label 0); every other key is a text image (label 1).
# X and Y are rebuilt from scratch here so that re-running this cell does not
# append the same samples a second time.
X = np.array(list(images_train.values()))
Y = np.array([0 if key.startswith("bgr_") else 1 for key in images_train])

# Test keys are converted to integer ids; int() raises ValueError for a non-numeric key.
ID_test = [int(key) for key in images_test]
X_test = np.array(list(images_test.values()))

print(X.shape, Y.shape)
print(X_test.shape)

In [None]:
# Show the distinct labels and how many samples carry each one, rather than
# the raw label vector; this is the number that matters for the stratified split.
np.unique(Y, return_counts=True)

In [None]:
# Hold out 15% of the labelled samples for checking the trained model.
# The split is stratified on Y, so both parts keep the same class ratio,
# and seeded, so it is repeatable for a given row order.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    Y,
    test_size=0.15,
    random_state=10,
    stratify=Y,
    shuffle=True,
)

In [None]:
# Learn the per-feature mean and standard deviation from the training split only,
# then apply that same transform to the training, hold-out and test matrices.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = [
    scaler.transform(part) for part in (X_train, X_val, X_test)
]

In [None]:
import tensorflow
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Seed Python, NumPy and the TensorFlow backend so that weight initialisation
# (and hence training) is repeatable on a fresh kernel.
tensorflow.keras.utils.set_random_seed(10)

# Small fully connected binary classifier: n_features -> 10 -> 4 -> 1.
n_features = X_train.shape[1]  # taken from the data instead of hard-coding 256
model = Sequential()
model.add(Dense(10, input_shape=(n_features,), activation='sigmoid'))  # first hidden layer, 10 units
model.add(Dense(4, activation='sigmoid'))  # second hidden layer, 4 units
model.add(Dense(1, activation='sigmoid'))  # single sigmoid output for binary classification

# Adam optimiser with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Fit for 25 epochs in mini-batches of 32. Keras sets aside 20% of X_train
# as a validation subset and reports its metrics after each epoch.
model.fit(
    X_train,
    Y_train,
    epochs=25,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# Score the model on the held-out split (X_val / Y_val) produced by
# train_test_split. X_test cannot be used here: it was loaded without labels.
loss, accuracy = model.evaluate(x=X_val, y=Y_val)

print(
    f"Test Loss: {loss:.4f}",
    f"Test Accuracy: {accuracy:.4f}",
    sep="\n",
)