In [None]:
# Mount Google Drive at /content/drive (Colab only); the image paths used by this
# notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install opencv-python numpy scikit-learn


In [None]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier

# Define the number of clusters (visual words) in the bag-of-words vocabulary
num_clusters = 100

# Load the dataset
gen_images = []
used_images = []
unused_images = []

# (destination list, path template, number of files); files are numbered from 1
image_sets = (
    (gen_images, '/content/drive/MyDrive/generated/generated_{:04d}.png', 500),
    (used_images, '/content/drive/MyDrive/used/used_{:04d}.png', 80),
    (unused_images, '/content/drive/MyDrive/not_used/not_used_{:04d}.png', 80),
)
for image_list, path_template, image_count in image_sets:
    for i in range(1, image_count + 1):
        image_path = path_template.format(i)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when a file is missing or
        # unreadable; stop here with the offending path rather than failing later
        # inside cv2.cvtColor with an opaque assertion error.
        if image is None:
            raise FileNotFoundError(f'Could not read image (missing or unreadable): {image_path}')
        image_list.append(image)

# Extract SIFT features
# Module-level detector; extract_sift_features creates its own instance.
sift = cv2.SIFT_create()

def extract_sift_features(images):
    """Compute SIFT descriptors for every image.

    Args:
        images: iterable of colour images in OpenCV's BGR channel order, i.e. as
            returned by ``cv2.imread`` (which is how this notebook loads them).

    Returns:
        A list with one descriptor array per input image, in input order. An image
        in which SIFT finds no keypoints yields an empty array with zero rows
        instead of ``None`` so the result can always be stacked with ``np.vstack``.
    """
    sift = cv2.SIFT_create()
    sift_features = []
    for img in images:
        # The images are already BGR, so convert straight to grayscale. Swapping the
        # channels first (RGB2BGR) would apply the red/blue luminance weights the
        # wrong way round.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        kp, des = sift.detectAndCompute(gray, None)
        if des is None:
            # No keypoints detected: keep a zero-row placeholder of the right width.
            des = np.empty((0, sift.descriptorSize()), dtype=np.float32)
        sift_features.append(des)
    return sift_features

gen_sift_features = extract_sift_features(gen_images)
used_sift_features = extract_sift_features(used_images)
unused_sift_features = extract_sift_features(unused_images)

# Cluster the SIFT features to create a vocabulary of visual words.
# An image without keypoints contributes no descriptors (None or a zero-row array);
# np.vstack cannot stack None entries, so such images are left out of the fit.
vocabulary_descriptors = np.vstack(
    [des for des in gen_sift_features if des is not None and len(des) > 0]
)
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(vocabulary_descriptors)

# Convert each image into a histogram of visual words
def create_histogram(features, model=None, n_words=None):
    """Turn per-image descriptor arrays into visual-word count histograms.

    Args:
        features: sequence with one descriptor array per image; an entry may be
            ``None`` or have zero rows when the image has no descriptors.
        model: fitted clusterer exposing ``predict``; defaults to the module-level
            ``kmeans``.
        n_words: number of histogram bins; defaults to the module-level
            ``num_clusters``.

    Returns:
        Integer array of shape ``(len(features), n_words)`` where entry ``[i, w]``
        is the number of descriptors of image ``i`` assigned to visual word ``w``.
        Images without descriptors get an all-zero row.
    """
    if model is None:
        model = kmeans
    if n_words is None:
        n_words = num_clusters

    histogram = np.zeros((len(features), n_words), dtype=int)
    for i, des in enumerate(features):
        if des is None or len(des) == 0:
            # Nothing to count; predict() would reject an empty input.
            continue
        words = model.predict(des)
        # `histogram[i, words] += 1` increments each distinct word only once, however
        # often it occurs, because fancy-indexed in-place addition is unbuffered.
        # bincount counts every occurrence.
        histogram[i] = np.bincount(words, minlength=n_words)
    return histogram

# Bag-of-visual-words histogram for each image set
gen_histograms, used_histograms, unused_histograms = (
    create_histogram(descriptor_sets)
    for descriptor_sets in (gen_sift_features, used_sift_features, unused_sift_features)
)

# Fit a 5-nearest-neighbour classifier on the histograms: label 1 for "used" images,
# label 0 for "not used" images
X_train = np.concatenate([used_histograms, unused_histograms], axis=0)
y_train = np.concatenate([np.ones(len(used_histograms)), np.zeros(len(unused_histograms))])
classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Test the classifier on the testing data
test_gen_images = []
test_unknown_images = []

# (destination list, path template, number of files); files are numbered from 1
test_image_sets = (
    (test_gen_images, '/content/drive/MyDrive/generated_1/generated_{:05d}.png', 10000),
    (test_unknown_images, '/content/drive/MyDrive/real_unknown_1/real_unknown_{:04d}.png', 200),
)
for image_list, path_template, image_count in test_image_sets:
    for i in range(1, image_count + 1):
        image_path = path_template.format(i)
        image = cv2.imread(image_path)
        # cv2.imread returns None instead of raising when a file is missing or
        # unreadable; stop here with the offending path rather than failing later
        # during feature extraction.
        if image is None:
            raise FileNotFoundError(f'Could not read image (missing or unreadable): {image_path}')
        image_list.append(image)

test_gen_sift_features = extract_sift_features(test_gen_images)
test_unknown_sift_features = extract_sift_features(test_unknown_images)

test_gen_histograms = create_histogram(test_gen_sift_features)
test_unknown_histograms = create_histogram(test_unknown_sift_features)

# Predict the labels of the testing data (generated images first, then unknown images)
X_test = np.vstack((test_gen_histograms, test_unknown_histograms))
y_pred = classifier.predict(X_test)




In [10]:
# Classify the test histograms; rows are the generated images followed by the
# unknown images
X_test = np.concatenate([test_gen_histograms, test_unknown_histograms], axis=0)
y_pred = classifier.predict(X_test)

# Write one "<1-based index>,<label>" line per unknown image. Their predictions sit
# after the generated-image predictions in y_pred, so skip that leading part.
unknown_offset = len(test_gen_images)
with open('submission.txt', 'w') as f:
    f.writelines(
        f'{row_number},{label}\n'
        for row_number, label in enumerate(y_pred[unknown_offset:], start=1)
    )
