In [4]:
from pathlib import Path

import numpy as np
import pandas as pd
import torchvision.transforms.functional as fn
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputClassifier
from transformers import BeitFeatureExtractor, BeitForImageClassification

import random_clip_forest


In [5]:
# The preprocessor and the classification model are both loaded from the
# same pretrained checkpoint, so the identifier is spelled out only once.
BEIT_CHECKPOINT = 'microsoft/beit-base-patch16-224-pt22k-ft22k'

feature_extractor = BeitFeatureExtractor.from_pretrained(BEIT_CHECKPOINT)
model = BeitForImageClassification.from_pretrained(BEIT_CHECKPOINT)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [6]:
# Read the training metadata.
df = pd.read_csv('train.csv')
# Throw away rows whose image is missing. A column is added to the filtered
# frame right below, so take an explicit copy to make it an independent
# object rather than a selection of the frame read from disk.
df = df.loc[df.image_id.apply(random_clip_forest.img_exists)].copy()
df['Images'] = df['image_id'].apply(random_clip_forest.open_img_id)

# Augmentation 1: same rows, with every image flipped horizontally.
df2 = df.copy()
df2['Images'] = df['Images'].apply(fn.hflip)

# Augmentation 2: same rows, with every image blurred by a 9x9 Gaussian kernel.
df3 = df.copy()
df3['Images'] = [fn.gaussian_blur(img=image, kernel_size=(9, 9)) for image in df['Images']]

# Stack the originals and both augmented copies. ignore_index=True renumbers
# the rows; without it each index label would occur three times, which makes
# any later label-based lookup or index-aligned assignment ambiguous.
df = pd.concat([df, df2, df3], ignore_index=True)

# Label metadata; the 'object' column is kept as a plain Python list.
labelsdf = pd.read_csv('labels.csv')
labels = labelsdf['object'].values.tolist()

In [7]:
# Run every image (originals and augmented copies) through the BEiT
# preprocessor, asking for NumPy output.
X_train = feature_extractor(images=list(df['Images']), return_tensors="np")

# Encode each entry of df['labels'] with random_clip_forest.onehot and
# collect the results into a single integer array.
y_train = np.array(
    list(map(random_clip_forest.onehot, df['labels']))
).astype(int)

In [8]:
# Base estimator: liblinear logistic regression solved in its dual form,
# with balanced class weights and a fixed seed for reproducibility.
logres = LogisticRegression(
    solver='liblinear',
    dual=True,
    class_weight='balanced',
    max_iter=400,
    random_state=342985,
)
# Wrap the base estimator so it can be fitted against the multi-column y_train.
final = MultiOutputClassifier(logres)

In [13]:
# Sanity check: shape of the design matrix after flattening each image's
# pixel values into a single row.
X_train["pixel_values"].reshape(
    len(X_train["pixel_values"]), -1
).shape

(603, 150528)

In [14]:
# Fit the multi-output classifier on the flattened pixel values
# (one row per image) against the encoded labels.
final.fit(
    X_train["pixel_values"].reshape(len(X_train["pixel_values"]), -1),
    y_train,
)



In [25]:
# Predict a label string for every test image, one image at a time, and
# echo the predicted object names as we go.
testdf = pd.read_csv('test.csv')
testlabels = []
labelsdf = pd.read_csv('labels.csv')
for img_id in testdf.image_id:
    try:
        # Preprocess the single image and flatten it to one feature row.
        x = feature_extractor(
            [random_clip_forest.open_img_id(img_id)],
            return_tensors="np",
        )
        prediction = final.predict(
            x["pixel_values"].reshape(len(x["pixel_values"]), -1)
        )
        predicted_labels = random_clip_forest.labelstring(prediction.astype(bool))

        # Fall back to 'l1' when labelstring produced nothing.
        testlabels.append(predicted_labels if len(predicted_labels) != 0 else 'l1')

        # Translate the space-separated label ids just stored into object names.
        chosen_ids = testlabels[-1].split(' ')
        chosen_names = labelsdf.loc[labelsdf.label_id.isin(chosen_ids), 'object'].values.ravel()
        print(img_id, ' '.join(chosen_names), sep='\t')
    except FileNotFoundError:
        # Image file is absent: record the 'l0' placeholder instead.
        print(img_id, 'missing, defaulting to l0')
        testlabels.append('l0')

img102.jpg	trees water
img103.jpg	people trees
img11.jpg	people trees grass snow buildings water flowers traffic signs traffic sign graffiti the sculpture of kissing students wall barge emajõe-peipsi barge jõmmu green garbage can fence suit human snow statue table pole
img113.jpg	trees
img114.jpg	trees water
img121.jpg	trees
img126.jpg	people grass water
img131.jpg	building cars
img136.jpg	trees
img137.jpg	trees
img139.jpg	trees building road pavement flowers traffic sign clouds stone road sand street tree wall playground children sunny yellow leaves chain light barge emajõe-peipsi barge jõmmu drone photography tractor lamp post glasses cloudy bushes autumn fountain river car plants red bus green garbage can beach benches fence performing park wedding boat child cat traffic suit snow plow leaves sign outdoor gym hands human man cone lights table tents stage book books flag pole
img140.jpg	people water clouds
img15.jpg	grass building water person nature pavement flowers bridge tree do

In [26]:
# Attach the predicted label strings to the test frame and write the submission.
testdf['labels'] = testlabels

submission_path = Path('kea_submissions/beit_logreg1.csv')
# to_csv does not create missing directories, so make sure the output folder
# exists; otherwise the predictions computed above are lost to an OSError.
submission_path.parent.mkdir(parents=True, exist_ok=True)
testdf.to_csv(submission_path, index=False)