In [25]:
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torchvision.transforms.functional as fn
from keras.preprocessing.image import ImageDataGenerator
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from torchvision.transforms import RandomAffine
from xgboost.sklearn import XGBClassifier

import random_clip_forest


In [26]:
from transformers import BeitFeatureExtractor, BeitForImageClassification
from PIL import Image

In [27]:
# The image preprocessor and the classification network are loaded from the
# same pretrained checkpoint, so the identifier is spelled out only once.
BEIT_CHECKPOINT = 'microsoft/beit-base-patch16-224-pt22k-ft22k'

feature_extractor = BeitFeatureExtractor.from_pretrained(BEIT_CHECKPOINT)
model = BeitForImageClassification.from_pretrained(BEIT_CHECKPOINT)

In [28]:
# Load the training table and keep only the rows whose image is available
# (random_clip_forest.img_exists is used as the row filter).
df = pd.read_csv('train.csv')
has_image = df.image_id.apply(random_clip_forest.img_exists)
# .copy() gives an independent frame, so adding the 'Images' column below does
# not write into a slice of the frame read from disk (avoids pandas'
# SettingWithCopyWarning / chained-assignment ambiguity).
df = df.loc[has_image].copy()
df['Images'] = df['image_id'].apply(random_clip_forest.open_img_id)

# Augmentation 1: same rows, images mirrored horizontally.
df2 = df.copy()
df2['Images'] = df['Images'].apply(fn.hflip)

# Augmentation 2: same rows, images blurred with a 9x9 Gaussian kernel.
df3 = df.copy()
df3['Images'] = [fn.gaussian_blur(img=image, kernel_size=(9, 9)) for image in df['Images']]

# Stack originals and both augmented copies. The row index is not reset, so
# every original index value appears three times in the result.
df = pd.concat([df, df2, df3])

# Lookup table of labels; `labels` holds the values of its 'object' column.
labelsdf = pd.read_csv('labels.csv')
labels = labelsdf['object'].values.tolist()

In [29]:
# Build the target matrix: one encoded entry per row of df['labels'], produced
# by random_clip_forest.onehot and cast to integers.
# NOTE(review): presumably each entry is a fixed-length 0/1 vector -- confirm
# against random_clip_forest.onehot.
encoded_labels = list(map(random_clip_forest.onehot, df['labels']))
y_train = np.array(encoded_labels).astype(int)

In [30]:
# Preprocess every image held in df['Images'] and return PyTorch tensors.
train_images = list(df['Images'])
inputs = feature_extractor(images=train_images, return_tensors="pt")


In [31]:
# Forward pass over the training images with gradient tracking disabled.
# The images are fed through the network in fixed-size chunks instead of one
# giant batch, so peak memory is bounded by the chunk size rather than by the
# size of the (augmented) training set.
INFERENCE_BATCH_SIZE = 32

num_train_images = len(inputs['pixel_values'])
logit_chunks = []
with torch.no_grad():
    for start in range(0, num_train_images, INFERENCE_BATCH_SIZE):
        stop = start + INFERENCE_BATCH_SIZE
        # Slice every tensor produced by the feature extractor the same way.
        chunk = {key: value[start:stop] for key, value in inputs.items()}
        chunk_outputs = model(**chunk)
        logit_chunks.append(chunk_outputs.logits)

if not logit_chunks:
    raise ValueError('no training images were provided to the model')

# Re-wrap the concatenated logits in the model's own output class so that
# both `outputs['logits']` and `outputs.logits` keep working downstream.
outputs = type(chunk_outputs)(logits=torch.cat(logit_chunks))

In [40]:
# The classifier logits produced by the BEiT model serve as the feature
# matrix for the downstream classifier.
X_train = outputs.logits

In [33]:
# Hyper-parameters of the base XGBoost classifier, gathered in one place.
# NOTE(review): the values look hand-tuned (presumably to counter label
# imbalance) -- confirm and document where they came from.
xgb_params = {'scale_pos_weight': 90, 'max_delta_step': 7}
xgboost_model = XGBClassifier(**xgb_params)

# Wrap the base estimator so it can be fitted against a multi-column target.
final = MultiOutputClassifier(estimator=xgboost_model)

In [41]:
# Train the multi-output wrapper `final` on the BEiT logits (X_train) against
# the encoded label matrix (y_train).
final.fit(X_train, y_train)

In [42]:
# Predict a label string for every image listed in test.csv.
# `testlabels` ends up with exactly one entry per row of `testdf`.
testdf = pd.read_csv('test.csv')
testlabels = []
labelsdf = pd.read_csv('labels.csv')
for img_id in testdf.image_id:
    # Guard only the image load: if the whole body were inside the `try`, a
    # FileNotFoundError raised after a label was already appended would add a
    # second entry for the same image and desynchronise testlabels/testdf.
    try:
        test_image = random_clip_forest.open_img_id(img_id)
    except FileNotFoundError:
        print(img_id, 'missing, defaulting to l0')
        testlabels.append('l0')
        continue

    # Dedicated names here: reusing `inputs` would overwrite the training
    # tensors and silently break a re-run of the training forward pass.
    test_inputs = feature_extractor([test_image], return_tensors="pt")
    with torch.no_grad():
        test_outputs = model(**test_inputs)
    prediction = final.predict(test_outputs['logits'])
    predicted_labels = random_clip_forest.labelstring(prediction.astype(bool))

    # NOTE(review): an empty prediction falls back to 'l1' while a missing
    # image falls back to 'l0' -- confirm that the difference is intentional.
    if len(predicted_labels) == 0:
        testlabels.append('l1')
    else:
        testlabels.append(predicted_labels)

    # Progress line: image id followed by the 'object' names of the chosen
    # label ids (listed in labels.csv order).
    chosen_ids = testlabels[-1].split(' ')
    chosen_names = labelsdf.loc[labelsdf.label_id.isin(chosen_ids), 'object'].values.ravel()
    print(img_id, ' '.join(chosen_names), sep='\t')

img102.jpg	people trees buildings town hall of tartu flags drone photography
img103.jpg	trees grass
img11.jpg	people snow
img113.jpg	plants
img114.jpg	hands
img121.jpg	people trees grass water playground lamp post
img126.jpg	people trees water playground
img131.jpg	person
img136.jpg	trees
img137.jpg	trees park
img139.jpg	trees grass
img140.jpg	people trees sand
img15.jpg	trees grass bench green garbage can pole
img150.jpg	building traffic signs
img151.jpg	people trees buildings road
img152.jpg	people buildings road traffic signs
img156.jpg	people trees grass building buildings road bicycle traffic signs
img161.jpg	people trees water barge emajõe-peipsi barge jõmmu
img166.jpg	people trees grass road person pavement
img167.jpg	trees
img170.jpg	people trees building stone road table
img173.jpg	trees snow
img176.jpg	people snow
img18.jpg	trees grass building green garbage can
img180.jpg	people trees table
img181.jpg	people trees grass road nature plants
img182.jpg	people trees building
im

In [43]:
# Attach the predicted label strings to the test table and write the
# submission file.
testdf['labels'] = testlabels

submission_path = Path('kea_submissions/beit_xgb1.csv')
# to_csv does not create missing parent directories; create the folder first
# so the results of the long inference run are not lost to an avoidable error.
submission_path.parent.mkdir(parents=True, exist_ok=True)
testdf.to_csv(submission_path, index=False)