In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [2]:
import fastai as meta_fai
import fastai.basics as fai
import fastai.vision as fv
from pathlib import Path
import pandas as pd
import random
import numpy as np
from collections import defaultdict
import gc
import torch
import torchvision.models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
import math
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

import PIL
from tqdm import tqdm_notebook as tqdm
from tqdm import trange

# Bug Spray ୧[ ˵ ͡ᵔ ͜ʟ ͡ᵔ ˵ ]୨

In [3]:
get_ipython().config.get('IPKernelApp', {})['parent_appname'] = ""

In [None]:
#!head train_photo_to_biz_ids.csv
#!ls train_photos | head
#!head train.csv

In [4]:
biz_label = pd.read_csv("train.csv").fillna('')
photo_biz = pd.read_csv("train_photo_to_biz_ids.csv")

In [None]:
#biz_label.head()
#photo_biz.head()

In [5]:
bizs, labels = biz_label['business_id'], biz_label['labels']
biz2label = {biz:label for biz,label in zip(bizs, labels)}

In [None]:
#biz2label

In [6]:
bizzes = list(biz2label.keys())

nvalid = 200

random.seed(13) # jajaja ... 13
random.shuffle(bizzes)
valid_biz, train_biz  = bizzes[:nvalid], bizzes[nvalid:]

In [7]:
torch.save(valid_biz, "valid_biz.list")
torch.save(train_biz, "train_biz.list")

In [8]:
photo2biz = {photo:biz for photo,biz in zip(photo_biz['photo_id'], photo_biz['business_id'])}

In [9]:
biz2photos = defaultdict(lambda : [])
for img,biz in photo2biz.items():
    biz2photos[biz].append(img)

In [10]:
photo_label = photo_biz.copy()
photo_label.business_id = photo_label.business_id.apply(lambda x: biz2label[x])
photo_label = photo_label.rename({"business_id":"labels"},axis=1);

In [None]:
#photo_label.head()
#photo_label.head()

In [11]:
def str2list(label):
    if label != label: return [] # if lbl == NaN
    return [int(x) for x in label.split()]

categories = ["good_for_lunch", "good_for_dinner", "takes_reservations", "outdoor_seating", "restaurant_is_expensive", "has_alcohol", "has_table_service", "ambience_is_classy", "good_for_kids"]

In [12]:
valid_dict = [0]*4001
for biz in valid_biz:
    valid_dict[biz] = 1
    
def is_valid(filename):
    pf = Path(filename)
    f = int(pf.stem)
    return valid_dict[photo2biz[f]]

In [13]:
def load_data(img_size, batch_size, amount=1):
    tfms = fv.get_transforms()
    return (fv.ImageItemList.from_df(photo_label,".",folder="train_photos", suffix='.jpg')
              .filter_by_rand(amount, seed=13)
              .split_by_valid_func(is_valid)
              .label_from_df(label_delim=' ') #dataframe
              .transform(tfms, size=img_size)
              .databunch(bs=batch_size))

In [14]:
data = load_data(224,64, amount=1)

In [None]:
data.show_batch(rows=3)


In [15]:
class F1_Score:
    def __init__(self,thresh:float):
        self.thresh = thresh
        
    def __call__(self,inp,targ):
        return meta_fai.metrics.fbeta(inp, targ, thresh=self.thresh, beta=1.)
    
    def __repr__(self):
        return f"F1({self.thresh})"
    
    @property
    def __name__(self):
        return self.__repr__()

In [16]:
metrics = [F1_Score(t) for t in [0.3, 0.35, 0.4, 0.43, 0.45]]

In [18]:
learner = fv.create_cnn(data, fv.models.resnet101, metrics=metrics, wd=0.1)

In [None]:
learner.lr_find(); learner.recorder.plot()

In [None]:
learner.fit_one_cycle(3,9e-2)

In [None]:
learner.lr_find(); learner.recorder.plot()

In [None]:
learner.fit_one_cycle(1,1e-3)

In [None]:
learner.save("big_cnn_101")

In [None]:
learner.unfreeze()

In [None]:
learner.lr_find(); learner.recorder.plot()

In [None]:
learner.fit_one_cycle(5,1e-2)

In [None]:
learner.save("simple_unfroze_cnn_101")

# Data Prep

In [19]:
#learner = fv.create_cnn(data, fv.models.resnet34, metrics=metrics, wd=0.1)
learner.load("big_cnn_101")

Learner(data=ImageDataBunch;

Train: LabelList
y: MultiCategoryList (213867 items)
[MultiCategory 3;8, MultiCategory 1;2;3;5;6;7, MultiCategory 1;2;3;4;5;6;7, MultiCategory 1;2;3;4;5;6;7, MultiCategory 1;2;3;4;5;6;7]...
Path: .
x: ImageItemList (213867 items)
[Image (3, 500, 373), Image (3, 500, 500), Image (3, 375, 500), Image (3, 375, 500), Image (3, 375, 500)]...
Path: .;

Valid: LabelList
y: MultiCategoryList (20975 items)
[MultiCategory 1;2;3;5;6;7, MultiCategory 3;6;8, MultiCategory 3;6;8, MultiCategory 3, MultiCategory 0;3;8]...
Path: .
x: ImageItemList (20975 items)
[Image (3, 500, 282), Image (3, 373, 500), Image (3, 500, 373), Image (3, 375, 500), Image (3, 500, 375)]...
Path: .;

Test: None, model=Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1

In [20]:
class SimpleTensorDataset(Dataset):
    def __init__(self, features, labels):
        super().__init__()
        assert len(features) == len(labels)
        self.features = [torch.tensor(f).float().cpu() for f in features]
        self.labels = [torch.tensor(l).float().cpu() for l in labels]
        
    def __len__(self):
        return len(self.features)

    def __getitem__(self, i):
        return self.features[i], self.labels[i]

In [21]:
class forward_net(nn.Module):
    def __init__(self, num_classes=9):
        super(forward_net, self).__init__()
        self.fc1 = nn.Linear(82, 64)
        self.fc2 = nn.Linear(64, 48)
        self.fc3 = nn.Linear(48, 32)
        self.fc4 = nn.Linear(32, num_classes)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.fc2(out)
        out = self.fc3(out)
        out = self.fc4(out)
        return out

model = forward_net()

In [22]:
def stat_function(values,f):
    n = len(values)
    return torch.tensor([f(torch.tensor([values[i][x] for i in range(n)]),n) for x in range(9)])

def get_stats(values):
    n = len(values)
    all_Amean = stat_function(values,lambda col,n: sum(col)/n)
    all_Gmean = stat_function(values,lambda col,n: torch.prod(col)**(1/n))
    all_Hmean = stat_function(values,lambda col,n: 1/sum(torch.div(1,col)))
    all_Cmean = stat_function(values,lambda col,n: (sum(col**2)/n)**0.5)
    all_Gmean_mod = stat_function(values, lambda col,n: 1-torch.prod(1-col)**(1/n))
    all_max = stat_function(values,lambda col,n: max(col))
    all_min = stat_function(values,lambda col,n: min(col))
    all_range = stat_function(values,lambda col,n: max(col)-min(col))
    var = lambda col,n,x: sum((col-all_Amean[x])**2)/n
    all_var = torch.tensor([var(torch.tensor([values[i][x] for i in range(n)]),n,x) for x in range(9)])
    return torch.cat((all_Amean,all_Gmean,all_Hmean,all_Cmean,all_Gmean_mod,all_max,all_min,all_range,all_var,torch.FloatTensor([n])))

In [24]:
#!mkdir stored

train_stats = []
train_labels = []
for biz in tqdm(train_biz[:10]):
    values = []
    r = len(biz2photos[biz])
    for photo_num in biz2photos[biz]:
        photo = fv.open_image("train_photos/"+str(photo_num)+".jpg")
        values.append(learner.predict(photo))
    pred = torch.tensor([list(v[2]) for v in values])
    
    train_stats.append(get_stats(pred))
    train_labels.append(values[0][1])

train_data = SimpleTensorDataset(train_stats,train_labels)
torch.save(train_data, "trainData_sample")

valid_stats = []
valid_labels = []
for biz in tqdm(train_biz[:10]):
    values = []
    r = len(biz2photos[biz])
    for photo_num in biz2photos[biz]:
        photo = fv.open_image("train_photos/"+str(photo_num)+".jpg")
        values.append(learner.predict(photo))
    pred = torch.tensor([list(v[2]) for v in values])
    
    valid_stats.append(get_stats(pred))
    valid_labels.append(values[0][1])

valid_data = SimpleTensorDataset(valid_stats,valid_labels)
torch.save(valid_data, "validData_sample")

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




  """
  


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




  """
  


# Training

In [26]:
bunch = meta_fai.basic_train.DataBunch(train_dl=train_data, valid_dl=valid_data)

AttributeError: 'SimpleTensorDataset' object has no attribute 'init_kwargs'

In [25]:
learn = meta_fai.basic_train.Learner(train_data, model, wd=0.1, metrics=
                        metrics)

AttributeError: 'SimpleTensorDataset' object has no attribute 'path'

In [None]:
learn.fit_one_cycle(1)