In [12]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [13]:
# This file contains all the main external libs we'll use
from fastai.imports import *

In [14]:
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

import urllib
from PIL import Image

In [15]:
# Root directory of the dataset; the trailing slash is relied on by every path below.
PATH = 'data/tawkify/'
# Folder holding the training images.
image_folder = PATH + 'train/'
# Source tables: demographic data and per-image metadata.
data_csv = f'{PATH}tw_dem_images.csv'
image_csv = f'{PATH}image_csv.csv'

# Label file (id, gender) written and read by the cells further down.
model_csv = PATH + 'current_model_gender.csv'

In [16]:
# Target image size passed to tfms_from_model; the commented value is an alternative setting.
sz=220
# sz=400

### Create Training CSV data from Image CSV

In [None]:
# Load the image metadata table; later cells read its local_photo, gender and userID columns.
img_df = pd.read_csv(image_csv)

In [None]:
# (rows, columns) of the metadata table.
img_df.shape

In [None]:
# Preview one photo chosen at random from the first ten rows of the table.
sample_position = np.random.randint(10)
sample_photo = img_df.local_photo.iloc[sample_position]
plt.imshow(plt.imread(sample_photo))

In [None]:
# Map each row's index label to the array shape of its decoded photo.
# Every image is read from disk, so this is slow on a large table.
size_map = dict(zip(
    img_df.index,
    (plt.imread(photo).shape for photo in img_df.local_photo),
))

In [None]:
# Display the whole index -> shape mapping (one entry per row of img_df).
size_map

In [None]:
# Split the recorded shapes into parallel height / width / channel tuples.
# plt.imread yields a 2-D (H, W) array for greyscale files; pad those with a channel
# count of 1 so every entry has exactly three fields. Without the padding zip() would
# truncate to two fields and the three-way unpack would raise.
h, w, ch = zip(*[(tuple(shape) + (1,))[:3] for shape in size_map.values()])

In [None]:
# Histogram of the second shape field (image width) over all photos.
plt.hist(w)

In [None]:
# Convert to an ndarray so a boolean mask can be applied, then zoom in on widths below 600.
w = np.array(w)
plt.hist(w[w<600])

In [None]:
# Peek at the first rows of the metadata table.
img_df.head()

In [None]:
# Rows with no gender label; these are excluded from the training CSV in the next cell.
img_df.loc[img_df.gender.isna()]

In [None]:
# Keep only the labelled rows, reduce them to the two columns the classifier needs
# (userID renamed to id, plus gender), and write the label file without the index.
temp_df = (
    img_df.loc[img_df.gender.notna(), ['userID', 'gender']]
    .rename(columns={'userID': 'id'})
)
temp_df.to_csv(model_csv, index=False)

In [None]:
# (rows, columns) of the label table just written.
temp_df.shape

### Architecture

In [None]:
# !rm -rf {PATH}tmp

In [6]:
# Read the label table back from disk, so this section can start from the saved CSV.
temp_df = pd.read_csv(model_csv)

In [7]:
# Number of rows per gender label, followed by the overall table shape.
print(temp_df.groupby('gender').count())
print(temp_df.shape)

           id
gender       
female  60597
male    28681
(89278, 2)


In [17]:
# Backbone architecture handed to tfms_from_model and ConvLearner.pretrained below.
arch=resnet34

In [18]:
# Transforms for the chosen backbone and image size, with side-on augmentations.
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
# temp_df was parsed by pandas, so its row count already excludes the CSV header.
# The common `len(list(open(csv))) - 1` idiom subtracts the header line; applying a
# further -1 here would make the last sample ineligible for the validation split.
n = temp_df.shape[0]
# NOTE(review): the split presumably depends on n, so checkpoints trained with the old
# n-1 value were validated on a different subset - retrain before comparing metrics.
val_idxs = get_cv_idxs(n)
# Images are looked up as {PATH}train/<id>.jpg using the ids in the label file.
data = ImageClassifierData.from_csv(path=PATH, folder='train', csv_fname=model_csv, tfms=tfms, suffix='.jpg', val_idxs=val_idxs, bs=32)

In [None]:
# Instantiate the backbone used for the CAM-style model below.
# NOTE(review): the positional False is presumably `pretrained=False` (random weights) - confirm that is intended.
m = arch(False)

In [None]:
# CAM-style network: the backbone minus its last two children (presumably the pooling
# and classifier layers - confirm for this arch), followed by a 3x3 conv producing one
# feature map per class (2 classes), global average pooling, and log-softmax.
m_cam = nn.Sequential(*children(m)[:-2],
                      nn.Conv2d(512, 2, 3, padding=1),
                      nn.AdaptiveAvgPool2d(1),
                      Flatten(),
                      # After Flatten the tensor is (batch, classes); name the class
                      # dimension explicitly rather than relying on the deprecated
                      # implicit-dimension behaviour of LogSoftmax().
                      nn.LogSoftmax(dim=1))

In [None]:
def show_image(idx):
    """Plot training sample ``idx`` and print its de-normalised shape and label.

    Reads the module-level ``data`` object; the sample comes from ``data.trn_ds``.
    """
    image, label = data.trn_ds[idx]
    restored = data.trn_ds.denorm(image)
    print(restored.shape)
    print('Y value:', label)
    # denorm's result is indexed at 0 to get the single image to plot.
    plt.imshow(restored[0])

In [None]:
# Sanity-check the data pipeline on one training sample.
show_image(3)

In [None]:
# Build the learner from the backbone and the data object.
# The commented lines are alternatives: precomputed activations, or a custom model.
# learn = ConvLearner.pretrained(arch, data=data, precompute=True)
learn = ConvLearner.pretrained(arch, data=data)
# learn = ConvLearner.from_model_data(m, data=data)

In [None]:
# Freeze the learner's layer groups up to index -4.
# NOTE(review): three learning rates are passed per layer group further down, which
# suggests three groups; with fewer than four groups a -4 slice would leave every
# group trainable - confirm the intended index.
learn.freeze_to(-4)

In [None]:
# List the `trainable` flag of each top-level layer in m_cam.
# The flag is only present once a learner has frozen/unfrozen the module; m_cam has
# not been handed to a learner in the cells above, so fall back to None instead of
# raising AttributeError on layers that were never flagged.
[getattr(l, 'trainable', None) for l in m_cam]

In [None]:
# Run the learning-rate finder on the current (partly frozen) learner.
learn.lr_find()

In [None]:
# Plot the finder's results to choose a learning rate.
learn.sched.plot()

In [None]:
# First training pass: one cycle at a learning rate of 1e-2 (two-cycle variant kept below).
learn.fit(lrs=1e-2, n_cycle=1,)
# learn.fit(lrs=1e-2, n_cycle=2,)

                                                               

In [None]:
# Checkpoint the learner after the first pass.
learn.save('tw_gender_cam_firstrun1e-2')

In [None]:
# Restore the first-pass checkpoint (lets a later session resume from this point).
learn.load('tw_gender_cam_firstrun1e-2')

In [None]:
# Ensure precomputed activations are off. The learner above was created without
# precompute=True, so this is presumably a no-op left from an earlier experiment.
learn.precompute=False

In [None]:
# Second training pass: three cycles at the same learning rate.
learn.fit(lrs=1e-2, n_cycle=3)

In [None]:
# Checkpoint the learner after the second pass.
learn.save('tw_gender_cam_run2')

In [None]:
# Unfreeze the learner for fine-tuning.
learn.unfreeze()

In [None]:
# Re-run the learning-rate finder now that the learner is unfrozen.
learn.lr_find()

In [None]:
# Plot the finder's results for the unfrozen learner.
learn.sched.plot()

In [None]:
# Fine-tune with a list of three learning rates (presumably one per layer group,
# smallest first): three cycles with cycle_len=1.
learn.fit(lrs=[1e-4,1e-3,1e-2], n_cycle=3, cycle_len=1)

In [None]:
# Checkpoint the learner after the first fine-tuning pass.
learn.save('tw_gender_cam_unfreeze1')

In [None]:
# Restore the first fine-tuning checkpoint.
learn.load('tw_gender_cam_unfreeze1')

In [None]:
# Second fine-tuning pass with the same three learning rates.
# NOTE(review): cycle_mult=2 is given without cycle_len (the previous fit passed
# cycle_len=1) - confirm this produces the intended restart schedule.
learn.fit(lrs=[1e-4,1e-3,1e-2], n_cycle=3, cycle_mult=2)

In [None]:
# Checkpoint the learner after the second fine-tuning pass.
learn.save('tw_gender_cam_unfreeze2_nc3cm2')

In [None]:
# Restore the final checkpoint; the evaluation and inference cells below use it.
learn.load('tw_gender_cam_unfreeze2_nc3cm2')

In [None]:
# Evaluate with test-time augmentation, then report accuracy and log loss.
# The predictions are exponentiated, i.e. treated as log-probabilities.
# NOTE(review): some fastai releases return one prediction array per augmentation from
# TTA(); if so, average over axis 0 before scoring - confirm against the installed version.
log_preds, y = learn.TTA()
probs = np.exp(log_preds)
accuracy(log_preds,y), metrics.log_loss(y, probs)

In [None]:
# Predicted class index per sample: argmax over axis 1 of the log-predictions.
preds = np.argmax(log_preds, axis=1)
# np.array(preds==y).mean()
preds
# accuracy(log_preds,y)

### Test image

In [None]:
# Remote image that can be downloaded into test_image_file by the (commented-out) urlretrieve cell.
test_image_url = 'https://images-na.ssl-images-amazon.com/images/M/MV5BMTk4MDM0MDUzM15BMl5BanBnXkFtZTcwOTI4MzU1Mw@@._V1_UY317_CR7,0,214,317_AL_.jpg'

In [None]:
# Local image used for single-image inference: leave exactly one assignment uncommented.
# test_image_file = f'{PATH}test_images/test1.jpg'
# test_image_file = f'{PATH}test_images/kenneth_s.jpg'
# test_image_file = f'{PATH}test_images/age_img/IMG_2776.jpg'
test_image_file = f'{PATH}test_images/age_img/IMG_8274.jpg'
# test_image_file = f'{PATH}test_images/age_img/IMG_3135.jpg'
# test_image_file = f'{PATH}test_images/age_img/IMG_8819.jpg'
# test_image_file = f'{PATH}test_images/age_img/IMG_9370.jpg'
# test_image_file = f'{PATH}test_images/age_img/IMG_2793.jpg'
# test_image_file = f'{PATH}test_images/grandma_test1.jpg'
# test_image_file = f'{PATH}test_images/lzl_test1.jpg'
# test_image_file = f'{PATH}test_images/age_img/IMG_9625.jpg'

# test_image_file = f'{PATH}test_images/jackie_test1.jpg'

In [None]:
# urllib.request.urlretrieve(test_image_url, test_image_file)

In [None]:
# Display the selected test image as stored on disk (no transforms applied).
plt.imshow(plt.imread(test_image_file))

In [None]:
# Second element of the tfms pair, used here as the validation-time transform
# (the first element is used as the training transform further down).
val_tfms = tfms[1]

In [None]:
# Load the file with fastai's open_image and apply the validation transform;
# the commented line is the earlier variant that read the file with plt.imread.
# test_image = val_tfms(plt.imread(test_image_file))
test_image = val_tfms(open_image(test_image_file))


In [None]:
# Shape of the transformed test image.
test_image.shape

In [None]:
# Put the network into evaluation mode before predicting on a single image.
eval_mode = learn.model.eval()
# Required before single-image prediction: per the original author's note, predicting
# without evaluation mode raises an exception.
# Equivalent to learn.model.train(False).

In [None]:
# Wrap the image in a list to form a batch of one, predict, and keep the single result row.
gender = learn.predict_array([test_image])[0]

In [None]:
# Report the class label with the highest score for the test image.
# The scores are in `gender` (assigned by the prediction cell above), and np.argmax
# is a function, so it must be called rather than subscripted.
print('Gender:', data.classes[np.argmax(gender)])

In [None]:
# Class labels in the order used to index the prediction scores.
data.classes

### Find corrupted images

In [None]:
# First element of the tfms pair (used as the training-time transform) and the training data loader.
trn_tfms = tfms[0]
dl = data.trn_dl

In [None]:
# Pull a single batch from the training loader.
first_batch = next(iter(dl))

In [None]:
# Shape of the first item in the batch's first element (presumably the image tensor).
first_batch[0][0].shape

In [None]:
# First rows of the label table that the scans below iterate over.
temp_df.head()

In [None]:
import multiprocessing

# Number of CPUs reported by the OS; the parallel scan below uses it as the pool size.
multiprocessing.cpu_count()

In [None]:
# Smoke-test the corruption check on the first row only (note the trailing `break`)
# before running it over the whole table.
for idx, row in tqdm(temp_df.iterrows(), total=temp_df.shape[0]):
    user_id = int(row.id)
    # Dedicated name, so the `test_image_file` chosen for the single-image inference
    # cells is not silently repointed at a training photo.
    train_image_file = f'{PATH}train/{user_id}.jpg'
    try:
        transformed = trn_tfms(open_image(train_image_file))
        # Show only the shape; printing the full pixel array floods the cell output.
        print(np.shape(transformed))
    except Exception as e:
        # Broad catch is deliberate: any failure to load or transform marks the file as suspect.
        print(e)
        print(user_id)
        print(train_image_file)
    break

In [None]:
# ProcessPoolExecutor is the executor used below, so it must be imported here.
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, wait, as_completed


def transform_img(user_id):
    """Try to load and transform one training image, reporting any failure.

    Args:
        user_id: integer id; the image is read from ``{PATH}train/{user_id}.jpg``.

    Returns:
        None. Any exception is caught and printed together with the id and the
        path, so one unreadable file does not stop the scan.
    """
    test_image_file = f'{PATH}train/{user_id}.jpg'
    try:
        img = open_image(test_image_file)
        trn_tfms(img)
    except Exception as e:
        # Broad catch is deliberate: any failure marks the file as suspect.
        print(e)
        print(user_id)
        print(test_image_file)


# Scan every row of the label table; assign a slice of temp_df here for a quicker trial run.
subset_df = temp_df
futures = []
# One worker process per CPU.
# NOTE(review): submitting a notebook-defined function to worker processes presumably
# relies on the fork start method (the Linux default) - confirm on other platforms.
with ProcessPoolExecutor(multiprocessing.cpu_count()) as executor:
    for idx, row in subset_df.iterrows():
        futures.append(executor.submit(transform_img, int(row.id)))

    # Progress-bar settings; 'total' lets tqdm show completion against the number of jobs.
    kwargs = {
        'total': len(futures),
        'unit': 'nap',
        'unit_scale': True,
        'leave': True,
        'disable': False
    }
    # Drain the futures as they finish purely to drive the progress bar;
    # failures are reported by the workers' own print calls.
    for x in tqdm(as_completed(futures), **kwargs):
        pass