In [1]:
from pathlib import Path
from fastai.callbacks import SaveModelCallback
from fastai.vision import ImageList, cnn_learner, models, error_rate, accuracy, ShowGraph
from torch.nn.functional import cross_entropy
import config

import pandas as pd
from services import file_service

# Notebook-wide logger, built by the project's `config.create_logger` helper and
# named after the current module (`__name__`).
logger = config.create_logger(__name__)


In [2]:

# Folder holding the image files, and the pickled metadata frame describing them.
# NOTE(review): both are absolute, machine-specific paths -- consider moving them into `config`.
image_path = Path('C:\\Kaggle Downloads\\deepfake-detection-challenge\\output\\merged\\images')
path = Path('C:\\Kaggle Downloads\\deepfake-detection-challenge\\output\\decorate_df\\dataframes\\df.pkl')

# Fixed seed so the shuffle -- and therefore which rows survive the halving
# below and end up in each split -- is identical on every Restart & Run All.
SHUFFLE_SEED = 42

df = pd.read_pickle(path)
# Shuffle all rows, then renumber so the index is 0..n-1 in shuffled order.
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

# Binary target: 1 where gross_label is 'fake', 0 for every other value.
# Vectorised comparison instead of a per-row Python lambda.
df['real_or_fake_digit'] = (df['gross_label'] == 'fake').astype('int64')

num_rows = df.shape[0]

# NOTE: Shorten if necessary
# Keep the first half of the shuffled rows. Because the index was just reset,
# the surviving index labels are still 0..len(df)-1, i.e. label == position.
df = df.iloc[:num_rows//2, :].copy()

logger.info(f'DF: {df.head()}')

# Partition on the pre-assigned 'test_train_split' column; each subset keeps
# the index labels of `df`.
df_val = df[df['test_train_split'] == 'validation']
df_train = df[df['test_train_split'] == 'train']
df_test = df[df['test_train_split'] == 'test']

2020-03-29 15:19:41,839 - __main__ - INFO - DF:                                                 path     score  \
0  C:\Kaggle Downloads\deepfake-detection-challen...  0.988823   
1  C:\Kaggle Downloads\deepfake-detection-challen...  0.968515   
2  C:\Kaggle Downloads\deepfake-detection-challen...  0.983153   
3  C:\Kaggle Downloads\deepfake-detection-challen...  0.994700   
4  C:\Kaggle Downloads\deepfake-detection-challen...  0.981365   

                                       original_path  \
0  D:\Kaggle Downloads\deepfake-detection-challen...   
1  D:\Kaggle Downloads\deepfake-detection-challen...   
2  D:\Kaggle Downloads\deepfake-detection-challen...   
3  D:\Kaggle Downloads\deepfake-detection-challen...   
4  D:\Kaggle Downloads\deepfake-detection-challen...   

                     filename video_name_stem gross_label  score_1places  \
0  jovqtearzg_291_98882.0.png      jovqtearzg        fake            1.0   
1  iyvhbdodpj_178_96852.0.png      iyvhbdodpj        fake         

In [3]:
# Log the validation index type and the folder of the first validation image
# as a quick sanity check before building the data pipeline.
logger.info(f'df_val Index: {type(df_val.index)}')

val_path = Path(df_val['path'].iloc[0])
logger.info(f'Path: {val_path.parent}')

# Build the fastai data pipeline one stage at a time:
# 1) items come from the 'filename' column, resolved relative to image_path
image_items = ImageList.from_df(df, image_path, cols='filename')
# 2) train/validation membership comes from the indexes of the pre-split frames
split_items = image_items.split_by_idxs(train_idx=df_train.index, valid_idx=df_val.index)
# 3) targets come from the 0/1 'real_or_fake_digit' column
labelled_items = split_items.label_from_df(cols='real_or_fake_digit')
# 4) wrap everything in a DataBunch with batch size 8
data = labelled_items.databunch(bs=8)

# Architecture constructor handed to cnn_learner later on.
model = models.resnet50

2020-03-29 15:19:42,360 - __main__ - INFO - df_val Index: <class 'pandas.core.indexes.numeric.Int64Index'>
2020-03-29 15:19:42,376 - __main__ - INFO - Path: C:\Kaggle Downloads\deepfake-detection-challenge\output\merged\images


In [4]:
# Create the learner for `model` on `data`, reporting error_rate, accuracy and
# cross_entropy as metrics.
learn = cnn_learner(data, model, metrics=[error_rate, accuracy, cross_entropy])
learn.model.cuda()

# Checkpoint the untouched weights, run the learning-rate sweep, plot it, then
# restore the checkpoint so the sweep leaves no trace on the model.
learn.save('before-learner')
learn.lr_find()
learn.recorder.plot()
# `load` returns the Learner itself. Rebinding it (instead of leaving the call
# as the cell's final expression) stops Jupyter from trying to render the whole
# Learner repr as cell output.
# NOTE(review): that repr is the likely source of the MemoryError this cell
# previously ended with -- confirm on the next run.
learn = learn.load('before-learner')
# LR range passed to fit_one_cycle in the training cells: 3e-06,4e-06

epoch,train_loss,valid_loss,error_rate,accuracy,cross_entropy,time


LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.


MemoryError: 

In [None]:
# Per-architecture output folder: <OUTPUT_MODEL_PAR_PATH>/<model name>.
# parents=True so a missing OUTPUT_MODEL_PAR_PATH does not abort the run.
par_path = Path(config.OUTPUT_MODEL_PAR_PATH, model.__qualname__)
par_path.mkdir(parents=True, exist_ok=True)

# Folder for the per-epoch weight checkpoints; created up front so the first
# checkpoint write cannot fail on a missing directory.
weights_par_path = Path(par_path, 'saved_weights')
weights_par_path.mkdir(parents=True, exist_ok=True)

# Checkpoint base name supplied by the project's file_service helper.
file_path = file_service.get_unique_persist_filename(weights_par_path, base_output_stem=f'cnn_{model.__qualname__}_', extension='pkl', use_date=True)

pickle_par_path = Path(par_path, 'data')
pickle_par_path.mkdir(parents=True, exist_ok=True)

# Persist the exact frames and the DataBunch BEFORE the long training run, so
# the train/validation/test membership used for these weights is still on disk
# if training is interrupted.
df.to_pickle(Path(pickle_par_path, 'df'))
df_train.to_pickle(Path(pickle_par_path, 'df_train'))
df_val.to_pickle(Path(pickle_par_path, 'df_val'))
df_test.to_pickle(Path(pickle_par_path, 'df_test'))

# Point the DataBunch at the data folder so data.save() writes there.
data.path = pickle_par_path

data.save()

# Stage 1: 50 one-cycle epochs with the learner as created (before unfreeze()),
# plotting losses live and saving a checkpoint after every epoch.
learn.fit_one_cycle(50, slice(3e-06,4e-06), callbacks=[ShowGraph(learn), SaveModelCallback(learn, every='epoch',
                  monitor='cross_entropy', name=file_path)])

In [None]:
# Stage 2: unfreeze the learner's layers and train for another 50 one-cycle epochs.
learn.unfreeze()
# With every='epoch', SaveModelCallback writes one checkpoint per epoch named
# '<name>_<epoch>'. Reusing the stage-1 name would overwrite the stage-1
# checkpoints epoch by epoch, so this stage gets its own '_unfrozen' suffix.
learn.fit_one_cycle(50, slice(3e-06,4e-06), callbacks=[ShowGraph(learn), SaveModelCallback(learn, every='epoch',
                  monitor='cross_entropy', name=f'{file_path}_unfrozen')])

