# Training with the Fastai's CNN

- Based on https://medium.com/deena-does-data-science/train-a-cnn-with-the-fastai-library-88712a68e4d4
- But the goal is, instead of the canser dataset, use USD/EUR candlestick captures from https://finviz.com/forex_charts.ashx?t=EURUSD&tf=m5. Labels are BUY and SELL.

In [18]:
!echo Using `python --version`
!echo
!echo Current directory `pwd`
!echo
!echo Available image data folders:
!du -sh data/images/*

Using Python 3.7.1

Current directory /home/jovyan/work

Available image data folders:
44M	data/images/chiffon
67M	data/images/denim
84M	data/images/faux_fur
60M	data/images/faux_leather
73M	data/images/lace
56M	data/images/linen
66M	data/images/satin
124M	data/images/sequin
54M	data/images/velvet


In [2]:
from pathlib import Path
from random import shuffle
import pandas as pd

## 1. Make a dataframe

In [3]:
# A regular list of some images

fpaths = list(Path('./data/images/').rglob('*.jpg'))
fstems = [fp.stem for fp in fpaths]

In [4]:
# Series of the same images

df = pd.concat([pd.Series(fpaths, name='fpaths'), 
                pd.Series(fstems, name='fstems')], 
               axis=1)
df.head()

Unnamed: 0,fpaths,fstems
0,data/images/denim/00000343.jpg,343
1,data/images/denim/00000076.jpg,76
2,data/images/denim/00000170.jpg,170
3,data/images/denim/00000103.jpg,103
4,data/images/denim/00000069.jpg,69


In [5]:
# Convert 'fpaths' column from the object-type the string 

df['fpaths'] = df['fpaths'].apply(lambda x: str(x))
df.head()

Unnamed: 0,fpaths,fstems
0,data/images/denim/00000343.jpg,343
1,data/images/denim/00000076.jpg,76
2,data/images/denim/00000170.jpg,170
3,data/images/denim/00000103.jpg,103
4,data/images/denim/00000069.jpg,69


In [6]:
# Make *label* column

df['label'] = df['fpaths'].str.split('/', expand=True)[2]
df.head()

Unnamed: 0,fpaths,fstems,label
0,data/images/denim/00000343.jpg,343,denim
1,data/images/denim/00000076.jpg,76,denim
2,data/images/denim/00000170.jpg,170,denim
3,data/images/denim/00000103.jpg,103,denim
4,data/images/denim/00000069.jpg,69,denim


In [7]:
# Export the dataframe to a csv file

df.to_csv('/tmp/mydataframe.csv')
!head /tmp/mydataframe.csv

,fpaths,fstems,label
0,data/images/denim/00000343.jpg,00000343,denim
1,data/images/denim/00000076.jpg,00000076,denim
2,data/images/denim/00000170.jpg,00000170,denim
3,data/images/denim/00000103.jpg,00000103,denim
4,data/images/denim/00000069.jpg,00000069,denim
5,data/images/denim/00000182.jpg,00000182,denim
6,data/images/denim/00000019.jpg,00000019,denim
7,data/images/denim/00000434.jpg,00000434,denim
8,data/images/denim/00000092.jpg,00000092,denim


In [8]:
# Import the csv to the dataframe

df = pd.read_csv('/tmp/mydataframe.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,fpaths,fstems,label
0,0,data/images/denim/00000343.jpg,343,denim
1,1,data/images/denim/00000076.jpg,76,denim
2,2,data/images/denim/00000170.jpg,170,denim
3,3,data/images/denim/00000103.jpg,103,denim
4,4,data/images/denim/00000069.jpg,69,denim


## 2. Make a databunch (a set of datasets and dataloaders)

In [9]:
from fastai import *
from fastai.vision import *
from fastai.vision.image import *
from fastai.vision.data import  * 

data = ImageList.from_df(path='.', df=df, cols='fpaths')
data

ImageList (4655 items)
Image (3, 480, 480),Image (3, 800, 800),Image (3, 580, 540),Image (3, 1500, 1000),Image (3, 704, 550)
Path: .

In [10]:
data = data.split_by_rand_pct(valid_pct=0.2, seed=10)
data

ItemLists;

Train: ImageList (3724 items)
Image (3, 480, 480),Image (3, 580, 540),Image (3, 1500, 1000),Image (3, 498, 375),Image (3, 560, 420)
Path: .;

Valid: ImageList (931 items)
Image (3, 563, 450),Image (3, 417, 300),Image (3, 400, 400),Image (3, 480, 320),Image (3, 465, 370)
Path: .;

Test: None

In [11]:
data = data.label_from_df(cols='label')
data

LabelLists;

Train: LabelList (3724 items)
x: ImageList
Image (3, 480, 480),Image (3, 580, 540),Image (3, 1500, 1000),Image (3, 498, 375),Image (3, 560, 420)
y: CategoryList
denim,denim,denim,denim,denim
Path: .;

Valid: LabelList (931 items)
x: ImageList
Image (3, 563, 450),Image (3, 417, 300),Image (3, 400, 400),Image (3, 480, 320),Image (3, 465, 370)
y: CategoryList
denim,faux_leather,denim,sequin,denim
Path: .;

Test: None

In [12]:
#data = data.transform(tfms=tfms, size=49, padding_mode=zeros)
#data

In [13]:
tfms = get_transforms(do_flip=True, 
                      flip_vert=True, 
                      max_rotate=4., 
                      max_zoom=1.1, 
                      max_lighting=0.2, 
                      max_warp=0., 
                      p_affine=0.75, 
                      p_lighting=0.75)

In [14]:
data = data.databunch(bs=128, num_workers=4)
data



Shapes of the inputs/targets:
[[torch.Size([3, 480, 360]), torch.Size([3, 589, 408]), torch.Size([3, 1020, 640]), torch.Size([3, 1040, 800]), torch.Size([3, 600, 450]), torch.Size([3, 551, 343]), torch.Size([3, 699, 1045]), torch.Size([3, 1020, 640]), torch.Size([3, 1746, 1600]), torch.Size([3, 800, 800]), torch.Size([3, 417, 300]), torch.Size([3, 500, 384]), torch.Size([3, 496, 331]), torch.Size([3, 700, 600]), torch.Size([3, 466, 466]), torch.Size([3, 711, 540]), torch.Size([3, 1596, 1200]), torch.Size([3, 530, 400]), torch.Size([3, 400, 300]), torch.Size([3, 336, 224]), torch.Size([3, 3297, 5125]), torch.Size([3, 1530, 1020]), torch.Size([3, 225, 164]), torch.Size([3, 427, 295]), torch.Size([3, 900, 600]), torch.Size([3, 632, 500]), torch.Size([3, 445, 257]), torch.Size([3, 225, 225]), torch.Size([3, 1370, 1050]), torch.Size([3, 1200, 800]), torch.Size([3, 466, 466]), torch.Size([3, 2400, 3200]), torch.Size([3, 435, 290]), torch.Size([3, 480, 360]), torch.Size([3, 250, 275]), torch.

ImageDataBunch;

Train: LabelList (3724 items)
x: ImageList
Image (3, 480, 480),Image (3, 580, 540),Image (3, 1500, 1000),Image (3, 498, 375),Image (3, 560, 420)
y: CategoryList
denim,denim,denim,denim,denim
Path: .;

Valid: LabelList (931 items)
x: ImageList
Image (3, 563, 450),Image (3, 417, 300),Image (3, 400, 400),Image (3, 480, 320),Image (3, 465, 370)
y: CategoryList
denim,faux_leather,denim,sequin,denim
Path: .;

Test: None

In [15]:
# normalization

data = data.normalize()

RuntimeError: Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/opt/conda/lib/python3.7/site-packages/fastai/torch_core.py", line 121, in data_collate
    return torch.utils.data.dataloader.default_collate(to_data(batch))
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 232, in default_collate
    return [default_collate(samples) for samples in transposed]
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 232, in <listcomp>
    return [default_collate(samples) for samples in transposed]
  File "/opt/conda/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 209, in default_collate
    return torch.stack(batch, 0, out=out)
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 0. Got 563 and 417 in dimension 2 at /pytorch/aten/src/TH/generic/THTensorMoreMath.cpp:1307
