# Training with the Fastai's CNN

In [79]:
!echo Using `python --version`
!echo
!echo Current directory `pwd`
!echo
!echo Available image data folders:
!du -sh data/images/*

Using Python 3.7.1

Current directory /home/jovyan/work

Available image data folders:
44M	data/images/chiffon
67M	data/images/denim
84M	data/images/faux_fur
60M	data/images/faux_leather
73M	data/images/lace
56M	data/images/linen
66M	data/images/satin
124M	data/images/sequin
54M	data/images/velvet


In [81]:
from pathlib import Path
from random import shuffle
import pandas as pd

## 1. Make a dataframe

In [95]:
# A regular list of some images

fpaths = list(Path('./data/images/').rglob('*.jpg'))
fstems = [fp.stem for fp in fpaths]

In [97]:
# Series of the same images

df = pd.concat([pd.Series(fpaths, name='fpaths'), 
                pd.Series(fstems, name='fstems')], 
               axis=1)
df.head()

Unnamed: 0,fpaths,fstems
0,data/images/denim/00000343.jpg,343
1,data/images/denim/00000076.jpg,76
2,data/images/denim/00000170.jpg,170
3,data/images/denim/00000103.jpg,103
4,data/images/denim/00000069.jpg,69


In [98]:
# Convert 'fpaths' column from the object-type the string 

df['fpaths'] = df['fpaths'].apply(lambda x: str(x))
df.head()

Unnamed: 0,fpaths,fstems
0,data/images/denim/00000343.jpg,343
1,data/images/denim/00000076.jpg,76
2,data/images/denim/00000170.jpg,170
3,data/images/denim/00000103.jpg,103
4,data/images/denim/00000069.jpg,69


In [85]:
# Make *label* column

df['label'] = df['fpaths'].str.split('/', expand=True)[2]
df.head()

Unnamed: 0,fstems,fpaths,label
0,350,data/images/sequin/00000350.jpg,sequin
1,472,data/images/lace/00000472.jpg,lace
2,203,data/images/velvet/00000203.jpg,velvet
3,325,data/images/satin/00000325.jpg,satin
4,380,data/images/denim/00000380.jpg,denim


In [99]:
# Export the dataframe to a csv file

df.to_csv('/tmp/mydataframe.csv')
!head /tmp/mydataframe.csv

,fpaths,fstems
0,data/images/denim/00000343.jpg,00000343
1,data/images/denim/00000076.jpg,00000076
2,data/images/denim/00000170.jpg,00000170
3,data/images/denim/00000103.jpg,00000103
4,data/images/denim/00000069.jpg,00000069
5,data/images/denim/00000182.jpg,00000182
6,data/images/denim/00000019.jpg,00000019
7,data/images/denim/00000434.jpg,00000434
8,data/images/denim/00000092.jpg,00000092


In [100]:
# Import the csv to the dataframe

df = pd.read_csv('/tmp/mydataframe.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,fpaths,fstems
0,0,data/images/denim/00000343.jpg,343
1,1,data/images/denim/00000076.jpg,76
2,2,data/images/denim/00000170.jpg,170
3,3,data/images/denim/00000103.jpg,103
4,4,data/images/denim/00000069.jpg,69


## 2. Make a databunch (a set of datasets and dataloaders)