# Data retrieval

In [1]:
# Show the current working directory; the relative paths used below are resolved from it.
!pwd

/home/gian/Scrivania/github/cytokinin/tutorials


In [2]:
!pip uninstall cytokinin -y

Uninstalling cytokinin-0.0.1:
  Successfully uninstalled cytokinin-0.0.1


In [3]:
!pip install ./../cytokinin

Processing /home/gian/Scrivania/github/cytokinin/cytokinin
Building wheels for collected packages: cytokinin
  Building wheel for cytokinin (setup.py) ... [?25ldone
[?25h  Created wheel for cytokinin: filename=cytokinin-0.0.1-cp37-none-any.whl size=12844 sha256=5bf6d74d5d66ba8da631466390abcd3f6e6749b492eefc6e959dff5d551705de
  Stored in directory: /tmp/pip-ephem-wheel-cache-7oaoe1fm/wheels/20/e3/ab/fd92d6e1db4dfa1a64713490d13b91970b9fc4d91d15b9af00
Successfully built cytokinin
Installing collected packages: cytokinin
Successfully installed cytokinin-0.0.1


In [4]:
import os
import json
import numpy as np
import pandas as pd
from pathlib import Path

In [5]:
import cytokinin as ck

In [6]:
# Parent of the current working directory; a Path can be used much like a str path.
root = Path('..')

In [7]:
# Locate the example image files that ship with the package tests
MOCKS = root / 'cytokinin' / 'cytokinin' / 'tests' / 'mocks'
IMGS = MOCKS.joinpath('imgs')  # still a Path object
os.listdir(IMGS)

['dog', 'stone']

## Load data

### Load from list

In [8]:
# Collect the path of every file found under IMGS, walking sub-folders bottom-up.
# A comprehension is used so that no loop variable leaks into the notebook
# namespace: the previous loop variable `root` overwrote the `root` Path defined
# earlier, which broke re-running the cell that builds MOCKS from it.
flist = [
    os.path.join(dirpath, filename)
    for dirpath, _dirnames, filenames in os.walk(IMGS, topdown=False)
    for filename in filenames
]
flist[:5]

['../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_35.jpeg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_19.jpeg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_31.jpeg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_8.jpg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_13.jpg']

In [9]:
from cytokinin.data import take_data
# Create a Data object for the 'images' data type; file paths are loaded into it in the next cell
imgs = take_data('images')
imgs

INFO:root:CYTOKININ LOADED


<cytokinin.data.Data at 0x7f0e1c5a8990>

In [10]:
imgs.store_filesnames_from_list(flist) # load the file paths collected in `flist`
imgs.filesnames.head() # show the first stored entries

0    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1    ../cytokinin/cytokinin/tests/mocks/imgs/stone/...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
4    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data15785056102272701, dtype: object

### Load from DataFrame

In [11]:
# Wrap the path list in a single-column DataFrame, used below to demonstrate loading from a DataFrame
df = pd.DataFrame({'files': flist})
df.head()

Unnamed: 0,files
0,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
4,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...


In [12]:
# Start from a new Data object and load the paths from the 'files' column of `df`
imgs = take_data('images')
imgs.store_filesnames_from_df(df, 'files') # presumably only unique paths are kept - TODO confirm
imgs.filesnames.head()

0    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1    ../cytokinin/cytokinin/tests/mocks/imgs/stone/...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
4    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data1578505610607414, dtype: object

### Load from folder

In [16]:
# Load file paths from the 'dog' folder; the result of the chained call is used
# as the Data object on the next line
dogs_folder = IMGS.joinpath('dog')
dogs = take_data('images').store_filesnames_from_folder(dogs_folder)
dogs.filesnames.head()

0    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
4    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data1578505628398423, dtype: object

### Load from folder interactively

Here you select a wrong folder (for example, one without images) to see how the error is reported.

In [17]:
dogs = take_data('images')
try:
    # gui=True: the folder is selected interactively instead of being passed as an argument
    dogs.store_filesnames_from_folder(gui=True, include_subdirs=False)
except Exception as e:
    # The failure is the point of this demo: show the error and what is stored afterwards
    print(e)
    print(f'filesnames:\n{dogs.filesnames}')

Here you select a valid folder, i.e. one that contains images.

In [18]:
dogs = take_data('images')
# Same interactive selection, this time with include_subdirs=True
dogs.store_filesnames_from_folder(gui=True, include_subdirs=True)
dogs.filesnames.head()

0    /home/gian/Scrivania/github/cytokinin/cytokini...
1    /home/gian/Scrivania/github/cytokinin/cytokini...
2    /home/gian/Scrivania/github/cytokinin/cytokini...
3    /home/gian/Scrivania/github/cytokinin/cytokini...
4    /home/gian/Scrivania/github/cytokinin/cytokini...
Name: data15785056435063086, dtype: object

## Adding data

### Add from Data

In [19]:
# Build two separate Data objects, one per class folder; they are merged in the cells below
dogs_folder = IMGS.joinpath('dog')
dogs = take_data('images').store_filesnames_from_folder(dogs_folder)
stones_folder = IMGS.joinpath('stone')
stones = take_data('images').store_filesnames_from_folder(stones_folder)

In [21]:
# Copy `dogs` before adding to it (presumably so the original object is left untouched - TODO confirm)
dogs_and_stones = dogs.copy()
print(dogs_and_stones)

Object <class 'cytokinin.data.Data'>, of data type "images"
,             35 file paths stored
             0 labels stored.


In [22]:
# Add the stone file paths to the copy; the printed summary goes from 35 to 75 stored paths.
# NOTE(review): the object is modified by this call, so re-running the cell may not be idempotent - confirm.
dogs_and_stones.add_from_data(stones)
print(dogs_and_stones)

Object <class 'cytokinin.data.Data'>, of data type "images"
,             75 file paths stored
             0 labels stored.


## Label

### from filesnames folder

In [23]:
# Label the stored files from their folder (the labels obtained here are 'dog' and 'stone')
dogs_and_stones.label_from_folder()
# Inspect the distinct labels and how many files carry each one
print(dogs_and_stones.labels.unique())
dogs_and_stones.labels.value_counts()

['dog' 'stone']


stone    40
dog      35
Name: data1578505667260263, dtype: int64

### from CSV file

In [27]:
# Load the dog images
dogs = take_data('images').store_filesnames_from_folder(IMGS/'dog')
# Load the stone images
stones = take_data('images').store_filesnames_from_folder(IMGS/'stone')
# Merge the two Data sets, starting from a copy of `dogs`
dogs_and_stones2 = dogs.copy().add_from_data(stones)
print(f'Before:\n{dogs_and_stones2}')

# Label the merged Data set from column 'Y' of the CSV file
# (despite its name, `csv_url` is a local file path, not a URL)
csv_url = MOCKS/'labels'/'dogsandstones_labes.csv'
dogs_and_stones2.label_from_csv(csv_url, col='Y')
print(f'After:\n{dogs_and_stones2}')

DEBUG:root:COL: Y


Before:
Object <class 'cytokinin.data.Data'>, of data type "images"
,             75 file paths stored
             0 labels stored.
After:
Object <class 'cytokinin.data.Data'>, of data type "images"
,             75 file paths stored
             75 labels stored.


### from CSV file interactively

In [34]:
## Experimental! Available soon...
## The sketch below is intentionally disabled; it mirrors the CSV labelling cell above with gui=True.
# dogs_and_stones2 = dogs.copy()
# dogs_and_stones2.add_from_data(stones)
# print(dogs_and_stones2)
# dogs_and_stones2.label_from_csv(csv_url, gui=True)
# print(dogs_and_stones2)

## Print

In [28]:
# Printing a Data object summarises its data type and how many file paths and labels it stores
print(dogs)

Object <class 'cytokinin.data.Data'>, of data type "images"
,             35 file paths stored
             0 labels stored.


## Converting with `.to()`

### to Paths list

In [29]:
# Export the stored files as pathlib Path objects and keep the first three
top_3_as_pathlist = dogs.to('pathlist')[:3]
print(f'shape: {np.shape(top_3_as_pathlist)}')
top_3_as_pathlist

shape: (3,)


[PosixPath('../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_18.jpg'),
 PosixPath('../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_22.jpg'),
 PosixPath('../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_3.jpeg')]

### to list

In [30]:
# Export the stored files as plain string paths and keep the first three
top_3_as_list = dogs.to('list')[:3]
print(f'shape: {np.shape(top_3_as_list)}')
top_3_as_list

shape: (3,)


['../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_18.jpg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_22.jpg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_3.jpeg']

### to Array

In [31]:
# Export the images as pixel arrays (one array per image) and keep the first three
top_3_as_array = dogs.to('arrays')[:3] # presumably array_mode accepts 'rgb', 'gray' or 'grey' - TODO confirm
print(f'shape: {np.shape(top_3_as_array)}')
top_3_as_array

shape: (3,)


[array([[[ 33,  30,  26],
         [ 33,  30,  26],
         [ 35,  32,  28],
         ...,
         [147, 145, 135],
         [147, 145, 135],
         [147, 145, 135]],
 
        [[ 33,  30,  26],
         [ 33,  30,  26],
         [ 35,  32,  28],
         ...,
         [147, 145, 135],
         [147, 145, 135],
         [147, 145, 135]],
 
        [[ 33,  30,  26],
         [ 33,  30,  26],
         [ 35,  32,  28],
         ...,
         [147, 145, 135],
         [147, 145, 135],
         [147, 145, 135]],
 
        ...,
 
        [[ 67, 133, 238],
         [ 67, 133, 238],
         [ 68, 134, 239],
         ...,
         [ 98, 149, 235],
         [ 98, 149, 235],
         [ 98, 149, 235]],
 
        [[ 67, 133, 238],
         [ 67, 133, 238],
         [ 68, 134, 239],
         ...,
         [ 98, 149, 235],
         [ 98, 149, 235],
         [ 98, 149, 235]],
 
        [[ 67, 133, 238],
         [ 67, 133, 238],
         [ 68, 134, 239],
         ...,
         [ 98, 149, 235],
  

### to Pandas.DataFrame

In [32]:
# Export the stored file paths as a single-column DataFrame and keep the first three rows
top_3_as_df = dogs.to('dataframe')[:3]
print(f'shape: {np.shape(top_3_as_df)}')
top_3_as_df

shape: (3, 1)


Unnamed: 0,data1578505795709833
0,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...


### to Pandas.Series

In [33]:
# Export the stored file paths as a pandas Series and keep the first three entries
top_3_as_series = dogs.to('series')[:3]
print(f'shape: {np.shape(top_3_as_series)}')
top_3_as_series

shape: (3,)


0    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data1578505795709833, dtype: object