# Data retrieval

In [1]:
# Print the notebook's working directory; the relative paths used below are resolved against it.
!pwd

/home/gian/Scrivania/github/cytokinin/tutorials


In [2]:
!pip uninstall cytokinin -y

Uninstalling cytokinin-0.0.1:
  Successfully uninstalled cytokinin-0.0.1


In [3]:
!pip install ./../cytokinin

Processing /home/gian/Scrivania/github/cytokinin/cytokinin
Building wheels for collected packages: cytokinin
  Building wheel for cytokinin (setup.py) ... [?25ldone
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-xy46pce8/wheels/20/e3/ab/fd92d6e1db4dfa1a64713490d13b91970b9fc4d91d15b9af00
Successfully built cytokinin
Installing collected packages: cytokinin
Successfully installed cytokinin-0.0.1


In [4]:
import os
import json
import numpy as np
import pandas as pd
from pathlib import Path

In [5]:
import cytokinin as ck

In [6]:
# Parent of the working directory, held as a pathlib.Path rather than a plain str
root = Path('./../')

In [7]:
# Locate the example image files shipped with the test mocks
MOCKS = root.joinpath('./cytokinin/cytokinin/tests/mocks/')
IMGS = MOCKS.joinpath('imgs')  # joining yields another Path object
os.listdir(str(IMGS))

['dog', 'stone']

## Load data

### Load from list

In [8]:
# Recursively collect the path of every file found under IMGS.
# The walk variable is named `dirpath` (not `root`) so that the `root` Path
# defined earlier is not overwritten with a plain str by os.walk, which would
# break any cell that later calls `root.joinpath(...)`.
flist = [
    os.path.join(dirpath, fname)
    for dirpath, _dirnames, fnames in os.walk(IMGS, topdown=False)
    for fname in fnames
]
flist[:5]

['../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_35.jpeg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_19.jpeg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_31.jpeg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_8.jpg',
 '../cytokinin/cytokinin/tests/mocks/imgs/dog/dog_13.jpg']

In [9]:
from cytokinin.data import take_data
# Create a cytokinin.data.Data object for the "images" data type
imgs = take_data('images')
imgs

INFO:root:CYTOKININ LOADED


<cytokinin.data.Data at 0x7fa74188a208>

In [10]:
# NOTE(review): the recorded output below is ordered differently from `flist`
# (a "stone" path comes first) — confirm whether storing reorders or deduplicates.
imgs.store_filesnames_from_list(flist) # store the file paths collected in `flist`
imgs.filesnames.head() # show the first stored paths

0    ../cytokinin/cytokinin/tests/mocks/imgs/stone/...
1    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3    ../cytokinin/cytokinin/tests/mocks/imgs/stone/...
4    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data15783445960064852, dtype: object

### Load from DataFrame

In [11]:
# Wrap the collected file paths in a single-column DataFrame named 'files'
df = pd.DataFrame(dict(files=flist))
df.head()

Unnamed: 0,files
0,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
4,../cytokinin/cytokinin/tests/mocks/imgs/dog/do...


In [12]:
# Start from a fresh Data object and load the paths from the 'files' column of `df`
imgs = take_data('images')
imgs.store_filesnames_from_df(df, 'files') # presumably keeps only unique paths — confirm
imgs.filesnames.head()

0    ../cytokinin/cytokinin/tests/mocks/imgs/stone/...
1    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3    ../cytokinin/cytokinin/tests/mocks/imgs/stone/...
4    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data15783445963142507, dtype: object

### Load from folder

In [13]:
# Load every file path found in the "dog" sub-folder of the example images
dogs_folder = IMGS / 'dog'
dogs = take_data('images').store_filesnames_from_folder(dogs_folder)
dogs.filesnames.head()

0    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
1    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
2    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
3    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
4    ../cytokinin/cytokinin/tests/mocks/imgs/dog/do...
Name: data15783445963918128, dtype: object

### Load from folder interactively

Here you select a wrong folder; any resulting error is caught and printed.

In [14]:
# Interactive variant: no folder is passed, `gui=True` is set instead
# (presumably opens a folder-selection dialog — confirm).
dogs = take_data('images')
try:
    dogs.store_filesnames_from_folder(gui=True, include_subdirs=False)
except Exception as e:
    # Deliberately broad catch: this cell demonstrates the failure, so the
    # error is printed instead of stopping the notebook.
    print(e)
    # Show what (if anything) ended up stored after the failed attempt
    print(f'filesnames:\n{dogs.filesnames}')

DEBUG:matplotlib.pyplot:Loaded backend module://ipykernel.pylab.backend_inline version unknown.
DEBUG:matplotlib.pyplot:Loaded backend module://ipykernel.pylab.backend_inline version unknown.


Here you select a valid folder, i.e. one that contains images.

In [15]:
# Same interactive call as before, this time with include_subdirs=True
# (presumably also searches sub-folders of the chosen folder — confirm).
dogs = take_data('images')
dogs.store_filesnames_from_folder(gui=True, include_subdirs=True)
dogs.filesnames.head()

0    /home/gian/Scrivania/github/cytokinin/tutorial...
1    /home/gian/Scrivania/github/cytokinin/tutorial...
2    /home/gian/Scrivania/github/cytokinin/tutorial...
Name: data15783446005680225, dtype: object

## Adding data

### Add from Data

In [None]:
# Build one Data object per class folder: dogs first, then stones
dogs_folder = IMGS / 'dog'
dogs = take_data('images').store_filesnames_from_folder(dogs_folder)
stones_folder = IMGS / 'stone'
stones = take_data('images').store_filesnames_from_folder(stones_folder)

In [None]:
# Work on a copy so that `dogs` itself is left untouched by the merge below
dogs_and_stones = dogs.copy()
str(dogs_and_stones) # textual summary of the Data object

In [None]:
# Add the content of `stones` to the copy.
# NOTE(review): not idempotent — re-running this cell adds `stones` again; confirm how duplicates are handled.
dogs_and_stones.add_from_data(stones)
str(dogs_and_stones) # summary after the merge

## Label

### from filesnames folder

In [None]:
# Assign labels (presumably derived from each file's parent folder name — confirm)
dogs_and_stones.label_from_folder()
# Distinct label values, followed by how many files carry each of them
print(dogs_and_stones.labels.unique())
dogs_and_stones.labels.value_counts()

### From CSV file

In [None]:
# Load the dog images
dogs = take_data('images').store_filesnames_from_folder(IMGS/'dog')
# Load the stone images
stones = take_data('images').store_filesnames_from_folder(IMGS/'stone')
# Merge both sets into a new Data object (the copy keeps `dogs` unchanged)
dogs_and_stones2 = dogs.copy().add_from_data(stones)
print(dogs_and_stones2)

# Label the merged set from a CSV file in the mocks folder.
# `csv_url` is a local Path, not a URL; `col='Y'` presumably names the label column — confirm.
csv_url = MOCKS/'labels'/'dogsandstones_labes.csv'
dogs_and_stones2.label_from_csv(csv_url, col='Y')
print(dogs_and_stones2)

### From CSV file interactively

In [None]:
# NOTE(review): disabled draft of the interactive variant. It builds
# `dogs_and_stones2` but then prints and labels `dogs_and_stones` — reconcile
# the variable names before enabling it.
# dogs_and_stones2 = dogs.copy()
# dogs_and_stones2.add_from_data(stones)
# print(dogs_and_stones)
# dogs_and_stones.label_from_csv(csv_url, gui=True)
# print(dogs_and_stones)

________________

**TODO:** this section is still to be finished.

In [16]:
# NOTE(review): this cell repeats the "from csv file" cell above verbatim; keep only one copy.
# Load the dog images
dogs = take_data('images').store_filesnames_from_folder(IMGS/'dog')
# Load the stone images
stones = take_data('images').store_filesnames_from_folder(IMGS/'stone')
# Merge both sets into a new Data object (the copy keeps `dogs` unchanged)
dogs_and_stones2 = dogs.copy().add_from_data(stones)
print(dogs_and_stones2)

# Label the merged set from a CSV file in the mocks folder (`csv_url` is a local Path, not a URL)
csv_url = MOCKS/'labels'/'dogsandstones_labes.csv'
dogs_and_stones2.label_from_csv(csv_url, col='Y')
print(dogs_and_stones2)

Object <class 'cytokinin.data.Data'>, of data type "images"
,             75 file paths stored
             0 labels stored.
Object <class 'cytokinin.data.Data'>, of data type "images"
,             75 file paths stored
             75 labels stored.


In [13]:
from pathlib import Path

import tkinter as tk
from cytokinin.utils.funx import infer_file_cols_dtypes



In [15]:
# Rebuild the path to the labels CSV from the parent of the working directory
root = Path('./../') # held as a pathlib.Path rather than a plain str
MOCKS = root.joinpath('./cytokinin/cytokinin/tests/mocks/')
csv_url = MOCKS/'labels'/'dogsandstones_labes.csv'

# NOTE(review): `select_df_col` is neither imported nor defined in any visible cell
# (only `infer_file_cols_dtypes` is imported just above), so on a fresh kernel this
# presumably raises NameError — import it from its defining module before this call.
chosen_col = select_df_col(csv_url, ftype='csv')
chosen_col

'Unnamed: 0'

_____________

## Print

In [None]:
# print() is used on purpose: it shows the object's textual summary, whereas the
# bare cell output of a Data object is only the default `<... at 0x...>` repr.
print(dogs)

## Convert with `to()`

### to Paths list

In [None]:
# Convert the stored data with the 'pathlist' target and keep the first three items
top_3_as_pathlist = dogs.to('pathlist')[:3]
print(f'shape: {np.shape(top_3_as_pathlist)}')
top_3_as_pathlist

### to list

In [None]:
# Convert the stored data with the 'list' target and keep the first three items
top_3_as_list = dogs.to('list')[:3]
print(f'shape: {np.shape(top_3_as_list)}')
top_3_as_list

### to Array

In [None]:
# Convert the stored data with the 'arrays' target and keep the first three items.
# NOTE(review): an optional `array_mode` argument presumably accepts 'rgb', 'gray' or 'grey' — confirm.
top_3_as_array = dogs.to('arrays')[:3]
print(f'shape: {np.shape(top_3_as_array)}')
top_3_as_array

### to Pandas.DataFrame

In [None]:
# Convert the stored data with the 'dataframe' target and keep the first three rows
top_3_as_df = dogs.to('dataframe')[:3]
print(f'shape: {np.shape(top_3_as_df)}')
top_3_as_df

### to Pandas.Series

In [None]:
# Convert the stored data with the 'series' target and keep the first three entries
top_3_as_series = dogs.to('series')[:3]
print(f'shape: {np.shape(top_3_as_series)}')
top_3_as_series

### Export to fastAI