In [1]:
# CSS overrides
# HTML is imported from IPython.display, the documented public location.
from IPython.display import HTML

# Read the stylesheet inside a context manager so the file handle is closed
# as soon as the text is in memory instead of lingering until garbage
# collection.
with open('style-notebook.css') as stylesheet:
    css = stylesheet.read()

# Last expression of the cell: the notebook renders the returned HTML object,
# which injects the stylesheet into the page.
HTML('<style>{}</style>'.format(css))

In [8]:
import os

# Work from /tmp: the later cells list the current working directory
# (glob.glob('*'), os.listdir(), `!tree`), so this decides what they show.
os.chdir('/tmp')

# Build a small fixture hierarchy with IPython `!` shell escapes.
# `mkdir -p` creates the intermediate directories and does not fail if they
# already exist; `touch` creates the empty files used in the listings.
!mkdir -p foo/bar/baz/biff
!touch foo/boff.log foo/bar/snurgle.txt
!mkdir -p zzz/zzz
!touch zzz/zzz/1_super.txt
!touch zzz/zzz/sapling.txt

# PyAtl

## April 14, 2016
## General Assembly

# PyAtl
## April 14, 2016

* Welcome
* Hello from GA!
* Announcements
* Talks

# Hello from GA!

# Announcements

<img src="PyCon16-logo.svg" width="600" class="center-block">

## Talks

*Implementing the* `tree` *command in Python* (beginner)<br/>
— Daniel Rocco

*Faster Debugging with PDB: No More Printing Out Objects*<br/>
—Benjamin Crom

<br/>

*Python on AWS Lambda: Practical Applications*<br/>
—Brian Morton

# sapling

### Implementing `tree` in Python

<script>$("#slide-4-0").data("backgroundImage", "Sapling_on_a_stub.jpg")</script>

In [10]:
!tree

[30;42m.[00m
├── config-err-nMfopY
├── [01;34mfoo[00m
│   ├── [01;34mbar[00m
│   │   ├── [01;34mbaz[00m
│   │   │   └── [01;34mbiff[00m
│   │   └── snurgle.txt
│   └── boff.log
├── [01;34mgpg-Rm8aoI[00m
│   └── [01;35mS.gpg-agent[00m
├── [01;34mhsperfdata_drocco[00m
│   └── 4048
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── [01;34mssh-cZcqyXaOUDP2[00m
│   └── [01;35magent.2167[00m
├── [01;34msystemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR[00m [error opening dir]
├── [01;34msystemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K[00m [error opening dir]
├── [01;34mtmux-1000[00m
│   └── [01;35mdefault[00m
└── [01;34mzzz[00m
    └── [01;34mzzz[00m
        ├── 1_super.txt
        └── sapling.txt

12 directories, 14 files


Q:

*How do we approach this problem?*

# Key: Work Incrementally

“code a little, test a little”

*break the work into* **small, easy chunks**


# Key: Work Incrementally

“code a little, test a little”

*even if you're not using automated testing,*

**check your work often!**

# Key: Work Incrementally

*IPython* or *Jupyter Notebook* provide 

**automatic history** 

to review progress & backtrack if necessary

# Key: Library Leverage

There are *multiple high-quality tools* in the

**standard library**

to help complete this task

In [4]:
def tree():
    """Print the root of the listing: a single ``.`` line.

    This is the first increment of the walkthrough; no directory contents
    are listed yet.
    """
    root_marker = '.'
    print(root_marker)

In [5]:
tree()

.


Toolbox: `os.listdir`

In [11]:
os.listdir()

['+~JF2068339028303085085.tmp',
 'ssh-cZcqyXaOUDP2',
 '+~JF6869405115425776864.tmp',
 'tmux-1000',
 '.ICE-unix',
 '.X11-unix',
 'hsperfdata_drocco',
 'config-err-nMfopY',
 '.esd-112',
 'gpg-Rm8aoI',
 '+~JF4064088703298571803.tmp',
 '+~JF3945977373033545895.tmp',
 '.font-unix',
 '+~JF3389823636842465214.tmp',
 'systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR',
 '.XIM-unix',
 'systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K',
 '.X0-lock',
 'foo',
 '.org.chromium.Chromium.rI50TZ',
 'zzz',
 '.Test-unix',
 '.esd-1000']

`os.listdir` will do the job, but we'd need to filter dotfiles ourselves…

It turns out…

there's already another tool that does this!

In [12]:
import glob
glob.glob('*')

['+~JF2068339028303085085.tmp',
 'ssh-cZcqyXaOUDP2',
 '+~JF6869405115425776864.tmp',
 'tmux-1000',
 'hsperfdata_drocco',
 'config-err-nMfopY',
 'gpg-Rm8aoI',
 '+~JF4064088703298571803.tmp',
 '+~JF3945977373033545895.tmp',
 '+~JF3389823636842465214.tmp',
 'systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR',
 'systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K',
 'foo',
 'zzz']

In [13]:
def tree():
    """Print ``.`` followed by one ``├── name`` line per entry in the cwd.

    Entries come from ``glob.glob('*')``, so dotfiles are left out and the
    lines appear in whatever order ``glob`` returns them (no sorting yet).
    """
    print('.')
    for line in map('├── {}'.format, glob.glob('*')):
        print(line)

In [14]:
tree()

.
├── +~JF2068339028303085085.tmp
├── ssh-cZcqyXaOUDP2
├── +~JF6869405115425776864.tmp
├── tmux-1000
├── hsperfdata_drocco
├── config-err-nMfopY
├── gpg-Rm8aoI
├── +~JF4064088703298571803.tmp
├── +~JF3945977373033545895.tmp
├── +~JF3389823636842465214.tmp
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── foo
├── zzz


Let's sort that…

In [15]:
def tree():
    """Print ``.`` and then the cwd's non-hidden entries in sorted order.

    The sort is Python's default string ordering (by code point), not a
    locale-aware collation.
    """
    print('.')
    entries = glob.glob('*')
    entries.sort()
    for entry in entries:
        print('├── ' + entry)

In [16]:
tree()

.
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── config-err-nMfopY
├── foo
├── gpg-Rm8aoI
├── hsperfdata_drocco
├── ssh-cZcqyXaOUDP2
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── tmux-1000
├── zzz


Hmm. Our sort

    ├── +~JF2068339028303085085.tmp
    ├── +~JF3389823636842465214.tmp
    ├── +~JF3945977373033545895.tmp
    ├── +~JF4064088703298571803.tmp
    ├── +~JF6869405115425776864.tmp
    ├── config-err-nMfopY
    
doesn't match that of the actual `tree`

In [17]:
!tree -L 1 | sed '1d;6q'

├── config-err-nMfopY
├── foo
├── gpg-Rm8aoI
├── hsperfdata_drocco
├── +~JF2068339028303085085.tmp


*Well why not?!*

man tree

*“Sort the output by name (as per ls)”*

$#@%&*!

man ls

*scroll scroll scroll*

$#@%&*!

google → SO

*“ls from coreutils performs a **locale-aware sort** by default…”*

Ohh…

google “python locale aware sort”

# Python’s `locale` module

> The locale module opens access to the POSIX locale database and functionality. 

# Python’s `locale` module

> The POSIX locale mechanism allows programmers to deal with certain cultural issues in an application, without requiring the programmer to know all the specifics of each country where the software is executed.

# Python’s `locale` module

> Applications typically start with a call of

    import locale
    locale.setlocale(locale.LC_ALL, '')

> This sets the locale for all categories to the user’s default setting (typically specified in the `LANG` environment variable).

# Python’s `locale` module

> According to POSIX, a program which has not called `setlocale(LC_ALL, '')` runs using the portable `'C'` locale. Calling `setlocale(LC_ALL, '')` lets it use the default locale as defined by the LANG variable.

Pause for a moment to appreciate

Python

and the high standard of excellence set by

its *documentation*

In [18]:
from locale import strxfrm

In [19]:
import locale

locale.getlocale()

('en_US', 'UTF-8')

In [22]:
locale.setlocale(locale.LC_ALL, '')

def tree():
    """Print ``.`` and then the cwd's non-hidden entries, collated by locale.

    Names are ordered with ``locale.strxfrm`` as the sort key, so the result
    follows whichever collation rules are active in the process when the
    function runs.
    """
    print('.')
    entries = glob.glob('*')
    entries.sort(key=locale.strxfrm)
    for entry in entries:
        print('├── {}'.format(entry))

In [23]:
tree()

.
├── config-err-nMfopY
├── foo
├── gpg-Rm8aoI
├── hsperfdata_drocco
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── ssh-cZcqyXaOUDP2
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── tmux-1000
├── zzz


Hooray!

5 Lines of Python…

5 Lines of Python → first-order approximation of `tree`

In [24]:
!tree -L 1

[30;42m.[00m
├── config-err-nMfopY
├── [01;34mfoo[00m
├── [01;34mgpg-Rm8aoI[00m
├── [01;34mhsperfdata_drocco[00m
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── [01;34mssh-cZcqyXaOUDP2[00m
├── [01;34msystemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR[00m
├── [01;34msystemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K[00m
├── [01;34mtmux-1000[00m
└── [01;34mzzz[00m

8 directories, 6 files


Hang on, but I don't *like* that sort!

Observe: 

building `tree` *educated* you

you understand more of how `tree` works,

which means

you can change it

Q:

*Why does* `tree` *sort this way?*

A:

`tree` *performs a locale-aware sort!*

∴ to change the sort, *change the locale!*

In [25]:
!LC_COLLATE=C tree

[30;42m.[00m
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── config-err-nMfopY
├── [01;34mfoo[00m
│   ├── [01;34mbar[00m
│   │   ├── [01;34mbaz[00m
│   │   │   └── [01;34mbiff[00m
│   │   └── snurgle.txt
│   └── boff.log
├── [01;34mgpg-Rm8aoI[00m
│   └── [01;35mS.gpg-agent[00m
├── [01;34mhsperfdata_drocco[00m
│   └── 4048
├── [01;34mssh-cZcqyXaOUDP2[00m
│   └── [01;35magent.2167[00m
├── [01;34msystemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR[00m [error opening dir]
├── [01;34msystemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K[00m [error opening dir]
├── [01;34mtmux-1000[00m
│   └── [01;35mdefault[00m
└── [01;34mzzz[00m
    └── [01;34mzzz[00m
        ├── 1_super.txt
        └── sapling.txt

12 directories, 14 files


Our version: fixing the last entry…

    ├── …
    ├── tmux-1000
    ├── zzz

    ^^^
    

In [26]:
def tree():
    """Print the cwd's non-hidden entries, closing the list with ``└──``.

    Prints ``.``, then one ``├── name`` line for every entry except the
    last, and a ``└── name`` line for the last entry. Entries are collated
    with ``locale.strxfrm``. An empty directory prints only the ``.`` line.
    """
    print('.')

    names = sorted(glob.glob('*'), key=locale.strxfrm)

    for name in names[:-1]:
        print('├──', name)

    # An explicit guard instead of for/else: the loop never breaks, so an
    # `else` would always run, and names[-1] raises IndexError when the
    # directory has no visible entries.
    if names:
        print('└──', names[-1])
    

In [27]:
tree()

.
├── config-err-nMfopY
├── foo
├── gpg-Rm8aoI
├── hsperfdata_drocco
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── ssh-cZcqyXaOUDP2
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── tmux-1000
└── zzz


SRP:

Split *node formatting* out from **driver/manager**

In [28]:
def visit_node(name, last=False):
    """Print one entry line: a branch marker followed by ``name``.

    Parameters:
        name: the text to show for the entry.
        last: when true, use the closing marker ``└── ``; otherwise ``├── ``.
    """
    # The markers carry their own trailing space because sep='' removes the
    # separator print() would otherwise put between marker and name; without
    # it the line came out as '├──name'.
    marker = '└── ' if last else '├── '
    print(marker, name, sep='')


def tree(root='.'):
    """Print ``root`` and then the non-hidden entries directly inside it.

    Parameters:
        root: directory to list; defaults to the current directory.

    Each entry is handed to ``visit_node`` by its base name, and the final
    entry is flagged with ``last=True``. Entries are collated with
    ``locale.strxfrm``. A directory with no visible entries prints only the
    ``root`` line.
    """
    print(root)

    # List inside `root` (previously the pattern was always '*', so the
    # argument was printed but the cwd was listed). glob.escape keeps
    # pattern characters in the directory name itself from being expanded.
    pattern = os.path.join(glob.escape(root), '*')
    names = sorted(glob.glob(pattern), key=locale.strxfrm)

    for name in names[:-1]:
        visit_node(os.path.basename(name))

    # Guard the last entry: names[-1] raises IndexError on an empty listing.
    if names:
        visit_node(os.path.basename(names[-1]), last=True)

Clean Architecture, Data & Transforms:

*Use an annotation transform to get rid of the special case for the last node*

In [29]:
def sorted_names(root):
    """Return the paths of the non-hidden entries directly under ``root``.

    Parameters:
        root: directory whose entries are listed.

    Returns:
        A list of paths (each prefixed with ``root``), collated with
        ``locale.strxfrm``. The list is empty when nothing matches.
    """
    # Escape the directory part so that a name such as 'a[1]' is matched
    # literally rather than being interpreted as a glob character class.
    pattern = os.path.join(glob.escape(root), '*')
    return sorted(glob.glob(pattern), key=locale.strxfrm)
    

def annotate(names):
    """Pair every name with a flag telling whether it is the final one.

    Yields ``(name, False)`` for each element except the last and
    ``(name, True)`` for the last; an empty sequence yields nothing.
    """
    final_index = len(names) - 1
    for index, name in enumerate(names):
        yield name, index == final_index
        

def visit_node(name, last=False):
    """Print a single entry line made of a branch marker and ``name``.

    The marker is ``└── `` when ``last`` is true and ``├── `` otherwise.
    """
    if last:
        marker = '└── '
    else:
        marker = '├── '
    print(marker + str(name))


def tree(root='.'):
    """Print ``root`` and one line per entry found directly under it.

    The entries come from ``sorted_names(root)``; ``annotate`` supplies the
    "is this the last one" flag, and each entry is shown by its base name
    through ``visit_node``.
    """
    print(root)

    entries = annotate(sorted_names(root))
    for path, is_last in entries:
        visit_node(os.path.basename(path), last=is_last)

Why?

Why?

Separation of responsibilities allows

**growth**

The branch conceals

**duplication**

which inhibits future growth

by requiring us to make every change

**twice**

Handling subdirectories:

*lather, rinse, repeat*

In [30]:
# sorted_names, annotate, visit_node as before

def subtree(root):
    """Print every entry under ``root``, descending into directories.

    Each entry is printed by base name via ``visit_node`` and, when it is a
    directory, its own contents are printed right after it. No indentation
    is applied at this stage, so nested entries appear at the same column
    as their parents.
    """
    for path, is_last in annotate(sorted_names(root)):
        visit_node(os.path.basename(path), last=is_last)

        if not os.path.isdir(path):
            continue
        subtree(path)
        
def tree(root='.'):
    """Print ``root`` on its own line, then its contents via ``subtree``."""
    print(root)
    subtree(root=root)

In [31]:
tree()

.
├── config-err-nMfopY
├── foo
├── bar
├── baz
└── biff
└── snurgle.txt
└── boff.log
├── gpg-Rm8aoI
└── S.gpg-agent
├── hsperfdata_drocco
└── 4048
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── ssh-cZcqyXaOUDP2
└── agent.2167
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── tmux-1000
└── default
└── zzz
└── zzz
├── 1_super.txt
└── sapling.txt


Depth tracking

In [32]:
# sorted_names, annotate, tree as before

def visit_node(name, last=False, depth=0):
    """Print one entry line, indented by ``depth`` guide columns.

    The line is ``depth`` copies of ``│   ``, then ``└── `` if ``last`` is
    true or ``├── `` otherwise, then ``name``.
    """
    indent = depth * '│   '
    connector = ('├── ', '└── ')[bool(last)]
    print(indent + connector + str(name))
        

def subtree(root, depth=0):
    """Print the entries under ``root`` recursively, indenting by depth.

    ``depth`` is passed to ``visit_node`` for every entry at this level and
    is increased by one for each directory descended into. Every level is
    drawn with the same guide column, including below the last entry.
    """
    child_depth = depth + 1
    for path, is_last in annotate(sorted_names(root)):
        visit_node(os.path.basename(path), last=is_last, depth=depth)

        if os.path.isdir(path):
            subtree(path, depth=child_depth)

In [33]:
tree()

.
├── config-err-nMfopY
├── foo
│   ├── bar
│   │   ├── baz
│   │   │   └── biff
│   │   └── snurgle.txt
│   └── boff.log
├── gpg-Rm8aoI
│   └── S.gpg-agent
├── hsperfdata_drocco
│   └── 4048
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── ssh-cZcqyXaOUDP2
│   └── agent.2167
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── tmux-1000
│   └── default
└── zzz
│   └── zzz
│   │   ├── 1_super.txt
│   │   └── sapling.txt


Cleaning up the `last_path`

In [34]:
# sorted_names, annotate, tree as before

def visit_node(name, last=False, depth=0, last_path=False, prefix=None):
    """Print one entry line: guide columns, a branch marker, then ``name``.

    Parameters:
        name: text to show for the entry.
        last: true when the entry is the final one in its directory; picks
            ``└── `` instead of ``├── ``.
        depth: nesting level; only used to build the guide columns when
            ``prefix`` is not given.
        last_path: only used when ``prefix`` is not given; selects blank
            guide columns instead of ``│   `` ones.
        prefix: the exact guide-column string to print before the marker.
            When None, the original rule applies: one glyph chosen by
            ``last_path``, repeated ``depth`` times.
    """
    if prefix is None:
        prefix = ('    ' if last_path else '│   ') * depth
    marker = '└── ' if last else '├── '
    print(prefix, marker, name, sep='')


def subtree(root, depth=0, last_path=False, prefix=None):
    """Print the entries under ``root`` recursively with `tree`-style guides.

    Parameters:
        root: directory whose contents are printed.
        depth: nesting level of ``root``'s entries (0 for the top level).
        last_path: retained for existing callers; true once the walk is
            below the last top-level entry.
        prefix: guide columns inherited from the ancestors of ``root``.
            When None it is derived from ``depth`` and ``last_path`` exactly
            as before, which is the empty string for a top-level call.

    The guide string is built one column per ancestor: ``│   `` below an
    ancestor that still has siblings to come, blanks below one that was the
    last entry of its directory. A single glyph repeated ``depth`` times
    cannot express a mix of the two, which is what nested "last" and
    "not last" directories require.
    """
    if prefix is None:
        prefix = ('    ' if last_path else '│   ') * depth

    for name, last in annotate(sorted_names(root)):
        if last and not depth:
            last_path = True

        display_name = os.path.basename(name)
        visit_node(display_name, last=last, depth=depth,
                   last_path=last_path, prefix=prefix)

        # Symlinked directories are listed but not entered, so a link that
        # points back up the hierarchy cannot make the walk recurse into
        # itself.
        if os.path.isdir(name) and not os.path.islink(name):
            child_prefix = prefix + ('    ' if last else '│   ')
            subtree(name, depth=depth + 1, last_path=last_path,
                    prefix=child_prefix)

In [35]:
tree()

.
├── config-err-nMfopY
├── foo
│   ├── bar
│   │   ├── baz
│   │   │   └── biff
│   │   └── snurgle.txt
│   └── boff.log
├── gpg-Rm8aoI
│   └── S.gpg-agent
├── hsperfdata_drocco
│   └── 4048
├── +~JF2068339028303085085.tmp
├── +~JF3389823636842465214.tmp
├── +~JF3945977373033545895.tmp
├── +~JF4064088703298571803.tmp
├── +~JF6869405115425776864.tmp
├── ssh-cZcqyXaOUDP2
│   └── agent.2167
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-colord.service-p0E8sR
├── systemd-private-2d95c2758e1c46ad97b31824c27f5a0e-rtkit-daemon.service-hyPw0K
├── tmux-1000
│   └── default
└── zzz
    └── zzz
        ├── 1_super.txt
        └── sapling.txt


How do we implement solutions?

understand the problem

work in small, incremental steps

test frequently

know your tools

practice, practice, practice!

and

never stop learning

Thank you!

♥,

@drocco007