# Gathering system data - Python for System Administrators 

## Goals:
    - Gathering System Data with multiplatform and platform-dependent tools
    - Get information from files, /proc, /sys
    - Capture command output
    - Use psutil to get IO, CPU and memory data
    - Parse files with a strategy
    
## Non-goals for this lesson:
    - use with, yield or pipes

## Modules

In [1]:
import psutil
import glob
import sys
import subprocess


In [57]:
#
# Our code is py3-ready
#
from __future__ import print_function, unicode_literals

In [3]:
def grep(needle, fpath):
    """A simple grep implementation.

       Returns the lines of the file `fpath` that contain `needle`.

       goal: a file object is iterable line by line and doesn't
             need splitlines()
       goal: comprehension can filter lists

       @param needle - the substring to look for in each line
       @param fpath  - path of the file to scan
       @return a list with the matching lines, line endings included
    """
    # The context manager closes the file as soon as the list is built,
    # instead of leaving the handle open until garbage collection.
    with open(fpath) as lines:
        return [line for line in lines if needle in line]

# Do we have localhost?
grep("localhost", "/etc/hosts")

['127.0.0.1\tlocalhost\n', '::1\tlocalhost ip6-localhost ip6-loopback\n']

In [5]:
#The psutil module is very nice
import psutil

#Works on Windows, Linux and MacOS
psutil.cpu_percent() 

17.7

In [9]:
#And its output is very easy to manage
ret = psutil.disk_io_counters()
print(ret)

sdiskio(read_count=330969, write_count=222903, read_bytes=5484097536, write_bytes=2939428352, read_time=1637475, write_time=5610046, read_merged_count=9083, write_merged_count=20134, busy_time=712743)


In [4]:
# Exercise: What other information
# does psutil provide?
# Use this cell and Jupyter's tab completion to explore it.

In [10]:
# Exercise
def multiplatform_vmstat(count):
    """Exercise template: a vmstat-like function built on psutil.

    @param count - number of iterations of the sampling loop

    As provided, this is only a skeleton: it raises NotImplementedError
    on the first iteration until the loop body is implemented.
    """
    # Write a vmstat-like function printing every second:
    # - cpu usage%
    # - bytes read and written in the given interval
    # Hint: use psutil and time.sleep(1)
    # Hint: use this cell or try on IPython and *then* write the function
    #       using %edit vmstat.py
    for i in range(count):
        # Placeholder: replace this line with the code that computes
        # cpu_usage and bytes_rw for the current interval.
        raise NotImplementedError
        print(cpu_usage, bytes_rw)

multiplatform_vmstat(5)

NotImplementedError: 

In [15]:
%load course/multiplatform_vmstat.py

In [16]:
# Run your vmstat implementation.

multiplatform_vmstat(5)

cpu%	iops(r+w)
12.0	557884
13.8	164
9.0	54
8.3	6
7.8	4
10.8	0


In [12]:
#
# subprocess
#
# The check_output function returns the command stdout
from subprocess import check_output

# It takes a *list* as an argument!
out = check_output("ping -w1  -c1 www.google.com".split())

# and returns the command output (a byte string on py3)
print(out)

PING www.google.com (172.217.19.68): 56 data bytes
64 bytes from 172.217.19.68: icmp_seq=0 ttl=52 time=16.963 ms
--- www.google.com ping statistics ---
1 packets transmitted, 1 packets received, 0% packet loss
round-trip min/avg/max/stddev = 16.963/16.963/16.963/0.000 ms



In [None]:
# If you want to stream command output, use subprocess.Popen
#  and read the subprocess documentation carefully!

In [13]:
def sh(cmd, shell=False, timeout=0):
    """Run a command string and return its stdout split into lines.

        @param cmd     - the command line, eg 'ls -l "/tmp/my dir"'.
                         Without a shell it is split into arguments
                         following POSIX shell quoting rules.
        @param shell   - if True, pass `cmd` untouched to the system shell
        @param timeout - seconds to wait for the command to complete;
                         0 (the default) means no timeout.
                         Timeouts need Python 3.3 or later.
        @return the list of output lines, as returned by check_output
                (byte strings on py3), without line endings
        @raise ValueError if a timeout is requested before Python 3.3
        @raise subprocess.CalledProcessError if the command exits non-zero
    """
    from shlex import split as split_args
    from subprocess import check_output
    from sys import version_info as python_version

    # With shell=True the shell must receive the whole command line:
    # given a list, it would run only the first item. Without a shell,
    # shlex keeps quoted arguments (eg paths with blanks) in one piece.
    args = cmd if shell else split_args(cmd)

    if not timeout:
        # Forwarding timeout=0 would make the call expire immediately
        return check_output(args, shell=shell).splitlines()

    if python_version < (3, 3):  # check_output has no timeout there
        raise ValueError("Timeout not supported until Python 3.3")
    return check_output(args, shell=shell, timeout=timeout).splitlines()

In [14]:
# Exercise:
# implement a multiplatform pgrep-like function.
def ppgrep(program):
    """
    Exercise: a multiplatform pgrep-like function.
    Prints a list of processes executing 'program'.
    @param program - eg firefox, explorer.exe

    As provided, this is only a stub: it raises NotImplementedError
    until the body is implemented.

    Hint: use subprocess, os and list-comprehension
    eg. items = [x for x in a_list if 'firefox' in x]
    """
    raise NotImplementedError

In [None]:
# Solution
%load course/pgrep.py


## Parsing /proc

The Linux /proc filesystem is a cool place to get data.

In the next example we'll see how to get:
 - thread information;
 - disk statistics;
 
 

In [16]:
# Parsing /proc - 1
def linux_threads(pid):
    """Print selected status fields for every thread of a process.

    The data is read from /proc/<pid>/task/<tid>/status; when no such
    file exists nothing is printed.

    @param pid - the id of the process whose threads are inspected
    """
    from glob import glob

    # pick a set of fields to gather
    wanted = ('Pid', 'Tgid', 'voluntary')  # this is a tuple!

    # glob emulates shell expansion of * and ?
    path = "/proc/{}/task/*/status".format(pid)

    for status_path in glob(path):
        try:
            with open(status_path) as status:
                # ... and use comprehension to get interesting data.
                # `wanted` is never rebound, so it is still a tuple
                # for every thread after the first one.
                t_info = [line
                          for line in status
                          if line.startswith(wanted)]  # startswith accepts tuples!
        except IOError:
            # The thread exited between glob() and open(): skip it
            continue
        print(t_info)

In [17]:
# If you're on linux try linux_threads
pid_of_init = 1 # or systemd ?
linux_threads(pid_of_init)

['Tgid:\t1\n', 'Pid:\t1\n', 'voluntary_ctxt_switches:\t446\n']


In [18]:
# On Linux, /proc/diskstats is the source of I/O information
disk_l = grep("sda", "/proc/diskstats")
print(''.join(disk_l))

# To gather that data we put the header in a multiline string
from course import diskstats_headers as headers

# Take the 1st entry (sda), split the data...
disk_info = disk_l[0].split()
# ... and tie them with the header.
# On py3 zip() returns a one-shot iterator: materialize it once,
# so that `ret` can still be used after it has been printed.
ret = list(zip(headers, disk_info))

print(ret)

   8       0 sda 216382 11369 8765248 1773709 242383 61222 19926032 6871893 0 783857 8646230
   8       1 sda1 122 0 1856 2097 0 0 0 0 0 2095 2097
   8       2 sda2 54 0 1312 504 0 0 0 0 0 485 504
   8       3 sda3 116 0 1808 1539 0 0 0 0 0 1506 1539
   8       4 sda4 118 0 1824 498 0 0 0 0 0 471 498
   8       5 sda5 267 206 2161 2898 0 0 0 0 0 2441 2898
   8       6 sda6 120 0 1840 979 0 0 0 0 0 958 979
   8       7 sda7 228 1 4442 6227 13 1 112 636 0 5525 6862
   8       8 sda8 212805 9791 8716589 1744819 207751 35245 19690304 6654285 0 619100 8399593
   8       9 sda9 2418 1371 31440 13085 3475 25976 235616 19476 0 27651 32554

[(u'major', '8'), (u'minor', '0'), (u'device', 'sda'), (u'reads', '216382'), (u'reads_merged', '11369'), (u'reads_sectors', '8765248'), (u'reads_ms', '1773709'), (u'writes', '242383'), (u'writes_merged', '61222'), (u'writes_sectors', '19926032'), (u'writes_ms', '6871893'), (u'io_in_progress', '0'), (u'io_ms_spent', '783857'), (u'io_ms_weight', '8646230')]


In [19]:
# Try to mangle ret
print('\n'.join(str(x) for x in ret))

(u'major', '8')
(u'minor', '0')
(u'device', 'sda')
(u'reads', '216382')
(u'reads_merged', '11369')
(u'reads_sectors', '8765248')
(u'reads_ms', '1773709')
(u'writes', '242383')
(u'writes_merged', '61222')
(u'writes_sectors', '19926032')
(u'writes_ms', '6871893')
(u'io_in_progress', '0')
(u'io_ms_spent', '783857')
(u'io_ms_weight', '8646230')


In [20]:
# We can create a reusable commodity class with
from collections import namedtuple

# using the imported `headers` as attributes
# like the one provided by psutil.
# The type name matches the variable it is bound to, so repr()
# shows the same name that is used in the code.
DiskStats = namedtuple('DiskStats', headers)

# ... and disk_info as values
dstat = DiskStats(*disk_info)
print(dstat.device, dstat.writes_ms)

# Homework: check further features with
# help(collections)

sda 6871893


In [21]:
# Exercise
# Write the following function 
def linux_diskstats(partition):
    """Print every second I/O information from /proc/diskstats

        @param: partition - eg sda1 or vdx1

        As provided, this is an exercise template: it raises
        NotImplementedError until the loop body is implemented.

        Hint: use the above `grep` function
        Hint: use zip, time.sleep, print() and *magic
    """
    # Names of the statistics that follow the major, minor and device
    # columns of a /proc/diskstats line, in order. io_ms_spent must be
    # listed before io_ms_weight, or the last values get the wrong name.
    diskstats_headers = ('reads reads_merged reads_sectors reads_ms'
            ' writes writes_merged writes_sectors writes_ms'
            ' io_in_progress io_ms_spent io_ms_weight').split()

    while True:
        # Placeholder: replace this line with the code that reads
        # the statistics of `partition` into `values`.
        raise NotImplementedError
        print(values, sep="\t")

In [None]:
# Solution
%load course/linux_diskstats.py

In [24]:
# Using check_output with split() doesn't always work:
# str.split() cuts a quoted path containing blanks into several arguments.
from os import makedirs
# NOTE: the exist_ok argument needs Python 3.2+; on Python 2
#       makedirs() rejects it with a TypeError.
makedirs('/tmp/course/b l a n k s', exist_ok=True)

# Here `ls` receives '"/tmp/course/b', 'l', 'a', 'n', 'k' and 's"'
# as six separate arguments instead of a single path.
check_output('ls "/tmp/course/b l a n k s"'.split())

TypeError: makedirs() got an unexpected keyword argument 'exist_ok'

In [30]:
# You can use
from shlex import split
# and
cmd = split('dir -a "/tmp/course/b l a n k s"')
check_output(cmd)

'.  ..\n'

In [1]:
# Pandas 101
import pandas as pd

ImportError: No module named pandas

## zip on py3 returns an iterator


In [32]:
# zip_iterables():
"""The zip method joins list elements pairwise
    like a zip fastener
"""
from sys import version_info as python_version
a_list = [0, 1, 2, 3]
b_list = ["a", "b", "c", "d"]
zipper = zip(a_list, b_list)
print(zipper)

[(0, u'a'), (1, u'b'), (2, u'c'), (3, u'd')]


In [33]:
if python_version >= (3,):
    zipper = list(zipper)
assert zipper == [(0, "a"), (1, "b"), (2, "c"), (3, "d")]