In [1]:
import numpy as np

# Resources

[mattharrison/Tiny-Python-3.6-Notebook](https://github.com/mattharrison/Tiny-Python-3.6-Notebook/blob/master/python.rst)

[Python for Scientists and Engineers](http://pythonforengineers.com/python-for-scientists-and-engineers/)

- Intro: Start here
- Installing the libraries required for the book
- Beginners Start Here:
    - Create a Word Counter in Python
    - An introduction to Numpy and Matplotlib
- Introduction to Pandas with Practical Examples (New)
- Main Book
    - Image and Video Processing in Python
    - Data Analysis with Pandas
    - Audio and Digital Signal Processing (DSP)
    - Control Your Raspberry Pi From Your Phone / Tablet
- Machine Learning Section
    - Machine Learning with an Amazon like Recommendation Engine
    - Machine Learning New Stuff
    - Machine Learning For Complete Beginners: Learn how to predict how many Titanic survivors using machine learning. No previous knowledge needed!
    - Cross Validation and Model Selection: In which we look at cross validation, and how to choose between different machine learning algorithms. Working with the Iris flower dataset and the Pima diabetes dataset.
- Natural Language Processing
    - 0. Introduction to NLP and Sentiment Analysis
    - 1. Natural Language Processing with NLTK
    - 2. Intro to NLTK, Part 2
    - 3. Build a sentiment analysis program
    - 4. Sentiment Analysis with Twitter
    - 5. Analysing the Enron Email Corpus: The Enron Email corpus has half a million files spread over 2.5 GB. When looking at data this size, the question is, where do you even start?
    - 6. Build a Spam Filter using the Enron Corpus
    
[Structuring Your Project - The Hitchhiker's Guide to Python](http://python-guide-pt-br.readthedocs.io/en/latest/writing/structure/)

# Flatten list of lists

See [this stackoverflow answer](http://stackoverflow.com/questions/11264684/flatten-list-of-lists)

In [7]:
data = [[0, 1], [2, 3], [4, 5], [6, 7]]

# Flatten one level of nesting: the outer `for` walks the rows, the inner
# `for` walks the items of each row, in order.
data2 = [item for row in data for item in row]

print(data)
print(data2)

[[0, 1], [2, 3], [4, 5], [6, 7]]
[0, 1, 2, 3, 4, 5, 6, 7]


An alternative is to use `itertools.chain(*iterables)` (see the 2nd answer to [this stackoverflow question](http://stackoverflow.com/questions/952914/making-a-flat-list-out-of-list-of-lists-in-python)) or `itertools.chain.from_iterable(iterable)` (see the answer [here](http://stackoverflow.com/questions/29244286/how-to-flatten-a-2d-list-to-1d-without-using-numpy)). I like the 2nd method a little better because you pass the `iterable` directly rather than unpacking it with `*iterable`:

In [2]:
import itertools

list2d = [[1, 2, 3], [1, 2], [1, 4, 5, 6, 7]]

# chain.from_iterable takes the outer list itself, so no * unpacking is needed.
flat_iter = itertools.chain.from_iterable(list2d)
list(flat_iter)

[1, 2, 3, 1, 2, 1, 4, 5, 6, 7]

# \*args and \*\*kwargs

See Dan Bader's YouTube video [Python Tricks #7: \*args and \*\*kwargs Recipes for Clean Python](https://www.youtube.com/watch?v=WcTXxX3vYgY&list=TLGGFBo8LInGaKUyMTAyMjAxNw)

In [1]:
def foo(required, *args, **kwargs):
    """Print `required`, then any extra positional and keyword arguments.

    `args` (a tuple) and `kwargs` (a dict) are each printed only when
    they are non-empty, positional arguments first.
    """
    print(required)
    for extras in (args, kwargs):
        if extras:
            print(extras)

In [2]:
# Only the required argument is passed, so args and kwargs are empty and not printed.
foo('hello')

hello


In [3]:
# The extra positional arguments are collected into the args tuple.
foo('hello', 1, 2, 3)

hello
(1, 2, 3)


In [4]:
# Keyword arguments that match no named parameter are collected into the kwargs dict.
foo('hello', 1, 2, 3, key1='value', key2=9999)

hello
(1, 2, 3)
{'key1': 'value', 'key2': 9999}


## Passing kwargs through multiple levels of functions

In [10]:
def bar(required, *args, **kwargs):
    """Print a marker line, then forward every argument unchanged to foo()."""
    print('in bar')
    # Re-unpack with * and ** so foo receives the same positional and keyword
    # arguments, rather than one tuple and one dict as two positional arguments.
    foo(required, *args, **kwargs)

In [11]:
# bar prints its marker, then passes the single required argument on to foo.
bar('hello')

in bar
hello


In [12]:
# The extra positional arguments pass through bar and reach foo as its args tuple.
bar('hello', 1, 2, 3)

in bar
hello
(1, 2, 3)


In [13]:
# Both the positional extras and the keyword arguments pass through bar to foo.
bar('hello', 1, 2, 3, key1='value', key2=9999)

in bar
hello
(1, 2, 3)
{'key1': 'value', 'key2': 9999}


In [14]:
def baz(required2, required, *args, **kwargs):
    """Print a marker with `required2`, then forward the remaining arguments to bar().

    `required2` is consumed here; `required`, `args` and `kwargs` are passed on.
    """
    print('in baz', required2)
    # required2 is deliberately not forwarded: bar only receives what follows it.
    bar(required, *args, **kwargs)

In [15]:
# 'temp' is consumed by baz as required2; 'hello' is forwarded as required.
baz('temp', 'hello')

in baz temp
in bar
hello


In [16]:
# The second 'hello' matches no named parameter, so it becomes the first element of args.
baz('temp', 'hello', 'hello', 1, 2, 3)

in baz temp
in bar
hello
('hello', 1, 2, 3)


In [17]:
# Same as above, with keyword arguments also carried through baz and bar down to foo.
baz('temp', 'hello', 'hello', 1, 2, 3, key1='value', key2=9999)

in baz temp
in bar
hello
('hello', 1, 2, 3)
{'key1': 'value', 'key2': 9999}


# Dicts

## Pretty-print Python dicts with json.dumps()

Note that `numpy` arrays cannot be serialized and therefore cannot be directly used with `json.dumps()`.

In [3]:
import json

temp_dict = {'first': 16, 'second': 27, 'third': 55, 'fourth': 'some text'}

# sort_keys=True emits the keys in sorted order; indent=4 puts one key per line.
pretty_json = json.dumps(temp_dict, sort_keys=True, indent=4)
print(pretty_json)

{
    "first": 16,
    "fourth": "some text",
    "second": 27,
    "third": 55
}


## Merging two dicts  

[The Idiomatic Way to Merge Dictionaries in Python](https://treyhunner.com/2016/02/how-to-merge-dictionaries-in-python/). Check out how it merges two dicts where some of the keys are the same: the value from the later dict wins. Beautiful!

In [4]:
x = dict(xfirst=1, xsecond=2)
y = dict(yfirst=3, ysecond=4)

# Unpack both dicts into a new dict literal; neither x nor y is modified.
merged = {**x, **y}
merged

{'xfirst': 1, 'xsecond': 2, 'yfirst': 3, 'ysecond': 4}

## Ways to iterate over a dictionary 

See zehnpaard's answer at [Iterating over dict values](https://stackoverflow.com/questions/27733685/iterating-over-dict-values)

### Looping directly over the dictionary

In [1]:
z = {
    'x': (123, 'SE', 2, 1),
    'z': (124, 'CI', 1, 1),
}

# Iterating over a dict directly yields its keys.
for key in z:
    print(key)

x
z


### Looping over the values of a dictionary

In [2]:
z = {
    'x': (123, 'SE', 2, 1),
    'z': (124, 'CI', 1, 1),
}

# .values() yields each stored value without its key.
for value in z.values():
    print(value)

(123, 'SE', 2, 1)
(124, 'CI', 1, 1)


### Looping over both the keys and values

In [3]:
z = {
    'x': (123, 'SE', 2, 1),
    'z': (124, 'CI', 1, 1),
}

# .items() yields (key, value) pairs, unpacked here into two loop variables.
for key, value in z.items():
    print(key, value)

x (123, 'SE', 2, 1)
z (124, 'CI', 1, 1)


## Sort dict with strings as keys in alphabetical order

In [7]:
# Mixed-case keys, so case-sensitive and case-insensitive sorting give different orders.
test_dict = dict(first=1, second=2, third=3, Fourth=4, Fifth=5, sixth=6)
test_dict

{'Fifth': 5, 'Fourth': 4, 'first': 1, 'second': 2, 'sixth': 6, 'third': 3}

In [8]:
# sorted() on a dict returns its keys as a sorted list; capitalized keys come first here.
sorted(test_dict)

['Fifth', 'Fourth', 'first', 'second', 'sixth', 'third']

In [9]:
# key=str.lower compares the lowercased keys, so the order ignores case.
sorted(test_dict, key=str.lower)

['Fifth', 'first', 'Fourth', 'second', 'sixth', 'third']

## Use of `setdefault` for dicts

In [12]:
# 1. Start from an empty dict
temp = dict()
print('1.', temp)

# 2. 'key' is absent, so setdefault inserts it with the supplied default (an empty dict)
temp.setdefault('key', dict())
print('2.', temp)

# 3. 'key' is already present, so setdefault leaves the existing value untouched
temp = {'key': {'a': 'value'}}
temp.setdefault('key', dict())
print('3.', temp)

# 4. setdefault returns the value stored under the key, so the dict it just
#    inserted for 'key2' can be filled in the same statement with update()
temp = dict()
temp.setdefault('key2', dict()).update(a='aaa')
print('4.', temp)

# 5. For comparison: update() on its own adds the pair to the top-level dict
temp = dict()
temp.update(a='aaa')
print('5.', temp)

# 6. The same pattern as step 4, with the key names and value held in variables
temp = dict()
keyname = 'key3'
keyname_subdict = 'b'
value = 'bbb'
subdict = temp.setdefault(keyname, dict())
subdict.update({keyname_subdict: value})
print('6.', temp)

1. {}
2. {'key': {}}
3. {'key': {'a': 'value'}}
4. {'key2': {'a': 'aaa'}}
5. {'a': 'aaa'}
6. {'key3': {'b': 'bbb'}}


## Print nested dicts and skip large numpy arrays to visually show organization and elements

In [2]:
# Nested test data: strings, sub-dicts up to three levels deep, and 1-D numpy
# arrays of different lengths (101, 5 and 10 elements).
testdict = {
    "first": "first value",
    "second": {
        "a": "a value",
        "b": np.linspace(0, 100, 101),  # 101 elements
        "c": "c value",
    },
    "third": {
        "a": "a value",
        "b": np.linspace(0, 100, 5),  # 5 elements
        "c": "c value",
        "d": {
            "in_level3_dict": "aa value",
            "b": np.linspace(0, 1, 10),  # 10 elements
        },
    },
}

def print_dict(dd, nspaces=4, nelements=5):
    """Print a (possibly nested) dict as an indented outline, summarizing large arrays.

    Parameters
    ----------
    dd : dict
        Dictionary to print. Values that are themselves dicts are printed
        recursively, one indentation level deeper.
    nspaces : int, optional
        Number of spaces added per nesting level (default 4).
    nelements : int, optional
        A numpy array holding more than this many elements in total is replaced
        by a one-line summary of its shape (default 5). Smaller arrays are
        printed in full.

    Raises
    ------
    TypeError
        If `dd` is not a dict.
    """
    if not isinstance(dd, dict):
        raise TypeError("print_dict(): First argument must be a dict")

    def recurse_dict(d, level=0, nspaces=nspaces):
        # Indentation for entries at this nesting level.
        spaces = " " * nspaces * (level + 1)
        for key, value in d.items():
            # Quote string keys so they are distinguishable from non-string keys.
            keystring = "'{}'".format(key) if isinstance(key, str) else "{}".format(key)
            if isinstance(value, dict):
                print("{}{}: {{".format(spaces, keystring))
                recurse_dict(value, level=level + 1)
                print("{}}}".format(spaces))
            elif isinstance(value, np.ndarray):
                # Use .size (total element count) rather than len(): len() only
                # counts the first axis, so it would let large multi-dimensional
                # arrays through, and it raises TypeError on 0-d arrays.
                if value.size > nelements:
                    print("{}{}: ***np.ndarray.shape = {}***".format(spaces, keystring, value.shape))
                else:
                    print("{}{}: {}".format(spaces, keystring, value))
            elif isinstance(value, str):
                print("{}{}: '{}'".format(spaces, keystring, value))
            else:
                print("{}{}: {}".format(spaces, keystring, value))

    print("{")
    recurse_dict(dd)
    print("}")

# Defaults: 4-space indent, arrays longer than 5 elements shown as a shape summary.
print_dict(testdict)
print()
# Raise the threshold so the 10-element array is printed in full.
print_dict(testdict, nelements=10)
print()
# Narrower indentation: 2 spaces per nesting level.
print_dict(testdict, nspaces = 2)

{
    'first': 'first value'
    'second': {
        'a': 'a value'
        'b': ***np.ndarray.shape = (101,)***
        'c': 'c value'
    }
    'third': {
        'a': 'a value'
        'b': [   0.   25.   50.   75.  100.]
        'c': 'c value'
        'd': {
            'in_level3_dict': 'aa value'
            'b': ***np.ndarray.shape = (10,)***
        }
    }
}

{
    'first': 'first value'
    'second': {
        'a': 'a value'
        'b': ***np.ndarray.shape = (101,)***
        'c': 'c value'
    }
    'third': {
        'a': 'a value'
        'b': [   0.   25.   50.   75.  100.]
        'c': 'c value'
        'd': {
            'in_level3_dict': 'aa value'
            'b': [ 0.          0.11111111  0.22222222  0.33333333  0.44444444  0.55555556
  0.66666667  0.77777778  0.88888889  1.        ]
        }
    }
}

{
  'first': 'first value'
  'second': {
    'a': 'a value'
    'b': ***np.ndarray.shape = (101,)***
    'c': 'c value'
  }
  'third': {
    'a': 'a value'
    'b': [

### Compare to regular print and PrettyPrint

In [5]:
import pprint

# Plain print first, followed by an empty separator line.
print(testdict, end="\n\n")

# The standard-library pretty printer, for comparison.
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(testdict)

{'first': 'first value', 'second': {'a': 'a value', 'b': array([   0.,    1.,    2.,    3.,    4.,    5.,    6.,    7.,    8.,
          9.,   10.,   11.,   12.,   13.,   14.,   15.,   16.,   17.,
         18.,   19.,   20.,   21.,   22.,   23.,   24.,   25.,   26.,
         27.,   28.,   29.,   30.,   31.,   32.,   33.,   34.,   35.,
         36.,   37.,   38.,   39.,   40.,   41.,   42.,   43.,   44.,
         45.,   46.,   47.,   48.,   49.,   50.,   51.,   52.,   53.,
         54.,   55.,   56.,   57.,   58.,   59.,   60.,   61.,   62.,
         63.,   64.,   65.,   66.,   67.,   68.,   69.,   70.,   71.,
         72.,   73.,   74.,   75.,   76.,   77.,   78.,   79.,   80.,
         81.,   82.,   83.,   84.,   85.,   86.,   87.,   88.,   89.,
         90.,   91.,   92.,   93.,   94.,   95.,   96.,   97.,   98.,
         99.,  100.]), 'c': 'c value'}, 'third': {'a': 'a value', 'b': array([   0.,   25.,   50.,   75.,  100.]), 'c': 'c value', 'd': {'in_level3_dict': 'aa value', 'b': a

# Interesting data structures

[New interesting data structures in Python 3](https://github.com/topper-123/Articles/blob/master/New-interesting-data-types-in-Python3.rst). They are:

- `types.MappingProxyType` is used as a read-only dict and was added in Python 3.3. 
    - If you want to deliver data dicts to different functions or threads and want to ensure that a function is not changing data that is also used by another function, you can just deliver a MappingProxyType object to all functions, rather than the original dict, and the data dict now cannot be changed unintentionally.
- `typing.NamedTuple` is a supercharged version of the venerable collections.namedtuple and while it was added in Python 3.5, it really came into its own in Python 3.6.
- `types.SimpleNamespace` is a simple class that provides attribute access to its namespace, as well as a meaningful repr.

# How to set up package during its development prior to it being installed

The problem is how to import the package and its modules into scripts during development. Assumed file organization:

    script_using_package.py
    package_name
        __init__.py
        file1.py
        file2.py
        
One could do __relative imports__ in `__init__.py`,

    from . file1 import File1_thing
    from . file2 import File2_thing
    
but if `file2.py` needs to import `File1_thing` it will also have to use a relative import,

    from . file1 import File1_thing

Instead, we can put the path to `package_name` in the python path by first including the following code in `__init__.py` (see [this StackOverflow answer](http://stackoverflow.com/questions/279237/import-a-module-from-a-relative-path)): 

    import os
    import inspect
    import sys

    package_folder = os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0]))
    if package_folder not in sys.path:
        sys.path.append(package_folder)
 
Now in `file2.py` we can do

    from file1 import File1_thing

and it will work just fine. In `script_using_package.py` we can do

    from package_name import *
    
and `File1_thing` and `File2_thing` will be imported.

# How to include `test` in package

Assume we're using pytest, which is invoked on the command line in the directory containing `package` (see below for file structure) with `py.test`.

    └── package
        ├── __init__.py
        ├── bends.py
        ├── cylinders.py
        ├── flowchannel.py
        ├── microfluidicdesign.py
        ├── multiflowchannel.py
        ├── multiplecomponents.py
        ├── point.py
        └── test
            ├── __init__.py
            └── test_point.py

Put `__init__.py` in the `test` directory. The reason is explained in the following Note from [Good Integration Practices](http://doc.pytest.org/en/latest/goodpractices.html):

>If pytest finds a “a/b/test_module.py” test file while recursing into the filesystem it determines the import name as follows:
>
>- determine basedir: this is the first “upward” (towards the root) directory not containing an __init__.py. If e.g. both a and b contain an __init__.py file then the parent directory of a will become the basedir.
>- perform sys.path.insert(0, basedir) to make the test module importable under the fully qualified import name.
>- import a.b.test_module where the path is determined by converting path separators / into ”.” characters. This means you must follow the convention of having directory and file names map directly to the import names.

[Open Sourcing a Python Project the Right Way](https://www.jeffknupp.com/blog/2013/08/16/open-sourcing-a-python-project-the-right-way/) also puts `test` in the package directory. An interesting alternate approach is shown in the answer by Steely Wing to [Where do the Python unit tests go?](http://stackoverflow.com/questions/61151/where-do-the-python-unit-tests-go).

In [Structuring Your Project - The Hitchhiker's Guide to Python](http://python-guide-pt-br.readthedocs.io/en/latest/writing/structure/), there is a nice approach in which tests are kept in a separate directory outside of the package.

> To give the individual tests import context, create a tests/context.py file:

       import os
       import sys
       sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

       import sample

> Then, within the individual test modules, import the module like so:

       from .context import sample
   
>This will always work as expected, regardless of installation method.



# Simple web server from the command line

Assume a directory with a static html file (`index.html`). A simple http server ([21.22. http.server — HTTP servers](https://docs.python.org/3/library/http.server.html)) can be run from the command line by executing 

    python -m http.server 8000 --bind 127.0.0.1

with the directory as the present working directory. The html file can be accessed in a browser with url `http://127.0.0.1:8000/index.html`.



# Book: Automate the Boring Stuff

[Automate the Boring Stuff with Python, Al Sweigart](https://automatetheboringstuff.com)

See particularly [Chapter 18 – Controlling the Keyboard and Mouse with GUI Automation](https://automatetheboringstuff.com/chapter18/) focused on the use of [PyAutoGUI](http://pyautogui.readthedocs.io/en/latest/) which is a Python module for programmatically controlling the mouse and keyboard.

# timeit module

In [1]:
import numpy as np

import timeit

## Pure standard python

In [2]:
cmd = '''
temp = []
for i in range(1000):
    temp.append(i)
'''

num_calls = 10

# timeit() returns the total time for all num_calls executions of cmd,
# so divide by num_calls to get the average time per execution.
timer = timeit.Timer(stmt=cmd)
time_tot = timer.timeit(number=num_calls)
time_ave = time_tot / num_calls
print(time_tot, time_ave, sep='\n')

0.0007284970488399267
7.284970488399267e-05


## With imported package

[See this stackoverflow question and answer](http://stackoverflow.com/questions/21216208/timeit-module-in-python-does-not-recognize-numpy-module)

In [3]:
cmd = "np.zeros((100,100))"

# Option 1: a string statement does not see the notebook's names,
# so the import is supplied through the setup string.
t_with_setup = timeit.Timer(cmd, setup='import numpy as np').timeit(number=1000)

# Option 2: a lambda is a closure, so it uses the `np` already imported here.
u = timeit.Timer(lambda: np.zeros((100,100)))
t_with_lambda = u.timeit(number=1000)

print(t_with_setup, t_with_lambda, sep='\n')

0.003353904001414776
0.0029485179111361504


# Classes

## `__call__` usage

See bradley.ayers's answer to the StackOverflow question, [Python `__call__` special method practical example](https://stackoverflow.com/questions/5824881/python-call-special-method-practical-example).

>I find it useful because it allows me to create APIs that are easy to use (you have some callable object that requires some specific arguments), and are easy to implement because you can use Object Oriented practices.

>The following is code I wrote yesterday that makes a version of the hashlib.foo methods that hash entire files rather than strings:

        # filehash.py
        import hashlib


        class Hasher(object):
            """
            A wrapper around the hashlib hash algorithms that allows an entire file to
            be hashed in a chunked manner.

            Instances are callable: ``Hasher(hashlib.md5)(path)`` returns the hex
            digest of the file at ``path``.
            """
            def __init__(self, algorithm):
                # algorithm is a hashlib constructor such as hashlib.md5
                self.algorithm = algorithm

            def __call__(self, file):
                """Return the hex digest of the file whose path is `file`."""
                hasher = self.algorithm()
                with open(file, 'rb') as f:
                    # The file is opened in binary mode, so read() returns bytes
                    # and yields b'' at end of file. The sentinel must be b'':
                    # in Python 3, '' never equals b'' and the loop would not end.
                    for chunk in iter(lambda: f.read(4096), b''):
                        hasher.update(chunk)
                return hasher.hexdigest()


        md5    = Hasher(hashlib.md5)
        sha1   = Hasher(hashlib.sha1)
        sha224 = Hasher(hashlib.sha224)
        sha256 = Hasher(hashlib.sha256)
        sha384 = Hasher(hashlib.sha384)
        sha512 = Hasher(hashlib.sha512)

>This implementation allows me to use the functions in a similar fashion to the hashlib.foo functions:

        from filehash import sha1
        # print is a function in Python 3, so the call needs parentheses
        print(sha1('somefile.txt'))

>Of course I could have implemented it a different way, but in this case it seemed like a simple approach.

## Looping over instance variables of a class

### Test case

In [4]:
class TestClass:
    """Minimal class with four instance attributes of different types."""

    def __init__(self):
        # Attributes are assigned in the order a, b, c, d.
        self.a = 1
        self.b = 'b'
        self.c = "True"
        self.d = [10, 11, 12]


testclass = TestClass()
testclass

<__main__.TestClass at 0x10d49aeb8>

In [6]:
# vars() returns the instance's attribute dictionary (attribute name -> value).
instancevars = vars(testclass)
instancevars

{'a': 1, 'b': 'b', 'c': 'True', 'd': [10, 11, 12]}

In [11]:
# Walk the attribute names in sorted order and print each name with its value.
for var in sorted(instancevars):
    if not hasattr(testclass, var):
        continue
    print(var, getattr(testclass, var))

a 1
b b
c True
d [10, 11, 12]


### Use to copy class instance

In [25]:
class TestClass2:
    """Class with four optional attributes and a vars()-based copy() method."""

    def __init__(self, a=None, b=None, c=None, d=None):
        self.a = a
        self.b = b
        self.c = c
        self.d = d

    def copy(self):
        """Return a new TestClass2 with every instance attribute of `self` set on it.

        Values are transferred with getattr/setattr, so the new instance
        refers to the same value objects as the original.
        """
        clone = TestClass2()
        attr_names = sorted(vars(self))
        for attr_name in attr_names:
            setattr(clone, attr_name, getattr(self, attr_name))
        return clone

    def __str__(self):
        """Return the four attributes formatted as 'a:…, b:…, c:…, d:…'."""
        fields = (self.a, self.b, self.c, self.d)
        return "a:{}, b:{}, c:{}, d:{}".format(*map(str, fields))

    def __repr__(self):
        """Use the same text as __str__ so the notebook display is readable."""
        return self.__str__()


testclass2 = TestClass2(1, 2, 3, 4)
testclass2

a:1, b:2, c:3, d:4

In [26]:
# copy() builds a new TestClass2 and sets each of testclass2's attributes on it.
testclass2copy = testclass2.copy()
testclass2copy

a:1, b:2, c:3, d:4