# Python General

### Function Arguments, \*args and \*\*kwargs

In [None]:
# A parameter written as *args gathers any number of positional
# arguments into a tuple.
# NOTE: this definition shadows the built-in sum() from here on.
def sum(*args):
    """Add up every positional argument and print the total."""
    total = 0
    for value in args:
        total += value
    print("sum is", total)


sum(1, 2, 2, 123)

In [None]:
# **kwargs does the same for keyword arguments: they arrive as a dict
def sum_kwargs(**kwargs):
    """Print the total of all keyword-argument values."""
    total = 0
    # only the values matter here; the keyword names are ignored
    for amount in kwargs.values():
        total += amount
    print(total)


sum_kwargs(one=1, two=2, three=3)

In [None]:
def my_func(**kwargs):
    """Print each keyword argument as "name value", one pair per line."""
    for pair in kwargs.items():
        # pair is a (name, value) tuple; unpack it into print's arguments
        print(*pair)


my_func(name='tim', sport='baseball', score=19)

In [None]:
# The * and ** operators also work at the call site, where they unpack
# a sequence or a mapping into separate arguments.
def my_three(a, b, c):
    """Print the three arguments on a single line."""
    print(a, b, c)


# a list is unpacked into positional arguments
a = [1, 2, 3]
my_three(*a)
# a dict is unpacked into keyword arguments, so its keys must match
# the parameter names of the function
b = dict(a=1, b=2, c=3)
my_three(**b)

In [None]:
# a bare name on the last line of a cell displays that object's value
b

In [None]:
# assigning to a key that is not in the dict yet adds a new entry
b["newItem"] = 4
b

### Nested dict inside a dict

In [None]:
# a dict whose values are themselves dicts
query_type = {
    "simple": {},
    "atr": {"inc": "aliases+tags+ratings"},
    "aliases": {"inc": "aliases"},
    "releases": {"inc": "releases"},
}
# indexing the outer dict by key hands back the inner dict stored there
x = query_type["atr"]
x

In [None]:
# x is the inner dict looked up from query_type, so type() reports dict
type(x)

### Looping - through a dict

In [None]:
# a small dict to iterate over
knights = {'gallahad': 'the pure', 'robin': 'the brave'}
# Iterating over dict.items() yields (key, value) pairs; unpacking each
# pair binds k to the key and v to the matching value.
for pair in knights.items():
    k, v = pair
    print(k, v)

### List Comprehension

List comprehensions are a tool for transforming one list (any iterable actually) into another list. During this transformation, elements can be conditionally included in the new list and each element can be transformed as needed.

##### Create a list with a loop

In [1]:
# start with a list that has nothing in it yet
my_list = list()

In [2]:
# Each pass through the loop appends one item, so after the loop my_list
# holds the same items as looping_list (iterate the named list rather than
# repeating its literal, so the two cannot drift apart).
looping_list = [2, 1, 0]
for item in looping_list:
    my_list.append(item)
my_list

[2, 1, 0]

##### Now create a list with list comprehension

In [3]:
# start over: an empty result list and the list to read items from
my_list = list()
looping_list = [2, 1, 0]

In [15]:
# the comprehension visits every item of looping_list and collects it
# unchanged, giving the same result as the loop-and-append version
my_list = [item for item in looping_list]
my_list

[2, 1, 0]

List Comprehension can be used for operations that would also be done with vectorized operations in pandas.

In [5]:
# helper that the comprehension examples apply to each element
def my_func(num):
    """Return num scaled by the constant 0.324234."""
    factor = .324234
    return num * factor

In [6]:
# three comprehensions over the same source list; the expression before
# "for" decides what each new element looks like
my_list1 = [my_func(item) for item in looping_list]
my_list2 = [item*100 for item in looping_list]
my_list3 = [[item*100] for item in looping_list]
print(my_list1) # a list with my_func() applied to each item
print(my_list2) # a list with each item multiplied by 100
print(my_list3) # a list of lists with each item multiplied by 100

[0.648468, 0.324234, 0.0]
[200, 100, 0]
[[200], [100], [0]]


Place a conditional requirement on the item being grabbed from the list:

In [20]:
# a trailing "if" filters the source list: only items that meet the
# condition are transformed and placed in the new list
my_list= [item * 100 for item in looping_list if item == 2]
my_list

[200]

In [4]:
# Exercise: work out what this produces.
# Hint: range(0, len(line), n) yields the start offset of every piece
# (0, n, 2n, ...) and each slice takes the n characters from that offset.
line = '1234567890'
n = 2
new_line = [
    line[start:start + n]
    for start in range(0, len(line), n)
]
# new_line
range(0, len(line))

range(0, 10)

### Modules

##### How the Python interpreter handles execution
Article [A module's \__name\__](http://ibiblio.org/g2swap/byteofpython/read/module-name.html)

Python's interpreter sets up a global namespace when running a script. It uses "dunder" (double-underscore) notation - for example, "\__name\__" is its way of keeping track of what is being executed at any given time.

When a script is initially executed, \__name\__ is set to '\__main\__' before any of the actual code in the script is evaluated, so the Python interpreter knows that this "main" script is the one currently being run. However, once an import statement is evaluated (usually the first thing at the top of a Python script), the code of the imported file runs with its own \__name\__, which is set to the name of the module being imported (this may be a package, i.e. a folder containing an \__init\__.py file, or just another Python file in the same root directory). The main script's own \__name\__ stays '\__main\__'. While the import runs, the interpreter is focusing its attention on executing the code in this new file.

In the supporting-files folder there is a file1.py. It contains "if \__name\__ == '\__main\__':" in its code, which is a logical test that I can throw in any script to gain control over what code I want to be executed if the file is run directly as the "main" program, or what I want to be run if it's being imported into another script.

In the Unix prompt, running 'python file1.py' prints the first statement because \__name\__ is set to '\__main\__'. However, if I import file1 into another script called file2.py, then inside file1 \__name\__ is 'file1' (the plain module name, without underscores) rather than '\__main\__' during the time the imported file is being executed from within file2.py. Because 'file1' is not equal to '\__main\__', the second print statement is executed.

***see file1.py and file2.py in the supporting-files folder***

### Built-in Functions

##### enumerate()

In [7]:
# enumerate() pairs each item with a running count that starts at 0;
# list() collects the resulting (count, item) tuples so they can be shown
seasons = ['Spring', 'Summer', 'Fall', 'Winter']
list(enumerate(seasons))

[(0, 'Spring'), (1, 'Summer'), (2, 'Fall'), (3, 'Winter')]

In [11]:
# loop through both the count and the item: each (count, item) tuple
# from enumerate() is unpacked into i and x
for i, x in enumerate(seasons):
    print("Count is: %s" %i)
    print("Item is: %s\n" %x)

Count is: 0
Item is: Spring

Count is: 1
Item is: Summer

Count is: 2
Item is: Fall

Count is: 3
Item is: Winter



With-open stuff - add

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# So, the problem is that the gigantic file is actually not a valid XML, because
# it has several root elements, and XML declarations.
# It is, a matter of fact, a collection of a lot of concatenated XML documents.
# So, one solution would be to split the file into separate documents,
# so that you can process the resulting files as valid XML documents.

"""
    Split the input file into separate files, each containing a single patent.
    As a hint - each patent declaration starts with the same line that was
    causing the error found in the previous exercises.
    
    The new files should be saved with filename in the following format:
    "{}-{}".format(filename, n) where n is a counter, starting from 0.
"""


import xml.etree.ElementTree as ET
PATENTS = 'supporting-files/patent.data'

def get_root(fname):
    """Parse the XML document stored at fname and return its root element."""
    return ET.parse(fname).getroot()


def split_file(filename):
    """Split a file of concatenated XML documents into one file per document.

    Every line that is exactly the XML declaration
    ``<?xml version="1.0" encoding="UTF-8"?>`` starts a new document.
    Document number ``n`` (counting from 0) is written to
    ``"{}-{}".format(filename, n)``.  Lines that come before the first
    declaration belong to no document and are not written anywhere; if the
    input contains no declaration at all, no output file is created.

    The input is streamed line by line instead of being loaded into a list,
    so memory use does not grow with the size of the input file.

    Args:
        filename: path of the file to split.

    Returns:
        None.  The result is the set of files written to disk.

    Raises:
        OSError: if the input cannot be opened or an output cannot be written.
    """
    xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>\n'
    current_out = None   # output file of the document being copied, if any
    doc_count = 0        # number of documents started so far

    try:
        # The declaration line states UTF-8, so read and write with that
        # encoding explicitly rather than relying on the platform default.
        with open(filename, 'r', encoding='utf-8') as source:
            for line in source:
                if line == xml_declaration:
                    # A new document begins: finish the previous output
                    # file and open the next numbered one.
                    if current_out is not None:
                        current_out.close()
                    current_out = open("{}-{}".format(filename, doc_count),
                                       'w', encoding='utf-8')
                    doc_count += 1
                if current_out is not None:
                    current_out.write(line)
    finally:
        # Close the last output file even if reading or writing failed.
        if current_out is not None:
            current_out.close()

# Only split the data file when this code is executed directly (as a script
# or in a notebook cell), not as a side effect of importing it elsewhere.
if __name__ == '__main__':
    split_file(PATENTS)