# Python. Типы.

In [3]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [38]:
# a = None
# b = None
# print id(a) == id(b)
# print id(5) == id(5)
# print id(172) == id(272)

# Модуль Collections

In [116]:
from collections import namedtuple, deque, \
                        Counter, OrderedDict, \
                        defaultdict

# Коллекции

## Базовые методы

In [46]:
print tuple(), (), (1,), (1,)*3
print list(), [1] * 2, [[]] * 2
print set()
print dict(), {}

() () (1,) (1, 1, 1)
[] [1, 1] [[], []]
set([])
{} {}


In [58]:
a = [1, 2, 3, 4, 5]
del a[1]
print a
print 1 in a, 2 in a, 'asd' in a

[1, 3, 4, 5]
True False False


In [60]:
d = {'key_1': 1, 'key_2': 2}
del d['key_1']
print d
print 'key_2' in d, 2 in d

{'key_2': 2}
True False


## Изменяемые vs Неизменяемые

In [1]:
# Изменяемые (list, set, dict, deque, Counter, OrderedDict, defaultdict)

lst = [1, 2, 3]
prev_id = id(lst)
print lst
lst += [4]  # lst.append(4)
new_id = id(lst)
print lst
print 'Has id changed (list)?', prev_id != new_id

[1, 2, 3]
[1, 2, 3, 4]
Has id changed (list)? False


In [2]:
# Неизменяемые (tuple, namedtuple)
tpl = (1, 2, 3)
prev_id = id(tpl)
print tpl
tpl += (4,)  # Запятая нужна! 
print tpl
new_id = id(tpl)
print 'Has id changed (tuple)?', prev_id != new_id

(1, 2, 3)
(1, 2, 3, 4)
Has id changed (tuple)? True


## Сложность
https://wiki.python.org/moin/TimeComplexity

# Tuple

In [94]:
# http://stackoverflow.com/questions/6153348/time-complexity-of-tuple-in-python
profile = (1, 'Pavel', 'Durov', 1984, 'Telegram')

In [84]:
name = profile[1:3]

In [121]:
name

('Pavel', 'Durov')

In [1]:
NAME_SLICE = slice(1, 3)

In [90]:
print profile[NAME_SLICE]

('Pavel', 'Durov')


# Namedtuple

In [2]:
from collections import namedtuple
fiels = ['vk_id', 'name', 'surname', 'year', 'work']
VkProfile = namedtuple('VkProfile', fiels, verbose=True)

class VkProfile(tuple):
    'VkProfile(vk_id, name, surname, year, work)'

    __slots__ = ()

    _fields = ('vk_id', 'name', 'surname', 'year', 'work')

    def __new__(_cls, vk_id, name, surname, year, work):
        'Create new instance of VkProfile(vk_id, name, surname, year, work)'
        return _tuple.__new__(_cls, (vk_id, name, surname, year, work))

    @classmethod
    def _make(cls, iterable, new=tuple.__new__, len=len):
        'Make a new VkProfile object from a sequence or iterable'
        result = new(cls, iterable)
        if len(result) != 5:
            raise TypeError('Expected 5 arguments, got %d' % len(result))
        return result

    def __repr__(self):
        'Return a nicely formatted representation string'
        return 'VkProfile(vk_id=%r, name=%r, surname=%r, year=%r, work=%r)' % self

    def _asdict(self):
        'Return a new OrderedDict which maps field names to their values'
        return OrderedDict(zip(self._fields, self))

    def _replace(_s

In [5]:
profile_nt = VkProfile(vk_id=1, name='Pavel', 
                       surname='Durov', 
                       year=1984, 
                       work='Telegram')

In [6]:
profile_nt.name, profile_nt.surname

('Pavel', 'Durov')

In [7]:
profile_nt._replace(work='new job')

VkProfile(vk_id=1, name='Pavel', surname='Durov', year=1984, work='new job')

In [8]:
profile_nt._asdict()

OrderedDict([('vk_id', 1),
             ('name', 'Pavel'),
             ('surname', 'Durov'),
             ('year', 1984),
             ('work', 'Telegram')])

# List

## Базовые операции

In [16]:
a = ['start']

# Добавление одного элемента в конец списка
a.append(1)  # O(1)
print a

['start', 1]


In [17]:
# Добавление k элементов в конец списка
a.extend([2, 3])  # O(k)
print a

['start', 1, 2, 3]


In [18]:
# Доступ к элементу по индексу
print a[0]  # O(1)

start


In [19]:
# Вставка элемента внутрь списка
a.insert(2, 'new_element')  # O(n)
print a

['start', 1, 'new_element', 2, 3]


In [20]:
# Remove an element from the list by index
ret = a.pop(1)  # O(n) - returns the removed element
print ret, a
del a[1]  # O(n) - does not return the removed element
print ret, a

1 ['start', 'new_element', 2, 3]
1 ['start', 2, 3]


In [22]:
# Сортировка https://en.wikipedia.org/wiki/Timsort
print sorted(a)  # без изменения исходного списка
print a
a.sort()  # с изменением исходного списка
print a  

[2, 3, 'start']
['start', 2, 3]
[2, 3, 'start']


In [54]:
# Наличие элемента в списке
'start' in a  # O(n) !!!

True

## Особенности копирования

### Как не работает? 

In [3]:
x = [[0]] * 2
x

[[0], [0]]

In [4]:
x[0][0] = 1
x

[[1], [1]]

In [5]:
id(x[0]) == id(x[1])

True

In [6]:
y = x + x
y

[[1], [1], [1], [1]]

In [7]:
y[0][0] = 2
y

[[2], [2], [2], [2]]

### А как работает?

In [32]:
x = [[0] for x in xrange(2)]
print x
x[0][0] = 1
print x

[[0], [0]]
[[1], [0]]


### Как скопировать список? 

In [8]:
x = [1, 2, 3]
y = x
id(x) == id(y)

True

In [9]:
y = x[:]
print 'y=x[:]', id(x) == id(y)

y = list(x)
print 'y=list(x)', id(x) == id(y)

from copy import copy
y = copy(x)
print 'y=copy(x)', id(x) == id(y)

y=x[:] False
y=list(x) False
y=copy(x) False


In [10]:
x = [[1, 2], [3, 4]]

In [48]:
y = x[:]
print 'y=x[:]', id(x[0]) == id(y[0])

y = list(x)
print 'y=list(x)', id(x[0]) == id(y[0])

from copy import copy
y = copy(x)
print 'y=copy(x)', id(x[0]) == id(y[0])

 y=x[:] True
y=list(x) True
y=copy(x) True


In [50]:
from copy import deepcopy
y = deepcopy(x)
print id(x[0]) == id(y[0])

False


# Deque

In [62]:
from collections import deque

In [63]:
deq = deque([1, 2, 3])

In [64]:
deq.append(100)  # O(1)
print deq

deque([1, 2, 3, 100])


In [65]:
deq.appendleft(-7)  # O(1)
print deq

deque([-7, 1, 2, 3, 100])


In [73]:
# Добавление и удаление элемента с обеих сторон очереди работает за константное время.
# Но нет slice! 
deq[1:3]

TypeError: sequence index must be integer, not 'slice'

### Очереди с ограниченной длиной можно создать с помощью параметра maxlen
deq = deque([1, 2, 3], maxlen=3)

# Множества (set, frozenset)

### Общее frozenset и set

### В Python множество — это хеш-сет, то есть оно может содержать только элементы, которые можно захешировать (неизменяемые типы — хешируемые, изменяемые — нет)

In [99]:
{(2, 3)}

{(2, 3)}

In [100]:
{[2, 3]}

TypeError: unhashable type: 'list'

In [101]:
{set(), set()}

TypeError: unhashable type: 'set'

In [102]:
{frozenset(), frozenset()}

{frozenset()}

In [None]:
elems = {1, 2, 'py'}

In [77]:
print 1 in elems  # O(1) !!! 
print 'py' in elems  # O(1) !!!

True
True


In [81]:
# Объединение
print elems.union({1, 4})
print elems | {1, 4}

set([1, 2, 'py', 4])
set([1, 2, 'py', 4])


In [82]:
# Пересечение
print elems.intersection({1, 4})
print elems & {1, 4}

set([1])
set([1])


In [86]:
# Разность
print elems.difference({1, 4})
print elems - {1, 4}

set([2, 'py'])
set([2, 'py'])


In [12]:
# Вложения
a = {1, 3}
b = {2, 3}
print a >= b
print a > b
print a < b

False
False
False


### Только set

In [93]:
# Добавление
a.add(5)
a.update([1, 5])
# Удаление
a.remove(5) # remove удаляет из множества существующий элемент или 
            # поднимает исключение, если элемент во множестве не содержится
a.discard(5) # discard удаляет элемент, только если он содержится во множестве

# Словари

In [127]:
d = {'a': 1, 'b': 2, 'c': 5, 'd': 'yes'}
d = dict(a=1, b=2, c=5, d='yes')

In [128]:
# Проверка наличия ключа в словаре
'a' in d  # O(1) !!!

True

In [129]:
key = 'default'
default_val = 'default'
# Первые два выражения равносильны; третье (d[key] без проверки) бросает KeyError, если ключа нет
print d[key] if key in d else default_val
print d.get(key, default_val)
print d[key]

default
default


KeyError: 'default'

In [130]:
print d.keys()
print d.values()
print d.items()

['a', 'c', 'b', 'd']
[1, 5, 2, 'yes']
[('a', 1), ('c', 5), ('b', 2), ('d', 'yes')]


In [131]:
# Добавление в словарь
d['z'] = 0
print d
d.update({'y': -1, 'w': -2})
print d

{'a': 1, 'c': 5, 'b': 2, 'd': 'yes', 'z': 0}
{'a': 1, 'c': 5, 'b': 2, 'd': 'yes', 'w': -2, 'y': -1, 'z': 0}


In [135]:
# Удаление из словаря
del d['a']

In [136]:
for k in d:  # тоже самое, что и d.keys()
    print k, d[k]

c 5
b 2
d yes
w -2
y -1
z 0


In [18]:
# Объединение словарей без их изменения
dct1 = {'a':1, 'b':2}
dct2 = {'b':3, 'c':4}
union = dct1.copy()
union.update(dct2)
print(union, dct1, dct2)

dct1 = {'a':1, 'b':2}
dct2 = {'b':3, 'c':4}
union  = dict(dct1, **dct2)  # про ** будет чуть позже
print(union, dct1, dct2)

({'a': 1, 'c': 4, 'b': 3}, {'a': 1, 'b': 2}, {'c': 4, 'b': 3})
({'a': 1, 'c': 4, 'b': 3}, {'a': 1, 'b': 2}, {'c': 4, 'b': 3})


In [20]:
# Python has no switch statement, but a dict of callables does the same job.
def fail_func():
    """Print the failure marker."""
    print('Fail')


def success_func():
    """Print the success marker."""
    print('Success')


# Dispatch table: case label -> handler to call for that case.
cases = {}
cases['case_1'] = fail_func
cases['case_2'] = success_func

# Look up the handler by its label and call it right away.
cases['case_2']()

Success


## OrderedDict

In [150]:
from collections import OrderedDict
# словарь с ключами, упорядоченными по времени добавления
# Изменение значения по ключу не влияет на порядок ключей в словаре

## Counter

In [22]:
from collections import Counter

In [23]:
cnt = Counter([1, 1, 2, 1, 3])
cnt

Counter({1: 3, 2: 1, 3: 1})

In [24]:
cnt[0] += 1  # Нет ошибки !!!
cnt

Counter({0: 1, 1: 3, 2: 1, 3: 1})

In [29]:
cnt.most_common()

[(1, 3), (0, 1), (2, 1), (3, 1)]

In [159]:
c1 = Counter(foo=4, bar=-1) 
c2 = Counter(foo=2, bar=2)
print c1 + c2 # c1[k] + c2[k]
print c1 - c2 # c1[k] - c2[k]
print c1 & c2 # min(c1[k], c2[k])
print c1 | c2 # max(c1[k], c2[k])

Counter({'foo': 6, 'bar': 1})
Counter({'foo': 2})
Counter({'foo': 2})
Counter({'foo': 4, 'bar': 2})


# Упаковка и распаковка

In [166]:
def _max(x, y):
    if x > y:
        return x
    return y

In [167]:
_max(2, 3), _max(3, 2)

(3, 3)

In [47]:
def _max(a, *args, **kwargs):
    print 'args:', args, type(args)
    max_x = float('-inf')
    for x in args:
        if x > max_x:
            max_x = x
    return max_x

In [33]:
lst = [3, 1, 100, -2]
ss = {'a': 1, 'b': 2}
_max(*ss)

args: ('a', 'b') <type 'tuple'>


'b'

In [39]:
def return_x(**kwargs):
    """Return the value passed as keyword argument `x`, or None when it is absent.

    All keyword arguments are collected into the `kwargs` dict, which is
    printed first to show what the function received.
    """
    # A single pre-formatted argument prints the same text on Python 2 and 3;
    # the `print a, b` statement form is a SyntaxError on Python 3.
    print('kwargs: %s' % (kwargs,))
    return kwargs.get('x')

In [40]:
return_x({'x': 100, 'y': 50})

kwargs: ({'y': 50, 'x': 100},)


AttributeError: 'tuple' object has no attribute 'get'

In [41]:
return_x(**{'x': 100, 'y': 50})

TypeError: return_x() got an unexpected keyword argument 'y'

# Itertools

In [210]:
it = iter(xrange(7))

print (next(it))
print (next(it))
print ([x for x in it])

 
print (next(it))

0
1
[2, 3, 4, 5, 6]


StopIteration: 

In [226]:
a = iter([1, 2, 3])
b = iter([4, 5])
a + b

TypeError: unsupported operand type(s) for +: 'listiterator' and 'listiterator'

In [227]:
# Объединение итераторов
from itertools import chain
for x in chain(a, b):
    print x

1
2
3
4
5


In [228]:
list(a), list(b)

([], [])

In [230]:
# Срезы
from itertools import islice
a = iter([1, 2, 3])
b = iter([4, 5])

for x in islice(a, 1, 3):
    print x

2
3


In [241]:
# перестановки
from itertools import permutations
print list(permutations('AB'))

[('A', 'B'), ('B', 'A')]


In [242]:
# сочетания без повторений
from itertools import combinations
list(combinations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'C')]

In [240]:
# сочетания c повторениями
from itertools import combinations_with_replacement
list(combinations_with_replacement('ABC', 2))

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

In [238]:
# декартово произведение
from itertools import product

# Например, когда не знаешь глубину вложенного цикла
n = 3
cycles = [
    [1, 2],
    [3, 4],
    [5, 6]
]
for indecies in product(*cycles):
    print indecies

(1, 3, 5)
(1, 3, 6)
(1, 4, 5)
(1, 4, 6)
(2, 3, 5)
(2, 3, 6)
(2, 4, 5)
(2, 4, 6)


In [55]:
# декартово произведение
from itertools import product

# Например, когда не знаешь глубину вложенного цикла
n = 3
cycles = [
    xrange(3),
    xrange(4),
    xrange(2)
]
for i, j, k in product(*cycles):
    print i, j, k

0 0 0
0 0 1
0 1 0
0 1 1
0 2 0
0 2 1
0 3 0
0 3 1
1 0 0
1 0 1
1 1 0
1 1 1
1 2 0
1 2 1
1 3 0
1 3 1
2 0 0
2 0 1
2 1 0
2 1 1
2 2 0
2 2 1
2 3 0
2 3 1


### GroupBy

In [86]:
# данные key - value
from itertools import groupby
data = [
    ('Factory A', 'day 1', 100),
    ('Factory B', 'day 1', 200),
    ('Factory C', 'day 1', 300),
    
    ('Factory A', 'day 2', 175),
    ('Factory B', 'day 2', 115),
    ('Factory C', 'day 2', 100),
    
    ('Factory A', 'day 3', 500),
    ('Factory B', 'day 3', 800),
    ('Factory C', 'day 3', 1000)
]




In [87]:
# Сколько товара все фабрики произвели за каждый день
for (key, values) in groupby(data, lambda x: x[1]):
    print key, values

day 1 <itertools._grouper object at 0x112159690>
day 2 <itertools._grouper object at 0x112159750>
day 3 <itertools._grouper object at 0x1121597d0>


In [88]:
for (key, values) in groupby(data, lambda x: x[1]):
    s = 0
    for v in values:
        s += v[2]
    print key, s

day 1 600
day 2 390
day 3 2300


In [89]:
for (key, values) in groupby(data, lambda x: x[1]):
    print key, sum(v[2] for v in values)

day 1 600
day 2 390
day 3 2300


In [90]:
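# Сколько каждая фабрика произвела за все дни
# (groupby объединяет только подряд идущие элементы с одинаковым ключом,
# поэтому на несортированных данных группы получаются раздробленными)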
for (key, values) in groupby(data, lambda x: x[0]):
    print key, sum([v[2] for v in values])

Factory A 100
Factory B 200
Factory C 300
Factory A 175
Factory B 115
Factory C 100
Factory A 500
Factory B 800
Factory C 1000


In [91]:
data = sorted(data)

In [92]:
data

[('Factory A', 'day 1', 100),
 ('Factory A', 'day 2', 175),
 ('Factory A', 'day 3', 500),
 ('Factory B', 'day 1', 200),
 ('Factory B', 'day 2', 115),
 ('Factory B', 'day 3', 800),
 ('Factory C', 'day 1', 300),
 ('Factory C', 'day 2', 100),
 ('Factory C', 'day 3', 1000)]

In [100]:
for (key, values) in groupby(data, lambda x: x[0]):
    print key, np.mean([v[2] for v in values])

 Factory A 258.333333333
Factory B 371.666666667
Factory C 466.666666667


# Задания

In [67]:
import pandas as pd
import numpy as np

In [102]:
data = pd.read_csv('titanic.csv')

In [110]:
agg_funcs = {
    'Age': [np.mean, 'max', 'std', 'count'],
    'Sex': ['nunique', 'count'],
    'Survived': 'mean'
}

In [113]:
data.groupby(['Pclass', 'Embarked']).agg(agg_funcs)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Age,Age,Survived,Sex,Sex
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,max,std,count,mean,nunique,count
Pclass,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,C,38.027027,71.0,14.243454,74,0.694118,2,85
1,Q,38.5,44.0,7.778175,2,0.5,2,2
1,S,38.152037,80.0,15.315584,108,0.582677,2,127
2,C,22.766667,36.0,10.192551,15,0.529412,2,17
2,Q,43.5,57.0,19.091883,2,0.666667,2,3
2,S,30.386731,70.0,14.080001,156,0.463415,2,164
3,C,20.741951,45.5,11.712367,41,0.378788,2,66
3,Q,25.9375,70.5,16.807938,24,0.375,2,72
3,S,25.696552,74.0,12.110906,290,0.189802,2,353


In [120]:
from collections import defaultdict, Counter

In [126]:
d = defaultdict(list)

In [128]:
d[0].append(2)

In [129]:
d

defaultdict(list, {0: [2]})

In [105]:
data.groupby('Pclass')['Age'].mean()

Pclass
1    38.233441
2    29.877630
3    25.140620
Name: Age, dtype: float64

In [106]:
b = data.sort_values(by='Age').values

In [107]:
a = sorted(data.values, key=lambda x: x[5])

In [108]:
data_np = data.values

In [109]:
c = data_np[data_np[:, 5].argsort()]

In [131]:
from itertools import permutations

In [152]:
# For each n in 2..4, collect the permutations of range(n) in which no value
# stays at its own index, and print them next to n.
for n in xrange(2, 5):
    s = 0  # NOTE(review): never read in this cell - looks like a leftover
    good_perms = []
    for perm in permutations(xrange(n)):
        # Keep perm only if every position i holds a value different from i.
        if all([i != v for i, v in enumerate(perm)]):
            good_perms.append(perm)
        
    print n, good_perms

2 [(1, 0)]
3 [(1, 2, 0), (2, 0, 1)]
4 [(1, 0, 3, 2), (1, 2, 3, 0), (1, 3, 0, 2), (2, 0, 3, 1), (2, 3, 0, 1), (2, 3, 1, 0), (3, 0, 1, 2), (3, 2, 0, 1), (3, 2, 1, 0)]


In [138]:
text = ' '.join(data.Name.str.lower().values)

In [145]:
text

'braund, mr. owen harris cumings, mrs. john bradley (florence briggs thayer) heikkinen, miss. laina futrelle, mrs. jacques heath (lily may peel) allen, mr. william henry moran, mr. james mccarthy, mr. timothy j palsson, master. gosta leonard johnson, mrs. oscar w (elisabeth vilhelmina berg) nasser, mrs. nicholas (adele achem) sandstrom, miss. marguerite rut bonnell, miss. elizabeth saundercock, mr. william henry andersson, mr. anders johan vestrom, miss. hulda amanda adolfina hewlett, mrs. (mary d kingcome)  rice, master. eugene williams, mr. charles eugene vander planke, mrs. julius (emelia maria vandemoortele) masselmani, mrs. fatima fynney, mr. joseph j beesley, mr. lawrence mcgowan, miss. anna "annie" sloper, mr. william thompson palsson, miss. torborg danira asplund, mrs. carl oscar (selma augusta emilia johansson) emir, mr. farred chehab fortune, mr. charles alexander o\'dwyer, miss. ellen "nellie" todoroff, mr. lalio uruchurtu, don. manuel e spencer, mrs. william augustus (marie

In [143]:
# Count how often each 3-character substring occurs in `text`.
cnt = Counter()
for i in xrange(len(text) - 2):  # stop at the last index that still starts a full 3-character slice
    trigramm = text[i:i+3]
    cnt[trigramm] += 1  # Counter yields 0 for unseen keys, so no initialisation is needed

In [144]:
cnt 

Counter({'rev': 7,
         'all': 25,
         'nwe': 1,
         'sch': 6,
         'xtu': 1,
         'aub': 1,
         'aue': 2,
         'aud': 1,
         'aug': 14,
         'rek': 3,
         '(jo': 3,
         'ali': 22,
         'aum': 3,
         'aul': 5,
         'aun': 4,
         'xte': 2,
         'aur': 9,
         ' ta': 10,
         'alg': 1,
         '"an': 3,
         'sca': 3,
         'rem': 3,
         'fut': 2,
         'ble': 4,
         ' (b': 4,
         'ava': 2,
         'l/c': 1,
         'k, ': 26,
         'cmi': 1,
         'upi': 1,
         ' (g': 1,
         'au,': 1,
         'cma': 1,
         ' (f': 4,
         'lst': 1,
         'ree': 6,
         ' ka': 32,
         'me ': 2,
         ' ke': 9,
         ' kh': 1,
         ' ki': 9,
         'me)': 1,
         ' ko': 3,
         ' kl': 3,
         ' kr': 5,
         ' kv': 1,
         ' ku': 1,
         'alt': 8,
         'nen': 17,
         'nel': 10,
         'oze': 1,
         'nef': 2,
    