In [1]:
import numpy as np

In [2]:
x = np.array([2, 3, 5, 7, 11, 13])
x * 2

array([ 4,  6, 10, 14, 22, 26])

In [3]:
data = ['peter', 'Paul', 'MARY', 'gUIDO']
[s.capitalize() for s in data]

['Peter', 'Paul', 'Mary', 'Guido']

In [4]:
data = ['peter', 'Paul', None, 'MARY', 'gUIDO']
[s.capitalize() for s in data]

AttributeError: 'NoneType' object has no attribute 'capitalize'

In [20]:
i = 0
cap = []
for s in data:
    if s is not None:
        cap.append(s.capitalize())
        i+=1

print(cap)

['Peter', 'Paul', 'Mary', 'Guido']


In [21]:
import pandas as pd
names = pd.Series(data)
names

0    peter
1     Paul
2     None
3     MARY
4    gUIDO
dtype: object

In [22]:
names.str.capitalize()

0    Peter
1     Paul
2     None
3     Mary
4    Guido
dtype: object

In [23]:
monte = pd.Series(['Graham Chapman', 'John Cleese', 'Terry Gilliam',
                   'Eric Idle', 'Terry Jones', 'Michael Palin'])

In [24]:
monte.str.lower()

0    graham chapman
1       john cleese
2     terry gilliam
3         eric idle
4       terry jones
5     michael palin
dtype: object

In [25]:
monte.str.len()

0    14
1    11
2    13
3     9
4    11
5    13
dtype: int64

In [26]:
monte.str.startswith('T')

0    False
1    False
2     True
3    False
4     True
5    False
dtype: bool

In [30]:
monte.str.split()

0    [Graham, Chapman]
1       [John, Cleese]
2     [Terry, Gilliam]
3         [Eric, Idle]
4       [Terry, Jones]
5     [Michael, Palin]
dtype: object

In [32]:
monte.str.findall(r'^[^AEIOU].*[^aeiou]$')

0    [Graham Chapman]
1                  []
2     [Terry Gilliam]
3                  []
4       [Terry Jones]
5     [Michael Palin]
dtype: object

In [31]:
monte.str.extract('([A-Za-z]+)')

Unnamed: 0,0
0,Graham
1,John
2,Terry
3,Eric
4,Terry
5,Michael


In [33]:
";".join(monte)

'Graham Chapman;John Cleese;Terry Gilliam;Eric Idle;Terry Jones;Michael Palin'

In [34]:
monte.str.split().str.get(-1)

0    Chapman
1     Cleese
2    Gilliam
3       Idle
4      Jones
5      Palin
dtype: object

0    Chapman
1     Cleese
2    Gilliam
3       Idle
4      Jones
5      Palin
dtype: object

In [39]:
full_monte = pd.DataFrame({'name': monte,
                           'info': ['B|C|D', 'B|D', 'A|C', 'B|D', 'B|C',
                           'B|C|D']})
full_monte

Unnamed: 0,name,info
0,Graham Chapman,B|C|D
1,John Cleese,B|D
2,Terry Gilliam,A|C
3,Eric Idle,B|D
4,Terry Jones,B|C
5,Michael Palin,B|C|D


In [40]:
full_monte['info'].str.get_dummies('|')

Unnamed: 0,A,B,C,D
0,0,1,1,1
1,0,1,0,1
2,1,0,1,0
3,0,1,0,1
4,0,1,1,0
5,0,1,1,1


In [44]:
try:
    recipes = pd.read_json('openrecipes.json')
except ValueError as e:
    print("ValueError:", e)

ValueError: Trailing data


In [52]:
# Читаем весь файл в массив Python
with open('openrecipes.json', 'r') as f:
    # Извлекаем каждую строку
    data = (line.strip() for line in f)
    # Преобразуем так, чтобы каждая строка была элементом списка
    data_json = "[{0}]".format(','.join(data))
# Читаем результат в виде JSON
recipes = pd.read_json(data_json)

In [55]:
recipes.iloc[0]

cookTime                                                        PT
datePublished                                           2013-04-01
description      Got leftover Easter eggs?    Got leftover East...
image            http://static.thepioneerwoman.com/cooking/file...
ingredients      12 whole Hard Boiled Eggs\n1/2 cup Mayonnaise\...
name                                      Easter Leftover Sandwich
prepTime                                                     PT15M
recipeYield                                                      8
url              http://thepioneerwoman.com/cooking/2013/04/eas...
Name: 0, dtype: object

In [56]:
recipes.ingredients.str.len().describe()

count    1042.000000
mean      358.645873
std       187.332133
min        22.000000
25%       246.250000
50%       338.000000
75%       440.000000
max      3160.000000
Name: ingredients, dtype: float64

In [59]:
recipes.name[np.argmax(recipes.ingredients.str.len())]

'A Nice Berry Pie'

In [60]:
recipes.description.str.contains('[Bb]reakfast').sum()

11

In [61]:
recipes.ingredients.str.contains('[Cc]innamon').sum()

79

In [62]:
recipes.ingredients.str.contains('[Cc]inamon').sum()

0

In [63]:
spice_list = ['salt', 'pepper', 'oregano', 'sage', 'parsley',
              'rosemary', 'tarragon', 'thyme', 'paprika', 'cumin']

In [66]:
import re
spice_df = pd.DataFrame(dict((spice, recipes.ingredients.str.contains(spice, re.IGNORECASE)) for spice in spice_list))
spice_df.head(200)

Unnamed: 0,salt,pepper,oregano,sage,parsley,rosemary,tarragon,thyme,paprika,cumin
0,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False
8,True,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False


In [67]:
selection = spice_df.query('parsley & paprika & tarragon')
len(selection)

0