# Apply, applymap & map
Referencia: https://towardsdatascience.com/introduction-to-pandas-apply-applymap-and-map-5d3e044e93ff

In [3]:
import numpy as np
import pandas as pd

## Series methods

### Map

Map values of Series according to input correspondence.

Used for substituting each value in a Series with another value, that may be derived from a function, a dict or a Series.

**When arg is a dictionary, values in Series that are not in the dictionary (as keys) are converted to NaN.**

In [4]:
# Sample Series of strings; the np.nan entry is a missing value, used in the
# cells below to show how map treats NaN.
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [5]:
# Keys are the current values of the Series, dict values are their replacements.
dictionary = {"cat": "kitten", "dog": "puppy"}
# 'rabbit' has no key in the dict, so it maps to NaN; the existing NaN stays NaN.
s.map(dictionary)

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

#### Aside: `str.format`

In [4]:
a = "Dani"
b = "Madrid"
c = 15

In [5]:
# {0}, {1} and {2} are positional indexes into the arguments of format(): a, b, c.
print("{0} vive en {1}.\nThe Bridge está en el número {2}.".format(a,b,c))

Dani vive en Madrid.
The Bridge está en el número 15.


In [6]:
lista = ['Gabriel', 'Clara', 'Borja', "Mónica"]
lista_ciudades = ['Madrid', 'Paris']
lista_numeros = range(3)

In [7]:
# Print one two-line sentence for every (nombre, ciudad) pair.
for nombre in lista:
    for ciudad in lista_ciudades:
        for numero in lista_numeros:
            print("{0} vive en {1}.\nThe Bridge está en el número {2}.".format(nombre, ciudad, numero))
            # Unconditional break: leaves the innermost loop after its first
            # iteration, so only the first numero (0) is ever printed.
            break

Gabriel vive en Madrid.
The Bridge está en el número 0.
Gabriel vive en Paris.
The Bridge está en el número 0.
Clara vive en Madrid.
The Bridge está en el número 0.
Clara vive en Paris.
The Bridge está en el número 0.
Borja vive en Madrid.
The Bridge está en el número 0.
Borja vive en Paris.
The Bridge está en el número 0.
Mónica vive en Madrid.
The Bridge está en el número 0.
Mónica vive en Paris.
The Bridge está en el número 0.


------------------------------------------------------------------

In [8]:
# A bound str.format works as the mapping function. NaN is passed to it like
# any other value and ends up as the text "nan".
s.map('My favourite animal is {}'.format)

0       My favourite animal is cat
1       My favourite animal is dog
2       My favourite animal is nan
3    My favourite animal is rabbit
dtype: object

In [9]:
# na_action='ignore' skips NaN values: they are propagated as NaN instead of
# being passed to the function.
s.map('My favourite animal is {}'.format, na_action='ignore')

0       My favourite animal is cat
1       My favourite animal is dog
2                              NaN
3    My favourite animal is rabbit
dtype: object

In [10]:
#dataframe example

In [6]:
# Example frame of foods and weights. Note the inconsistent capitalisation
# ('bacon' / 'Bacon', 'Pastrami' / 'pastrami'): the names are lowercased
# before being mapped further down.
data = pd.DataFrame({'food': ['bacon', 'pulled pork', 'bacon',
                              'Pastrami', 'corned beef', 'Bacon',
                              'pastrami', 'honey ham', 'nova lox'],
                     'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})

In [12]:
# Lookup table from food name to source animal. All keys are lowercase, so
# food names must be lowercased before they are looked up here.
meat_to_animal = {
  'bacon': 'pig',
  'pulled pork': 'pig',
  'pastrami': 'cow',
  'corned beef': 'cow',
  'honey ham': 'pig',
  'nova lox': 'salmon'
}

In [13]:
data['food']

0          bacon
1    pulled pork
2          bacon
3       Pastrami
4    corned beef
5          Bacon
6       pastrami
7      honey ham
8       nova lox
Name: food, dtype: object

In [14]:
# In two steps: lowercase the food names first, then map them

In [15]:
lowercased = data['food'].str.lower()
lowercased

0          bacon
1    pulled pork
2          bacon
3       pastrami
4    corned beef
5          bacon
6       pastrami
7      honey ham
8       nova lox
Name: food, dtype: object

In [16]:
data['animal'] = lowercased.map(meat_to_animal)

In [17]:
# In one step: lowercase each name inside the mapping function.
# dict.get(..., np.nan) yields NaN for foods missing from meat_to_animal
# (a plain meat_to_animal[...] lookup would raise KeyError), and
# na_action='ignore' propagates missing names as NaN instead of calling
# .lower() on them. This matches what the dict-based map produces in the
# two-step version.
data['animal2'] = data['food'].map(
    lambda food: meat_to_animal.get(food.lower(), np.nan),
    na_action='ignore',
)

In [18]:
data

Unnamed: 0,food,ounces,animal,animal2
0,bacon,4.0,pig,pig
1,pulled pork,3.0,pig,pig
2,bacon,12.0,pig,pig
3,Pastrami,6.0,cow,cow
4,corned beef,7.5,cow,cow
5,Bacon,8.0,pig,pig
6,pastrami,3.0,cow,cow
7,honey ham,5.0,pig,pig
8,nova lox,6.0,salmon,salmon


### Str
Vectorized string functions for Series and Index.

https://towardsdatascience.com/mastering-string-methods-in-pandas-8d3cd00b720d

- [`str.contains`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.contains.html)
- [`str.startswith`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.startswith.html#pandas.Series.str.startswith)

In [19]:
# List the public (non-underscore) attribute names of the Series `.str` accessor.
print([
    attr
    for attr in dir(pd.Series.str)
    if not attr.startswith("_")
])

['capitalize', 'casefold', 'cat', 'center', 'contains', 'count', 'decode', 'encode', 'endswith', 'extract', 'extractall', 'find', 'findall', 'fullmatch', 'get', 'get_dummies', 'index', 'isalnum', 'isalpha', 'isdecimal', 'isdigit', 'islower', 'isnumeric', 'isspace', 'istitle', 'isupper', 'join', 'len', 'ljust', 'lower', 'lstrip', 'match', 'normalize', 'pad', 'partition', 'repeat', 'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit', 'rstrip', 'slice', 'slice_replace', 'split', 'startswith', 'strip', 'swapcase', 'title', 'translate', 'upper', 'wrap', 'zfill']


In [20]:
# Slicing through .str is applied to every string: [:-1] drops the last character.
data['food'].str[:-1]

0          baco
1    pulled por
2          baco
3       Pastram
4    corned bee
5          Baco
6       pastram
7      honey ha
8       nova lo
Name: food, dtype: object

### Apply

Invoke function on values of Series.

The function can be a ufunc (a NumPy function that applies to the entire Series) or a Python function that only works on single values.

In [21]:
# Load chipotle.tsv (tab-separated, hence sep='\t') directly from its GitHub
# URL; this cell needs network access.
chipo = pd.read_csv("https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv", sep='\t')

In [22]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [23]:
# Vectorized version: drop the first character of every price string, then cast to float.
# NOTE(review): the dropped character is presumably a currency symbol such as "$" — confirm against the raw data.
chipo['item_price'].str[1:].astype(float)

0        2.39
1        3.39
2        3.39
3        2.39
4       16.98
        ...  
4617    11.75
4618    11.75
4619    11.25
4620     8.75
4621     8.75
Name: item_price, Length: 4622, dtype: float64

In [24]:
# Same conversion with Series.apply: the lambda is called once per value,
# dropping the first character and converting the rest to float.
chipo.item_price.apply(lambda x: float(x[1:]))

0        2.39
1        3.39
2        3.39
3        2.39
4       16.98
        ...  
4617    11.75
4618    11.75
4619    11.25
4620     8.75
4621     8.75
Name: item_price, Length: 4622, dtype: float64

## Dataframe methods

### Apply

Apply a function along an axis of the DataFrame.

Objects passed to the function are Series objects whose index is either the DataFrame’s index (axis=0) or the DataFrame’s columns (axis=1). By default (result_type=None), the final return type is inferred from the return type of the applied function. Otherwise, it depends on the result_type argument.

**axis : {0 or 'index', 1 or 'columns'}, default 0**

The `axis` parameter specifies the direction in which the function is applied: 0 passes each column to the function, and 1 passes each row.

In [7]:
# Small all-integer frame with labelled rows, used for the apply examples below.
df = pd.DataFrame(
    {
        "A": [1, 2, 3, 4],
        "B": [10, 20, 30, 40],
        "C": [20, 40, 60, 80],
    },
    index=["Row {}".format(position) for position in range(1, 5)],
)
df

Unnamed: 0,A,B,C
Row 1,1,10,20
Row 2,2,20,40
Row 3,3,30,60
Row 4,4,40,80


In [8]:
def suma_serie(row):
    """Return the sum of the values in ``row``.

    ``row`` is any object exposing ``.sum()``. In this notebook it is the
    Series that ``DataFrame.apply`` passes in: a column when ``axis=0`` and
    a row when ``axis=1``.
    """
    total = row.sum()
    return total

In [9]:
df.apply(suma_serie, axis = 0) # vertical: one sum per column

A     10
B    100
C    200
dtype: int64

In [10]:
df.apply(suma_serie, axis = 1) # horizontal: one sum per row

Row 1     31
Row 2     62
Row 3     93
Row 4    124
dtype: int64

In [11]:
# Row-wise total of the original value columns. The columns are selected
# explicitly so that re-running this cell does not add an already existing
# 'D' (or any later derived column) into the new total.
df['D'] = df[['A', 'B', 'C']].apply(suma_serie, axis = 1)
df

Unnamed: 0,A,B,C,D
Row 1,1,10,20,31
Row 2,2,20,40,62
Row 3,3,30,60,93
Row 4,4,40,80,124


In [30]:
df['A']

Row 1    1
Row 2    2
Row 3    3
Row 4    4
Name: A, dtype: int64

In [12]:
df.apply(lambda x: x.mean(), axis = 0)

A     2.5
B    25.0
C    50.0
D    77.5
dtype: float64

In [13]:
# Row-wise mean of A, B, C and D. Selecting the columns explicitly keeps the
# cell idempotent: on a re-run the existing 'E' (or any later column) is not
# averaged into the new value.
df['E'] = df[['A', 'B', 'C', 'D']].apply(lambda x: x.mean(), axis = 1)

In [33]:
df

Unnamed: 0,A,B,C,D,E
Row 1,1,10,20,31,15.5
Row 2,2,20,40,62,31.0
Row 3,3,30,60,93,46.5
Row 4,4,40,80,124,62.0


In [34]:
# With axis=1 the lambda receives a whole row as a Series, so x**2 squares every value of that row at once
df.apply(lambda x: x**2, axis = 1)

Unnamed: 0,A,B,C,D,E
Row 1,1.0,100.0,400.0,961.0,240.25
Row 2,4.0,400.0,1600.0,3844.0,961.0
Row 3,9.0,900.0,3600.0,8649.0,2162.25
Row 4,16.0,1600.0,6400.0,15376.0,3844.0


In [35]:
# Standalone Series holding the same values as 'Row 1' of df (columns A to E),
# used to try the row operations outside of apply.
serie_cualquiera = pd.Series([1, 10,20,31,15.5])

In [36]:
serie_cualquiera**2

0      1.00
1    100.00
2    400.00
3    961.00
4    240.25
dtype: float64

In [37]:
serie_cualquiera.mean()

15.5

In [38]:
suma_serie(serie_cualquiera)

77.5

In [39]:
df.apply(suma_serie, axis = 0)

A     10.0
B    100.0
C    200.0
D    310.0
E    155.0
dtype: float64

In [40]:
# Append a totals row holding the per-column sums. Any existing 'Row nueva'
# is dropped before summing (errors='ignore' makes that a no-op on the first
# run) so that re-running the cell does not add the previous totals into the
# new ones.
df.loc['Row nueva'] = df.drop(index='Row nueva', errors='ignore').apply(suma_serie, axis = 0)

In [41]:
df

Unnamed: 0,A,B,C,D,E
Row 1,1.0,10.0,20.0,31.0,15.5
Row 2,2.0,20.0,40.0,62.0,31.0
Row 3,3.0,30.0,60.0,93.0,46.5
Row 4,4.0,40.0,80.0,124.0,62.0
Row nueva,10.0,100.0,200.0,310.0,155.0


In [42]:
# On a single Series: Series.apply calls the lambda once per value of column 'C'
df['J'] = df['C'].apply(lambda x: x*2)

In [43]:
df

Unnamed: 0,A,B,C,D,E,J
Row 1,1.0,10.0,20.0,31.0,15.5,40.0
Row 2,2.0,20.0,40.0,62.0,31.0,80.0
Row 3,3.0,30.0,60.0,93.0,46.5,120.0
Row 4,4.0,40.0,80.0,124.0,62.0,160.0
Row nueva,10.0,100.0,200.0,310.0,155.0,400.0


### Applymap
Apply a function to a DataFrame elementwise.

This method applies a function that accepts and returns a scalar to every element of a DataFrame.

In [44]:
# Series.map calls np.sum on one value at a time; the sum of a single number
# is that number, so the row comes back unchanged.
df.loc['Row 1'].map(np.sum)

A     1.0
B    10.0
C    20.0
D    31.0
E    15.5
J    40.0
Name: Row 1, dtype: float64

In [45]:
# applymap calls np.sum on every cell individually, so the frame comes back unchanged.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map — confirm the target pandas version before renaming.
df.applymap(np.sum)

Unnamed: 0,A,B,C,D,E,J
Row 1,1.0,10.0,20.0,31.0,15.5,40.0
Row 2,2.0,20.0,40.0,62.0,31.0,80.0
Row 3,3.0,30.0,60.0,93.0,46.5,120.0
Row 4,4.0,40.0,80.0,124.0,62.0,160.0
Row nueva,10.0,100.0,200.0,310.0,155.0,400.0


In [46]:
data.dtypes

food        object
ounces     float64
animal      object
animal2     object
dtype: object

In [47]:
data.head()

Unnamed: 0,food,ounces,animal,animal2
0,bacon,4.0,pig,pig
1,pulled pork,3.0,pig,pig
2,bacon,12.0,pig,pig
3,Pastrami,6.0,cow,cow
4,corned beef,7.5,cow,cow


In [None]:
# Be careful when applying the same function to every cell: some columns may hold types the function cannot handle
# example: len() on a cell of the float column "ounces"

In [49]:
# Solution 1: leave the float64 columns out, then take the length of each remaining cell
data.select_dtypes(exclude=['float64']).applymap(len)

Unnamed: 0,food,animal,animal2
0,5,3,3
1,11,3,3
2,5,3,3
3,8,3,3
4,11,3,3
5,5,3,3
6,8,3,3
7,9,3,3
8,8,6,6


In [50]:
# Solution 2: drop the numeric column by name before measuring each cell
data.drop(columns="ounces").applymap(len)

Unnamed: 0,food,animal,animal2
0,5,3,3
1,11,3,3
2,5,3,3
3,8,3,3
4,11,3,3
5,5,3,3
6,8,3,3
7,9,3,3
8,8,6,6


In [51]:
# Solution 3: convert every cell to text first, so len() also works on the floats
# (it then counts the characters of the text form, e.g. 12.0 -> 4)
data.applymap(lambda cell: len(str(cell)))

Unnamed: 0,food,ounces,animal,animal2
0,5,3,3,3
1,11,3,3,3
2,5,4,3,3
3,8,3,3,3
4,11,3,3,3
5,5,3,3,3
6,8,3,3,3
7,9,3,3,3
8,8,3,6,6
