## Applying functions on a dataframe

### Apply function on a certain column

* Using a lambda function

In [1]:
import pandas as pd

# Small demo frame: two integer columns, three rows.
d = dict(col1=[1, 2, 3], col2=[3, 4, 5])
df = pd.DataFrame(d)

# Overwrite col1 with each of its values incremented by one,
# using an anonymous function passed to Series.apply.
df['col1'] = df['col1'].apply(lambda value: value + 1)
print(df)

   col1  col2
0     2     3
1     3     4
2     4     5


* Conditional logic on a dataframe

In [2]:
# Add a boolean column that is True where col1 is strictly greater than 3.
df["logic"] = df["col1"].gt(3)
print(df)

   col1  col2  logic
0     2     3  False
1     3     4  False
2     4     5   True


* Applying a created function on a dataframe

In [3]:
# create a function called times100
def times100(x):
    """Multiply a numeric cell value by 100, leaving strings untouched.

    Parameters
    ----------
    x : object
        A single cell value.

    Returns
    -------
    object
        * ``x`` itself when it is a string (including ``str`` subclasses);
        * ``100 * x`` when ``x`` is truthy or is a numeric zero;
        * ``None`` for everything else (``None``, ``False``, empty
          containers, ...).
    """
    # isinstance (rather than an exact type check) also catches str
    # subclasses, which would otherwise be repeated 100 times by `100 * x`.
    if isinstance(x, str):
        return x
    # Zero is falsy but is still a number, so it has to be scaled instead of
    # being discarded. bool is excluded on purpose: False keeps falling
    # through to the "everything else" branch, as it did before.
    is_numeric_zero = not x and not isinstance(x, bool) and x == 0
    if x or is_numeric_zero:
        return 100 * x
    # Anything left over has no meaningful "times 100".
    return None
    
# Run times100 on every cell of the frame, one column at a time.
# DataFrame.applymap is deprecated since pandas 2.1 (renamed DataFrame.map);
# mapping each column with Series.map gives the same element-wise result and
# works on both old and new pandas versions.
df.apply(lambda column: column.map(times100))

Unnamed: 0,col1,col2,logic
0,200,300,
1,300,400,
2,400,500,100.0


* Applying the `times100` function to a single column

In [4]:
# Select the col1 Series explicitly, then run times100 on each of its values.
df.loc[:, 'col1'].apply(times100)

0    200
1    300
2    400
Name: col1, dtype: int64

* Apply a function to every row in a dataframe

In [5]:
# Each dictionary describes one rectangle; the keys become the column names
# and every dictionary becomes one row of the dataframe.
rectangles = [
    dict(height=40, width=10),
    dict(height=20, width=9),
    dict(height=3.4, width=4),
]

rectangles_df = pd.DataFrame(data=rectangles)

In [6]:
# Area of a single rectangle, computed from its height and width
def calculate_area(row):
    """Return the product of the 'height' and 'width' entries of ``row``.

    ``row`` is anything that supports lookup by those two keys, such as a
    dataframe row passed in by ``DataFrame.apply(..., axis=1)``.
    """
    height, width = row['height'], row['width']
    return height * width

# axis='columns' (the same as axis=1) hands calculate_area one row at a time.
rectangles_df.apply(calculate_area, axis='columns')

0    400.0
1    180.0
2     13.6
dtype: float64

## Basic stats on a dataframe

In [7]:
import pandas as pd

# Mixed-type demo frame: one categorical, two numeric and one object column.
df = pd.DataFrame({
    'categorical': pd.Categorical(list('def')),
    'numericA': [1, 2, 3],
    'numericB': [2, 4, 5],
    'object': list('abc'),
})
# Summary statistics of the frame (last expression, so it is displayed).
df.describe()

Unnamed: 0,numericA,numericB
count,3.0,3.0
mean,2.0,3.666667
std,1.0,1.527525
min,1.0,2.0
25%,1.5,3.0
50%,2.0,4.0
75%,2.5,4.5
max,3.0,5.0


### Calculate MEAN (AVERAGE) of each column in a dataframe

In [8]:
# Column-wise mean. The frame also holds a categorical and an object column;
# numeric_only=True keeps them out of the calculation. Since pandas 2.0 such
# columns are no longer dropped silently and the call raises a TypeError
# without this flag.
df.mean(axis=0, numeric_only=True)

numericA    2.000000
numericB    3.666667
dtype: float64

### Calculate MEAN (AVERAGE) of each row in a dataframe

In [22]:
# Row-wise mean. Each row mixes numbers with categorical/object values;
# numeric_only=True averages only the numeric columns. Since pandas 2.0 the
# non-numeric values are no longer ignored silently and the call raises a
# TypeError without this flag.
df.mean(axis=1, numeric_only=True)

0    1.5
1    3.0
2    4.0
dtype: float64