### Pandas DataFrame.apply() Allows the user to pass a function and apply it to every single value of the Pandas series.

In [1]:
import pandas as pd
# Create dataframe
data = pd.DataFrame({'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]})
data


Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [2]:
# I need to create the new column which contains the length of company

In [3]:
l=[]
for i in data["Company"]:
    l.append(len(i))
data["Company_length"]=pd.Series(l)
print(data)
data.dtypes

  Company   Person  Sales  Company_length
0    GOOG      Sam    200               4
1    GOOG  Charlie    120               4
2    MSFT      Amy    340               4
3    MSFT  Vanessa    124               4
4      FB     Carl    243               2
5      FB    Sarah    350               2


Company           object
Person            object
Sales              int64
Company_length     int64
dtype: object

#### or

In [4]:
###using apply(): Create a new column(comp_length) whcih contains company length
data["comp_length"]=data["Company"].apply(lambda x:len(x))  # yha pas series bna dega automatically 
data

Unnamed: 0,Company,Person,Sales,Company_length,comp_length
0,GOOG,Sam,200,4,4
1,GOOG,Charlie,120,4,4
2,MSFT,Amy,340,4,4
3,MSFT,Vanessa,124,4,4
4,FB,Carl,243,2,2
5,FB,Sarah,350,2,2


In [5]:
# create a new column contains log sales value  
import numpy as np
data["log_sales"]=data["Sales"].apply(lambda x:np.log(x))
data

Unnamed: 0,Company,Person,Sales,Company_length,comp_length,log_sales
0,GOOG,Sam,200,4,4,5.298317
1,GOOG,Charlie,120,4,4,4.787492
2,MSFT,Amy,340,4,4,5.828946
3,MSFT,Vanessa,124,4,4,4.820282
4,FB,Carl,243,2,2,5.493061
5,FB,Sarah,350,2,2,5.857933


In [14]:
#usage of apply()
info = pd.DataFrame([[2, 7]] * 4, columns=['P', 'Q']) 
print("datafarme 1\n\n",info)
sqrt=info.apply(np.sqrt,axis=1)  
print("sqrt on each of the rows as axis=1 columnwise\n\n",sqrt)
aggregate_sum=info.apply(np.sum, axis=0) 
print("aggregate on axis=1 columnwise\n\n",aggregate_sum)
aggregate_sum1=info.apply(np.sum, axis=1) 



datafarme 1

    P  Q
0  2  7
1  2  7
2  2  7
3  2  7
sqrt on each of the rows as axis=1 columnwise

           P         Q
0  1.414214  2.645751
1  1.414214  2.645751
2  1.414214  2.645751
3  1.414214  2.645751
aggregate on axis=1 columnwise

 P     8
Q    28
dtype: int64


In [15]:
#usage of apply()
info = pd.DataFrame([[2, 7]] * 4, columns=['P', 'Q']) 
print("datafarme 1\n\n",info)
df2=info.apply(lambda x: [1, 2], axis=1, result_type='expand')   #'expand': It defines the list-like results that will be turned into columns.
                                                                #'reduce': It is the opposite of 'expand'. If possible, it returns a Series \
                                                                 #rather than expanding list-like results.
print(df2)
df3=info.apply(lambda x: [1, 2], axis=1) 
print(df3)


datafarme 1

    P  Q
0  2  7
1  2  7
2  2  7
3  2  7
   0  1
0  1  2
1  1  2
2  1  2
3  1  2
0    [1, 2]
1    [1, 2]
2    [1, 2]
3    [1, 2]
dtype: object


In [16]:
#usage of apply(): broadcast': It broadcast the results to the original shape of the DataFrame, the original index, and the columns will be retained.
##The default value None depends on the return value of the applied function , i.e., list-like results returned as a Series of those.
#If apply returns a Series, it expands to the columns.
info = pd.DataFrame([[2, 7]] * 4, columns=['P', 'Q']) 
print("datafarme 1\n\n",info)
df=info.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1)  
df1=info.apply(lambda x: [1, 2], axis=1, result_type='broadcast')  
df1 

datafarme 1

    P  Q
0  2  7
1  2  7
2  2  7
3  2  7


Unnamed: 0,P,Q
0,1,2
1,1,2
2,1,2
3,1,2
