## map() function returns a map object of the results after applying the given 
### function to each item of a given iterable (list, tuple etc.)
 
### Syntax : map(fun, iter)


In [1]:
def addition(n):
    return n + n
  
# We double all numbers using map()
numbers = (1, 2, 3, 4)
result = map(addition, numbers)
print(list(result))

[2, 4, 6, 8]


## Series.map(arg, na_action=None)[source]
## Parameters arg:function, collections.abc.Mapping subclass or Series Mapping correspondence. na_action: {None, ‘ignore’}, default None If ‘ignore’, propagate NaN values, without passing them to the mapping correspondence.

## In computing, NaN, standing for Not a Number, is a member of a numeric data type that can be interpreted as a value that is undefined or unrepresentable.

## Returns: Series-Same index as caller.


In [2]:
import pandas as pd
import numpy as np
s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
s

0       cat
1       dog
2       NaN
3    rabbit
dtype: object

In [3]:
s.map({'cat': 'kitten', 'dog': 'puppy'})

0    kitten
1     puppy
2       NaN
3       NaN
dtype: object

In [4]:
s.map({'cat': 'kitten', 'dog': 'puppy', 'rabbit':'bunny'})

0    kitten
1     puppy
2       NaN
3     bunny
dtype: object

In [5]:
s.map('I am a {}'.format)

0       I am a cat
1       I am a dog
2       I am a nan
3    I am a rabbit
dtype: object

In [6]:
s.map('I am a {}'.format, na_action='ignore')

0       I am a cat
1       I am a dog
2              NaN
3    I am a rabbit
dtype: object

## Python Lambda Functions are anonymous function means that the function is without a name. As we already know that the def keyword is used to define a normal function in Python. Similarly, the lambda keyword is used to define an anonymous function in Python. lambda functions are syntactically restricted to a single expression.

In [7]:
numbers = (1, 2, 3, 4)
result = map(lambda n: n + n, numbers)
print(list(result))
[2, 4, 6, 8]
x = lambda a, b, c : a + b + c
print(x(1, 2, 3))

[2, 4, 6, 8]
6


In [8]:
n1 = [1, 2, 3]
n2 = [4, 5, 6]
  
result = map(lambda x, y: x + y, n1, n2)
print(list(result))

[5, 7, 9]


In [9]:
import pandas as pd
  
# creating and initializing a list
values= [['Rohan',455],['Elvish',250],['Deepak',495],
         ['Soni',400],['Radhika',350],['Vansh',450]]
 
# creating a pandas dataframe
df = pd.DataFrame(values,columns=['Name','Total_Marks'])
 
# Applying lambda function to find
# percentage of 'Total_Marks' column
# using df.assign()
df = df.assign(percentage = lambda x: (x['Total_Marks'] /500 * 100))
 
# displaying the data frame
df


Unnamed: 0,Name,Total_Marks,percentage
0,Rohan,455,91.0
1,Elvish,250,50.0
2,Deepak,495,99.0
3,Soni,400,80.0
4,Radhika,350,70.0
5,Vansh,450,90.0


In [10]:
import pandas as pd
import numpy as np
 
# creating and initializing a nested list
values_list = [[15, 2.5, 100], [20, 4.5, 50], [25, 5.2, 80],
               [45, 5.8, 48], [40, 6.3, 70], [41, 6.4, 90],
               [51, 2.3, 111]]
 
# creating a pandas dataframe
df = pd.DataFrame(values_list, columns=['Field_1', 'Field_2', 'Field_3'],
                  index=['a', 'b', 'c', 'd', 'e', 'f', 'g'])
print(df)
 
# Apply function numpy.square() to square
# the values of 3 rows only i.e. with row
# index name 'a', 'e' and 'g' only
df = df.apply(lambda x: np.square(x) if x.name in [
              'a', 'e', 'g'] else x, axis=1)
 
# printing dataframe
df

   Field_1  Field_2  Field_3
a       15      2.5      100
b       20      4.5       50
c       25      5.2       80
d       45      5.8       48
e       40      6.3       70
f       41      6.4       90
g       51      2.3      111


Unnamed: 0,Field_1,Field_2,Field_3
a,225.0,6.25,10000.0
b,20.0,4.5,50.0
c,25.0,5.2,80.0
d,45.0,5.8,48.0
e,1600.0,39.69,4900.0
f,41.0,6.4,90.0
g,2601.0,5.29,12321.0


# filter() in python


### The filter() method filters the given sequence with the help of a function that tests each element in the sequence to be true or not

### Syntax: DataFrame.filter(items=None, like=None, regex=None, axis=None)


In [11]:
#Parameters:
#items : List of info axis to restrict to (must not all be present)
#like : Keep info axis where “arg in col == True”
#regex : Keep info axis with re.search(regex, col) == True
#axis : The axis to filter on. By default this is the info axis, ‘index’ for Series, ‘columns’ for DataFrame

In [12]:
# a list contains both even and odd numbers. 
seq = [0, 1, 2, 3, 5, 8, 13]
  
# result contains odd numbers of the list
result = filter(lambda x: x % 2 != 0, seq)
print(list(result))
  
# result contains even numbers of the list
result = filter(lambda x: x % 2 == 0, seq)
print(list(result))
[1, 3, 5, 13]
[0, 2, 8]
# importing pandas as pd
import pandas as pd
  
# Creating the dataframe 
df = pd.read_csv("nba.csv")
  
# Print the dataframe"=
df

[1, 3, 5, 13]
[0, 2, 8]


Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [13]:
# applying filter function 
df.filter(["Name", "College", "Salary"])

Unnamed: 0,Name,College,Salary
0,Avery Bradley,Texas,7730337.0
1,Jae Crowder,Marquette,6796117.0
2,John Holland,Boston University,
3,R.J. Hunter,Georgia State,1148640.0
4,Jonas Jerebko,,5000000.0
...,...,...,...
453,Shelvin Mack,Butler,2433333.0
454,Raul Neto,,900000.0
455,Tibor Pleiss,,2900000.0
456,Jeff Withey,Kansas,947276.0


In [14]:
# importing pandas as pd
import pandas as pd
  
# Creating the dataframe 
df = pd.read_csv("nba.csv")

df.filter(regex ='[tT]')

Unnamed: 0,Team,Position,Height,Weight
0,Boston Celtics,PG,6-2,180.0
1,Boston Celtics,SF,6-6,235.0
2,Boston Celtics,SG,6-5,205.0
3,Boston Celtics,SG,6-5,185.0
4,Boston Celtics,PF,6-10,231.0
...,...,...,...,...
453,Utah Jazz,PG,6-3,203.0
454,Utah Jazz,PG,6-1,179.0
455,Utah Jazz,C,7-3,256.0
456,Utah Jazz,C,7-0,231.0


# Reduce FUNCTION

### The reduce(fun,seq) function is used to apply a particular function passed in its argument to all of the list elements mentioned in the sequence passed along.This function is defined in “functools” module.

In [15]:
# python code to demonstrate working of reduce()
# using operator functions
 
# importing functools for reduce()
import functools as ft
 
# importing operator for operator functions
import operator as op
 
# initializing list
lis = [1, 3, 5, 6, 2]
 
# using reduce to compute sum of list
# using operator functions
print("The sum of the list elements is : ", end="")
print(ft.reduce(op.add, lis))
 
# using reduce to compute product
# using operator functions
print("The product of list elements is : ", end="")
print(ft.reduce(op.mul, lis))

The sum of the list elements is : 17
The product of list elements is : 180


# Arithmetic function Application Mapping

In [16]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
srs1 = Series([1,9,-4,3,3])
srs2 = Series([2,3,4,5,10], index=[0,1,2,3,5])

In [17]:
print(srs1)
print(srs2)

0    1
1    9
2   -4
3    3
4    3
dtype: int64
0     2
1     3
2     4
3     5
5    10
dtype: int64


In [18]:
srs1 + srs2
srs1 * srs2

0     2.0
1    27.0
2   -16.0
3    15.0
4     NaN
5     NaN
dtype: float64

In [19]:
srs1 ** srs2
#srs1 > srs2

0      1.0
1    729.0
2    256.0
3    243.0
4      NaN
5      NaN
dtype: float64

In [20]:
srs1 > Series([1,2,3,4,5])

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [22]:
#0srs1>srs2

In [23]:
np.sqrt(srs2)

0    1.414214
1    1.732051
2    2.000000
3    2.236068
5    3.162278
dtype: float64

In [24]:
np.abs(srs1)

0    1
1    9
2    4
3    3
4    3
dtype: int64

In [25]:
type(np.abs(srs1))

pandas.core.series.Series

In [26]:
@np.vectorize
def trunc(x):
    return x if x > 0 else 0

trunc(np.array([-1,5,4,-3,0]))

array([0, 5, 4, 0, 0])

In [27]:
trunc(srs1)

array([1, 9, 0, 3, 3], dtype=int64)

In [28]:
# Python type() is a built-in function that is used to return the type of 
# data stored in the objects or variables in the program.

type(trunc(srs1))

numpy.ndarray

In [29]:
# Series Methods
srs1.mean()

9

In [30]:
srs1.std()

4.669047011971501

In [31]:
srs1.max()

9

In [32]:
# The numpy. argmax() function returns indices of the max element of the 
# array in a particular axis.
srs1.argmax()

1

In [33]:
srs1.cumsum()

0     1
1    10
2     6
3     9
4    12
dtype: int64

In [34]:
srs1.abs()

0    1
1    9
2    4
3    3
4    3
dtype: int64

# function Application and Mapping

In [35]:
srs1.apply(lambda x: x if x > 2 else 2)

0    2
1    9
2    2
3    3
4    3
dtype: int64

In [36]:
srs3 = Series(['alpha','beta','gama','delta'], index=['a','b','c','d'])
print(srs3)


a    alpha
b     beta
c     gama
d    delta
dtype: object


In [37]:
obj = {"alpha":1, "beta":2, "gama":-1, "delta":-3}
srs3.map(obj)

a    1
b    2
c   -1
d   -3
dtype: int64

In [38]:
srs4 = Series(obj)
print(srs4)

alpha    1
beta     2
gama    -1
delta   -3
dtype: int64


In [39]:
srs3.map(srs4)

a    1
b    2
c   -1
d   -3
dtype: int64

In [40]:
# union of the series
union = pd.Series(np.union1d(srs1, srs2))
union

0    -4
1     1
2     2
3     3
4     4
5     5
6     9
7    10
dtype: int64

In [41]:
srs1.map(lambda  x: x if x > 2 else 2)

0    2
1    9
2    2
3    3
4    3
dtype: int64

In [42]:
#Create two series as shown using pd.series() function.
#Series_A = [1,2,3,4,5] Series_B = [4,5,6,7,8]
#Get the items not common to both.

In [43]:
import pandas as pd 
import numpy as np
  
# create the series 
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
print(ser1,"\n",ser2)

0    1
1    2
2    3
3    4
4    5
dtype: int64 
 0    4
1    5
2    6
3    7
4    8
dtype: int64


In [44]:
# union of the series
union = pd.Series(np.union1d(ser1, ser2))
union

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64

In [45]:
# intersection of the series
intersect = pd.Series(np.intersect1d(ser1, ser2))
intersect

0    4
1    5
dtype: int64

In [46]:
# uncommon elements in both the series 
notcommonseries = union[~union.isin(intersect)]
print(notcommonseries)

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64


In [47]:
import pandas as pd
df = pd.DataFrame([[19, 14, 18, 19],
                   [18, 10, 17, 16],
                   [17, 16, 18, 15]],
                  columns=['C',  'DS', 
                           'C++', 'Java'])
df

Unnamed: 0,C,DS,C++,Java
0,19,14,18,19
1,18,10,17,16
2,17,16,18,15


In [48]:
df.describe()

Unnamed: 0,C,DS,C++,Java
count,3.0,3.0,3.0,3.0
mean,18.0,13.333333,17.666667,16.666667
std,1.0,3.05505,0.57735,2.081666
min,17.0,10.0,17.0,15.0
25%,17.5,12.0,17.5,15.5
50%,18.0,14.0,18.0,16.0
75%,18.5,15.0,18.0,17.5
max,19.0,16.0,18.0,19.0


In [49]:
df.sum()

C       54
DS      40
C++     53
Java    50
dtype: int64

In [50]:
df.max()

C       19
DS      16
C++     18
Java    19
dtype: int64

In [51]:
df.min()

C       17
DS      10
C++     17
Java    15
dtype: int64

In [52]:
df.mean()

C       18.000000
DS      13.333333
C++     17.666667
Java    16.666667
dtype: float64

In [53]:
df.count()

C       3
DS      3
C++     3
Java    3
dtype: int64

In [54]:
df.agg(['sum', 'min', 'max'])

Unnamed: 0,C,DS,C++,Java
sum,54,40,53,50
min,17,10,17,15
max,19,16,18,19


In [55]:
# Groupping

from pandas import DataFrame
df1 = DataFrame(
                [
                    ['Aryan','Singh','Full-time','adminstration',2,20000],
                    ['Rohan','Agarawal','intern','Technical',3,5000],
                    ['Rjya','Shah','Full-time','adminstration',5,10000],
                    ['Yash','Bhatia','Part-time','Technical',7,10000],
                    ['Siddhant','Khanna','Full-time','Management',6,20000],
                ], columns=['First Name','Last name','Type','Department','YoE','Salary']
              )
df1

Unnamed: 0,First Name,Last name,Type,Department,YoE,Salary
0,Aryan,Singh,Full-time,adminstration,2,20000
1,Rohan,Agarawal,intern,Technical,3,5000
2,Rjya,Shah,Full-time,adminstration,5,10000
3,Yash,Bhatia,Part-time,Technical,7,10000
4,Siddhant,Khanna,Full-time,Management,6,20000


In [56]:
df1.groupby(['Type']).sum()

Unnamed: 0_level_0,YoE,Salary
Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Full-time,13,50000
Part-time,7,10000
intern,3,5000


In [57]:
df1.groupby(['Type']).mean()

Unnamed: 0_level_0,YoE,Salary
Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Full-time,4.333333,16666.666667
Part-time,7.0,10000.0
intern,3.0,5000.0


In [58]:
df1.groupby(['Type','Department']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,YoE,Salary
Type,Department,Unnamed: 2_level_1,Unnamed: 3_level_1
Full-time,Management,6,20000
Full-time,adminstration,7,30000
Part-time,Technical,7,10000
intern,Technical,3,5000


In [59]:
df1.groupby(['Department','Type']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,YoE,Salary
Department,Type,Unnamed: 2_level_1,Unnamed: 3_level_1
Management,Full-time,6,20000
Technical,Part-time,7,10000
Technical,intern,3,5000
adminstration,Full-time,7,30000
