# Numpy

In [1]:
import numpy as np

## 1. Creation

- To create a 1-dimensional ndarray (array), we first need a list/tuple of elements
- we use the function `np.array()` to convert that list into an array

In [2]:
# Sample data: 18 integers reused by the list, ndarray and Series examples
l1 = [
    10, 12, 45, 32, 12, 34, 56, 78, 21,
    34, 23, 77, 54, 23, 40, 22, 16, 29,
]

In [10]:
# Plain-Python way #1: an explicit loop that builds the tripled list step by step
res = []
for value in l1:
    tripled = value * 3
    res.append(tripled)
print(res)

# Plain-Python way #2: the same result as a one-line list comprehension
print([value * 3 for value in l1])

[30, 36, 135, 96, 36, 102, 168, 234, 63, 102, 69, 231, 162, 69, 120, 66, 48, 87]
[30, 36, 135, 96, 36, 102, 168, 234, 63, 102, 69, 231, 162, 69, 120, 66, 48, 87]


In [3]:
# Build a 1-D ndarray from the Python list l1
arr1 = np.array(l1)

In [5]:
# Confirm the conversion: the result is a numpy.ndarray, no longer a list
type(arr1)

numpy.ndarray

In [12]:
# Vectorized arithmetic: every element is multiplied by 3 without an explicit loop
arr1 * 3

array([ 30,  36, 135,  96,  36, 102, 168, 234,  63, 102,  69, 231, 162,
        69, 120,  66,  48,  87])

## 2. Accessing elements

- This is similar to lists/tuples
- we'll use `[]` to extract elements by indexes
- indexing is from 0 to n-1

In [13]:
# Index 4 is the fifth element, because indexing starts at 0
arr1[4]

12

In [15]:
# Slice: positions 5 through 9 (the stop position 10 is excluded)
arr1[5:10]

array([34, 56, 78, 21, 34])

In [16]:
# A negative index counts from the end: -1 is the last element
arr1[-1]

29

In [17]:
# Task: from list l1, pick the elements at positions 0, 1, 6, 10 and 11
# (each required position is looked up one at a time)
req_pos = [0, 1, 6, 10, 11]
res = [l1[pos] for pos in req_pos]
res

[10, 12, 56, 23, 77]

In [19]:
# Task: from array arr1, get the elements at the arbitrary positions 0, 1, 6, 10, 11
# Put the required positions in a list: [0, 1, 6, 10, 11]
# and pass that list straight to the [] extractor (indexing with a list of integers)

arr1[[0,1,6,10,11]]



array([10, 12, 56, 23, 77])

## 3. Applying conditions

- we can use the regular loop on arrays to apply conditions
- Owing to the vectorized nature of numpy, we can use the [] extractor to apply conditions
    - the concept of **boolean indexing** is being used in this extraction
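- ***When conditions are combined with `&` / `|`, each condition MUST be put inside its own `()`*** (for a single condition the parentheses are optional)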
> e.g. `arr1[(arr1 < 40)]`

In [22]:
# Task: from arr1, collect every element that is < 40 (plain-Python version)

below_40 = [value for value in arr1 if value < 40]
res = np.array(below_40)
res

array([10, 12, 32, 12, 34, 21, 34, 23, 23, 22, 16, 29])

In [23]:
# from arr1, get all elements that are < 40
# (vectorized version: the comparison builds a boolean mask and [] keeps the True positions)

arr1[(arr1 < 40)]

array([10, 12, 32, 12, 34, 21, 34, 23, 23, 22, 16, 29])

In [24]:
arr1 < 40

# The comparison is evaluated element by element and yields a boolean index (mask):
#   True  -> the element at that position satisfies the condition
#   False -> it does not
# Passing this mask to [] extracts the elements marked True into a new array

array([ True,  True, False,  True,  True,  True, False, False,  True,
        True,  True, False, False,  True, False,  True,  True,  True])

In [26]:
# Task : from arr1 get elements that are > 30 and < 70

# arr1[arr1 > 30 and arr1 < 70] # wrong: `and` cannot combine arrays element by element
# arr1[(arr1 > 30) and (arr1 < 70)] # wrong: still `and`; parentheses alone do not help
arr1[(arr1 > 30) & (arr1 < 70)] # correct: element-wise `&`, each condition in its own ()

array([45, 32, 34, 56, 34, 54, 40])

## 4. Attributes

In [27]:
# List the names of all attributes and methods available on the ndarray
print(dir(arr1))

['T', '__abs__', '__add__', '__and__', '__array__', '__array_finalize__', '__array_function__', '__array_interface__', '__array_prepare__', '__array_priority__', '__array_struct__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__complex__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__divmod__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rlshift_

In [29]:
# Sum of all elements of the array
arr1.sum()

618

In [30]:
# Arithmetic mean of all elements (returned as a float)
arr1.mean()

34.333333333333336

## 5. Homogeneous nature of ndarray

In [31]:
# Case 1: a list that contains only integers
l2 = [1,3,4,2,5,6,7]

In [32]:
# All elements are integers, so the array keeps them as integers
arr2 = np.array(l2)
arr2

array([1, 3, 4, 2, 5, 6, 7])

In [33]:
# dtype reports the single element type shared by the whole array
# NOTE(review): the int32 shown here is presumably platform-dependent (default integer width) -- confirm
arr2.dtype

dtype('int32')

In [34]:
# Case 2: a list that mixes integers and floats
l3 = [4,3,6,9.4,23.4,56.7]
l3

[4, 3, 6, 9.4, 23.4, 56.7]

In [35]:
# Mixing ints and floats: the ints are upcast so that every element is a float
arr3 = np.array(l3)
arr3

array([ 4. ,  3. ,  6. ,  9.4, 23.4, 56.7])

In [36]:
# One float dtype for the whole array, including the values that were ints in l3
arr3.dtype

dtype('float64')

In [40]:
# Case 3: a list that mixes integers, floats and strings
L4 = [2,4,2,34.5,6.4,8.7,'a','b','c']

In [41]:
# Numbers mixed with strings: every element is converted to a string (dtype '<U32')
arr4 = np.array(L4)
arr4

array(['2', '4', '2', '34.5', '6.4', '8.7', 'a', 'b', 'c'], dtype='<U32')

In [42]:
# Case 4: a list whose last element is itself a tuple (a nested, ragged structure)
L5 =[1,2,3,(6,7)]
L5

[1, 2, 3, (6, 7)]

In [47]:
# In the list, element 3 is the tuple (6, 7); [0] then takes its first item
L5[3][0]

6

In [43]:
# L5 is ragged (scalars plus a tuple), so there is no common numeric dtype.
# dtype=object is requested explicitly: recent NumPy releases raise ValueError
# for ragged input instead of silently falling back to an object array.
np.array(L5, dtype=object)

array([1, 2, 3, (6, 7)], dtype=object)

In [45]:
# Ragged input (scalars plus a tuple) has no common numeric dtype, so an object
# array is requested explicitly; recent NumPy releases raise ValueError when
# dtype=object is omitted for such input.
# NOTE: L1 (capital L) is a different variable from the list l1 used elsewhere.
L1 = [1, 2, 3, (6, 7)]
np.array(L1, dtype=object)

array([1, 2, 3, (6, 7)], dtype=object)

In [48]:
l1

[10, 12, 45, 32, 12, 34, 56, 78, 21, 34, 23, 77, 54, 23, 40, 22, 16, 29]

In [50]:
# A set keeps each value once: the repeated 12, 34 and 23 collapse (18 -> 15 values),
# and the printed order does not follow the order of l1
print(set(l1))

{32, 34, 40, 10, 12, 45, 78, 77, 16, 21, 54, 23, 56, 22, 29}


In [52]:
# Same set shown through the notebook's display instead of print(): here it appears sorted
set(l1)

{10, 12, 16, 21, 22, 23, 29, 32, 34, 40, 45, 54, 56, 77, 78}

# Pandas

- Pandas provides a 2D heterogeneous table - **DataFrame**
- A DataFrame is a combination of multiple one-dimensional homogeneous data structures
    - pandas provides this one-dimensional homogeneous data structure as **Series**

In [53]:
import pandas as pd

## Pandas Series

### 1. Creating Pandas Series

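- start with a list/tuple/dict (or ndarray) that contains our data; a `set` is not accepted by `pd.Series()` because it is unordered, so convert it to a list first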
- convert that into pandas Series using `pd.Series()`

In [55]:
# pd.Series() turns the list l1 into a one-dimensional pandas Series
ser1 = pd.Series(l1)
type(ser1)

pandas.core.series.Series

In [57]:
ser1

# The display shows each index label (left column) next to its value (right column),
# followed by the dtype shared by all values

0     10
1     12
2     45
3     32
4     12
5     34
6     56
7     78
8     21
9     34
10    23
11    77
12    54
13    23
14    40
15    22
16    16
17    29
dtype: int64

### 2. The loc, locations or indexes 

- there are two types of indexes in pd.Series
- the regular integer locations or indexes - 0 to len-1
> `iloc`
- along with that it can also have external, user defined, key-style indexes
> `loc`
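- when the user does not set an index, the labels default to 0 … len-1, so `loc` and `iloc` point to the same elements (slices still differ: `loc` includes the end label, `iloc` excludes the stop position)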

In [61]:
# index= supplies user-defined labels: the 18 letters 'a'..'r', one per element of l1
ser2 = pd.Series(l1, index = list('abcdefghijklmnopqr'))

In [62]:
ser2

a    10
b    12
c    45
d    32
e    12
f    34
g    56
h    78
i    21
j    34
k    23
l    77
m    54
n    23
o    40
p    22
q    16
r    29
dtype: int64

In [68]:
# Series with string labels: one name per value of l3
ser3 = pd.Series(l3, index = ["ruchi","abhishek","anil","shravanti","sachin","anshul"])
ser3

# loc  (labels)    : ["ruchi","abhishek","anil","shravanti","sachin","anshul"]
# iloc (positions) : 0,1,2,3,4,5

ruchi         4.0
abhishek      3.0
anil          6.0
shravanti     9.4
sachin       23.4
anshul       56.7
dtype: float64

### 2. Access elements

- for a Series we can directly use `[]`, but it's not advisable because it is ambiguous whether a number means a label or a position

- instead `.loc[]` and `.iloc[]`

In [71]:
# iloc: integer position 3 -> the fourth element
ser2.iloc[3]

32

In [73]:
# loc: label 'd' -> the same element that sits at position 3
ser2.loc['d']

32

In [76]:
# get first 5 elements
# (iloc slices by position and excludes the stop: positions 0..4)

ser2.iloc[0:5]

a    10
b    12
c    45
d    32
e    12
dtype: int64

In [81]:
# get elements a,g,h,n,m,p
# (a list of labels returns the rows in the order requested -- note 'n' before 'm')
ser2.loc[["a",'g','h','n','m','p']]

a    10
g    56
h    78
n    23
m    54
p    22
dtype: int64

In [82]:
ser3

ruchi         4.0
abhishek      3.0
anil          6.0
shravanti     9.4
sachin       23.4
anshul       56.7
dtype: float64

In [84]:
ser1
# default index: the labels are 0..17, identical to the positions, so loc == iloc

0     10
1     12
2     45
3     32
4     12
5     34
6     56
7     78
8     21
9     34
10    23
11    77
12    54
13    23
14    40
15    22
16    16
17    29
dtype: int64

In [86]:
# Plain [] on a Series; with the default integer index it returns the same value
# as .iloc[3] and .loc[3]
ser1[3]

32

In [87]:
# Explicit position-based access: position 3
ser1.iloc[3]

32

In [88]:
# Explicit label-based access: label 3 (same element, because labels equal positions here)
ser1.loc[3]

32

In [89]:
# iloc slice: positions 0..9 -> 10 rows, the stop position 10 is excluded
ser1.iloc[0:10]

0    10
1    12
2    45
3    32
4    12
5    34
6    56
7    78
8    21
9    34
dtype: int64

In [90]:
ser1.loc[0:10]
# loc slices by label, so the upper bound (label 10) is INCLUDED -> 11 rows

0     10
1     12
2     45
3     32
4     12
5     34
6     56
7     78
8     21
9     34
10    23
dtype: int64

### 3. Applying conditions
- conditions are applied as **boolean indexes**
- a boolean Series can be passed to `.loc[]` (or plain `[]`); `.iloc[]` does not accept a boolean Series, only a plain boolean list/array

In [92]:
# Task: from ser1, collect every element that is < 40 (plain-Python version)

below_40 = [value for value in ser1 if value < 40]
res = pd.Series(below_40)
res

0     10
1     12
2     32
3     12
4     34
5     21
6     34
7     23
8     23
9     22
10    16
11    29
dtype: int64

In [93]:
# from ser1, get all elements that are < 40
# (the selected rows keep their original index labels: 0, 1, 3, 4, ...)

ser1.loc[(ser1 < 40)]

0     10
1     12
3     32
4     12
5     34
8     21
9     34
10    23
13    23
15    22
16    16
17    29
dtype: int64

In [94]:
ser1 < 40

# The comparison is evaluated element by element and yields a boolean index (mask):
#   True  -> the element at that label satisfies the condition
#   False -> it does not
# Passing this mask to .loc[] extracts the elements marked True into a new Series

0      True
1      True
2     False
3      True
4      True
5      True
6     False
7     False
8      True
9      True
10     True
11    False
12    False
13     True
14    False
15     True
16     True
17     True
dtype: bool

In [95]:
# Task : from ser1 get elements that are > 30 and < 70

# ser1.loc[ser1 > 30 and ser1 < 70] # wrong: `and` cannot combine Series element by element
# ser1.loc[(ser1 > 30) and (ser1 < 70)] # wrong: still `and`; parentheses alone do not help
ser1.loc[(ser1 > 30) & (ser1 < 70)] # correct: element-wise `&`, each condition in its own ()

2     45
3     32
5     34
6     56
9     34
12    54
14    40
dtype: int64

### 4. Attributes

In [96]:
# Attribute and method names of a plain Python list, for comparison
print(dir(l1))

['__add__', '__class__', '__contains__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__imul__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__reversed__', '__rmul__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'clear', 'copy', 'count', 'extend', 'index', 'insert', 'pop', 'remove', 'reverse', 'sort']


In [97]:
# Attribute and method names of the ndarray
print(dir(arr1))

['T', '__abs__', '__add__', '__and__', '__array__', '__array_finalize__', '__array_function__', '__array_interface__', '__array_prepare__', '__array_priority__', '__array_struct__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__complex__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__divmod__', '__doc__', '__eq__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__', '__imul__', '__index__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lshift__', '__lt__', '__matmul__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmod__', '__reduce__', '__reduce_ex__', '__repr__', '__rfloordiv__', '__rlshift_

In [98]:
# Attribute and method names of the pandas Series
print(dir(ser1))

['T', '_AXIS_ALIASES', '_AXIS_IALIASES', '_AXIS_LEN', '_AXIS_NAMES', '_AXIS_NUMBERS', '_AXIS_ORDERS', '_AXIS_REVERSED', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__array_wrap__', '__bool__', '__class__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__div__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__long__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdiv__', '__rdiv