NumPy is the fundamental package for numeric computing with Python. It provides powerful ways to create, store, and manipulate data. In this notebook we will learn about creating, manipulating, selecting, and loading arrays.

In [1]:
import numpy as np
import math
import array as arr

### Lists vs arrays

#### [Arrays](https://www.geeksforgeeks.org/array-python-set-1-introduction-functions/)
- All items are of same type.
- The size of a NumPy array is fixed when the array is created (Python's built-in `array.array`, shown below, can still grow with `append`).

In [8]:
# Standard-library typed array: 'i' is the type code for signed integers,
# so every element has to be an int.
a = arr.array('i', (1, 2, 3, -1))
a

array('i', [1, 2, 3, -1])

In [4]:
# A plain Python list can hold items of different types (ints and a str here).
lt1 = [1,3,4,"No"]

In [5]:
# A NumPy array has a single dtype, so mixing ints with a str makes NumPy
# convert every element to a string (see the '<U11' dtype in the output).
np.array([1,2,3,"No"])

array(['1', '2', '3', 'No'], dtype='<U11')

## Numpy Array creation

In [10]:
# Build a 1-D array from a Python list and print it.
a = np.array([1,2,3])
print(a)

[1 2 3]


Check dimensions

In [24]:
# ndim is the number of axes (dimensions) of the array.
a.ndim

1

2 dimension array

In [25]:
# Each inner list becomes one row: two rows of three elements.
b = np.array([
    [1, 2, 3],
    [2, 3, 4],
])

In [26]:
# A list of lists produces an array with two axes.
b.ndim

2

In [27]:
# shape is a tuple with the length of each axis: (rows, columns) for a 2-D array.
b.shape

(2, 3)

3 dimension array

In [28]:
# Three stacked 2x3 blocks give an array with three axes.
c = np.array([
    [[1, 2, 3], [2, 3, 4]],
    [[1, 2, 3], [2, 3, 4]],
    [[1, 2, 3], [2, 3, 4]],
])

In [29]:
# One entry per axis, outermost first: 3 blocks, each with 2 rows of 3 elements.
c.shape

(3, 2, 3)

### Zeros and Ones

In [30]:
# Arrays pre-filled with a constant; the shape is passed as a tuple.
a = np.zeros(shape=(2, 3))
print(a)

# Same shape, filled with ones instead of zeros.
b = np.ones(shape=(2, 3))
print(b)

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]


### Random number

In [32]:
# 2x3 array of random samples drawn uniformly from [0, 1).
# No seed is set in the cells shown here, so the values differ on every run.
np.random.rand(2,3)

array([[0.27677196, 0.19451144, 0.93288817],
       [0.22908707, 0.88333769, 0.80033496]])

#### arange: To generate a sequence of numbers in an array.

In [11]:
# Values from 10 up to (but not including) 20, in steps of 2.
a = np.arange(start=10, stop=20, step=2)
a

array([10, 12, 14, 16, 18])

#### linspace: To generate a sequence of evenly spaced floats in an array

In [38]:
# Five evenly spaced values from 0 to 2; unlike arange, the end point is included.
a = np.linspace(start=0, stop=2, num=5)
a

array([0. , 0.5, 1. , 1.5, 2. ])

### Array operations

In [41]:
# Two 1-D arrays of equal length to operate on.
a = np.array([1, 2, 3])
b = np.array([2, 3, 4])

print(a - b)  # element-wise difference
print(a * b)  # element-wise product (not a matrix product)
print(a @ b)  # dot product of the two vectors

[-1 -1 -1]
[ 2  6 12]
20


### Aggregation functions in array

In [44]:
# Sum, maximum, minimum and mean of `a`, printed one per line.
print(a.sum(), a.max(), a.min(), a.mean(), sep="\n")

6
3
1
2.0


In [45]:
# 1..15 laid out row by row as a 3x5 matrix (arange's step defaults to 1).
np.arange(1, 16).reshape((3, 5))

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

### Indexing, slicing and iterating

Indexing

In [46]:
# Indices start at 0, so index 2 is the third element.
a = np.array([1, 2, 3])
a[2]

3

In [49]:
a = np.array([
    [1, 2, 3],
    [2, 3, 4],
    [5, 6, 7],
])

# For a 2-D array the index is (row, column): second row, second column.
a[1, 1]

3

Boolean indexing

In [51]:
# Comparing an array with a scalar works element by element and gives a
# boolean array of the same shape.
print(a>5)

[[False False False]
 [False False False]
 [False  True  True]]


In [52]:
# Indexing with the boolean mask keeps only the elements where it is True;
# the result is a flat 1-D array.
a[a>5]

array([6, 7])

Slicing

In [56]:
a = np.array([1, 2, 3, 4, 5])

# A slice start:stop includes `start` and excludes `stop`.
print(a[1:2])  # only the element at index 1
print(a[2:4])  # elements at indices 2 and 3

[2]
[3 4]


In [60]:
a = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [6, 7, 8],
])
# With a single slice on a 2-D array the rows are sliced: the first two rows.
a[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [61]:
# Show the whole array again for comparison with the slices.
a

array([[1, 2, 3],
       [4, 5, 6],
       [6, 7, 8]])

In [62]:
# Row slice and column slice together: rows 0-1, columns 1-2.
a[:2,1:3]

array([[2, 3],
       [5, 6]])

In [64]:
# A single element: row index 2, column index 1.
a[2,1]

7

## Trying Numpy with datasets

In [66]:
# Load the red-wine CSV: fields are separated by ';' and the first line is a
# header row, which is skipped. The path is relative to the working directory.
wines = np.genfromtxt(
    "datasets/winequality-red.csv",
    delimiter=";",
    skip_header=1,
)

In [67]:
# NumPy abbreviates the display of large arrays with '...'.
wines

array([[ 7.4  ,  0.7  ,  0.   , ...,  0.56 ,  9.4  ,  5.   ],
       [ 7.8  ,  0.88 ,  0.   , ...,  0.68 ,  9.8  ,  5.   ],
       [ 7.8  ,  0.76 ,  0.04 , ...,  0.65 ,  9.8  ,  5.   ],
       ...,
       [ 6.3  ,  0.51 ,  0.13 , ...,  0.75 , 11.   ,  6.   ],
       [ 5.9  ,  0.645,  0.12 , ...,  0.71 , 10.2  ,  5.   ],
       [ 6.   ,  0.31 ,  0.47 , ...,  0.66 , 11.   ,  6.   ]])

In [68]:
# All rows, first column only; the result is a 1-D array.
wines[:,0]

array([7.4, 7.8, 7.8, ..., 6.3, 5.9, 6. ])

In [70]:
# All rows, columns 0, 1 and 2 (the slice end is exclusive).
wines[:,0:3]

array([[7.4  , 0.7  , 0.   ],
       [7.8  , 0.88 , 0.   ],
       [7.8  , 0.76 , 0.04 ],
       ...,
       [6.3  , 0.51 , 0.13 ],
       [5.9  , 0.645, 0.12 ],
       [6.   , 0.31 , 0.47 ]])

In [71]:
# A list of indices selects non-adjacent columns: 0, 2 and 4.
wines[:,[0,2,4]]

array([[7.4  , 0.   , 0.076],
       [7.8  , 0.   , 0.098],
       [7.8  , 0.04 , 0.092],
       ...,
       [6.3  , 0.13 , 0.076],
       [5.9  , 0.12 , 0.075],
       [6.   , 0.47 , 0.067]])

In [72]:
# Mean of the last column (index -1 counts from the end).
# NOTE(review): presumably this column is the wine quality score — confirm against the CSV header.
wines[:,-1].mean()

5.6360225140712945

### Graduate admission dataset

In [12]:
# Load the admissions CSV as a structured array.
# - dtype=None asks genfromtxt to determine each column's type from its contents.
# - skip_header=1 skips the file's own header line; `names` supplies the field names instead.
# - genfromtxt replaces spaces in the supplied names with underscores by default,
#   so a name like 'Chance of Admit' is accessed later as 'Chance_of_Admit'.
# The path is relative to the working directory; the captured output below shows
# the file was not found on the last run.
graduate_admission = np.genfromtxt('datasets/Admission_Predict.csv', dtype=None, delimiter=',', skip_header=1,
                                   names=('Serial No','GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
                                          'LOR','CGPA','Research', 'Chance of Admit'))
graduate_admission

OSError: datasets/Admission_Predict.csv not found.

In [74]:
# Because field names were supplied, the result is a one-dimensional structured array:
# each of its 400 elements is one record (tuple) holding all columns of a CSV row.
graduate_admission.shape

(400,)

In [75]:
# CGPA in this dataset is on a 10-point scale, while a 4-point scale is more common in the US,
# so convert it by dividing by 10 and then multiplying by 4.
# NOTE: this overwrites the 'CGPA' field in place, so the cell is not idempotent —
# running it again without reloading the data rescales the already-converted values.
graduate_admission['CGPA'] = graduate_admission['CGPA'] /10 *4
graduate_admission['CGPA'][0:20] # show the first 20 converted values

array([3.86 , 3.548, 3.2  , 3.468, 3.284, 3.736, 3.28 , 3.16 , 3.2  ,
       3.44 , 3.36 , 3.6  , 3.64 , 3.2  , 3.28 , 3.32 , 3.48 , 3.2  ,
       3.52 , 3.4  ])

In [None]:
# Boolean mask on one field: keep only the records whose chance of admission exceeds 0.8.
# The field is 'Chance_of_Admit' (underscores) because genfromtxt replaces the spaces
# in the name that was supplied as 'Chance of Admit'.
graduate_admission[graduate_admission['Chance_of_Admit'] > 0.8]