In [70]:
import numpy as np

In [71]:
# declaring a numpy array

array = np.array([1,2,3,4,5], dtype = np.int8)
print(array)

# slicing and indexing operations are similar to lists 
print(array[1])
print(array[1:])
print(array[:-2])

array[1] = 47

print(array)

multiplier = 10
for i, num in enumerate(array): 
    array[i] = multiplier
    multiplier += 10

print(array)

print(array.dtype)

[1 2 3 4 5]
2
[2 3 4 5]
[1 2 3]
[ 1 47  3  4  5]
[10 20 30 40 50]
int8


In [72]:
# multidimensional array 

twod_array = np.array([[1,2,3],
                       [4,5,6],
                       [7,8,9]])

print(twod_array)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [73]:
# numpy attributes 

print(twod_array.shape) # returns in the format (rows, columns), if (breadth, width, height)
print(twod_array.ndim) # returns the dimensions of the array 
print(twod_array.size) # returns the number of elements in the array
print(twod_array.dtype) # returns the data type of the elements of the array

(3, 3)
2
9
int64


In [74]:
# numpy methods

# fill an array with shape and specific value using the full() method 
names = np.full((2,3,4), "Deepak")
print(names)

print(names.dtype)

[[['Deepak' 'Deepak' 'Deepak' 'Deepak']
  ['Deepak' 'Deepak' 'Deepak' 'Deepak']
  ['Deepak' 'Deepak' 'Deepak' 'Deepak']]

 [['Deepak' 'Deepak' 'Deepak' 'Deepak']
  ['Deepak' 'Deepak' 'Deepak' 'Deepak']
  ['Deepak' 'Deepak' 'Deepak' 'Deepak']]]
<U6


In [75]:
# fill an array with ones or zeroes using the ones() and zeros() method 

ones = np.ones((2,4), dtype = np.int8) # the default is float64
zeroes = np.zeros((3,3), dtype = np.int8)

print(ones, '\n')
print(zeroes)

print(ones.dtype)
print(zeroes.dtype)

[[1 1 1 1]
 [1 1 1 1]] 

[[0 0 0]
 [0 0 0]
 [0 0 0]]
int8
int8


In [76]:
# allocate memory of specifed shape without initialising 

empty_array = np.empty((3,3,3))

print(empty_array)

[[[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]

 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]]


In [77]:
# fill arrays with special methods: arange() and linspace()

sequence = np.arange(0, 1005, 5)  # works similar to range function (start, end, step)
print(sequence)

distribute = np.linspace(1, 1000, 45) # distributes all the values evenly from start to end in the given size (start, end, size)

# distributes 1-1000 evenly over 45 elements
print(distribute)

[   0    5   10   15   20   25   30   35   40   45   50   55   60   65
   70   75   80   85   90   95  100  105  110  115  120  125  130  135
  140  145  150  155  160  165  170  175  180  185  190  195  200  205
  210  215  220  225  230  235  240  245  250  255  260  265  270  275
  280  285  290  295  300  305  310  315  320  325  330  335  340  345
  350  355  360  365  370  375  380  385  390  395  400  405  410  415
  420  425  430  435  440  445  450  455  460  465  470  475  480  485
  490  495  500  505  510  515  520  525  530  535  540  545  550  555
  560  565  570  575  580  585  590  595  600  605  610  615  620  625
  630  635  640  645  650  655  660  665  670  675  680  685  690  695
  700  705  710  715  720  725  730  735  740  745  750  755  760  765
  770  775  780  785  790  795  800  805  810  815  820  825  830  835
  840  845  850  855  860  865  870  875  880  885  890  895  900  905
  910  915  920  925  930  935  940  945  950  955  960  965  970  975
  980 

In [78]:
# two important attributes when dealing with exceptional values

print(np.nan) # can be interchanged with 0
print(np.inf) # can be used instead of throwing out a DivisionByZero exception

np.isnan(np.nan) # returns true or false 
np.isinf(np.inf) # returns true of false 

#example usage 

print(np.isnan(np.sqrt(-1)))
print(np.isinf(1 / 0))

nan
inf
True


  print(np.isnan(np.sqrt(-1)))


ZeroDivisionError: division by zero

In [None]:
# arithmetic operations on arrays

array_1 = np.arange(1, 10)
array_2 = np.arange(10, 19)

# the reshape method reshapes the array into the specified shape, but there should exactly the same number of elements for reshape to work
reshaped_array_1 = array_1.reshape(3,3)
reshaped_array_2 = array_2.reshape(3,3)

print(array_1, "\n")
print(array_2, "\n")

print(array_1 + array_2) # add 
print(array_1 - array_2) # subtract 
print(array_1 * array_2) # multiply
print(array_1 / array_2) # divide

print(reshaped_array_1 @ reshaped_array_2) # perform matrix multiplication

# reshape() doesn require assignment 
# resize() performs the same thing but it does not require assignment to variable 

In [None]:
# Adding elements to an array 

a = np.array([1,2,3,4,5])
print(a)

a = np.append(a, [6,7,8,9,10])
print(a)

In [None]:
# deleting element from an array 

b = np.array([[1,2,3,4,5],
              [6,7,8,9,10]])
print(b)

b = np.delete(b, 8) # array reshaped to 1D and the element at the 8th index is deleted
print(b)

#deleting entire rows and columns

c = np.array([[1,2,3,4,5],
              [6,7,8,9,10]])
print(c)
print(np.delete(c, 1, 0)) # deletes entire second row


## NumPy Arrays (`ndarray`)

1.  **Core Concept:** A NumPy array (specifically, the `ndarray` object) is the fundamental data structure in the NumPy library. It's a powerful N-dimensional array object, which is essentially a grid of values, *all of the same data type*.

2.  **Key Advantages over Python Lists:**
    * **Performance:** NumPy operations are implemented in C, making them much faster than equivalent operations on Python lists, especially for large datasets.

    * **Memory Efficiency:** Arrays store elements of the same type contiguously in memory, unlike Python lists which can hold objects of different types scattered in memory. This leads to smaller memory footprints.

    * **Functionality:** Provides a vast collection of high-level mathematical functions that operate efficiently on arrays (e.g., linear algebra, Fourier transforms, random number generation).

    * **Vectorization:** Allows you to perform operations on entire arrays element-wise without writing explicit loops in Python. For example, `array * 2` multiplies every element in the array by 2.

3.  **Key Attributes:** Every `ndarray` has important attributes:
    * `ndarray.ndim`: The number of dimensions (or axes) of the array.
    * `ndarray.shape`: A tuple indicating the size of the array along each dimension (e.g., `(3, 4)` for a 3x4 matrix).
    * `ndarray.size`: The total number of elements in the array.
    * `ndarray.dtype`: An object describing the data type of the elements (e.g., `int64`, `float64`, `bool`).

4.  **Creation:** Commonly created from Python lists or tuples using `np.array()`, or using built-in functions like `np.zeros()`, `np.ones()`, `np.arange()`, `np.linspace()`.

5.  **Indexing & Slicing:** Similar to Python lists for 1D arrays, but extends naturally to multiple dimensions using comma-separated indices or slices (e.g., `array[1, 2]` or `array[0, :]`).

**In short:** NumPy arrays are efficient, high-performance multi-dimensional containers for numerical data, forming the bedrock of scientific computing in Python. They enable fast, vectorized operations crucial for data analysis, machine learning, and scientific simulations.

## Data types in NumPy

NumPy provides a wide variety of data types that you can use to define the elements of your arrays. These data types are similar to the built-in Python types but with some added functionalities and more specific sizes. Here's a breakdown of the available data type categories in NumPy:

**1. Booleans:**

* `np.bool_`: Represents boolean values (True or False). It's stored as a single byte.

**2. Integers:**

* **Signed Integers:** These can represent both positive and negative whole numbers.
    * `np.int8`: Byte (-128 to 127)
    * `np.int16`: Integer (-32768 to 32767)
    * `np.int32`: Integer (-2147483648 to 2147483647)
    * `np.int64` or `np.int_`: Integer (-9223372036854775808 to 9223372036854775807). This is the default integer type in NumPy and is typically equivalent to C `long`.
    * `np.intc`: Identical to the C `int` (usually `int32` or `int64`, depending on the platform).
    * `np.intp`: Integer used for indexing, typically the same as C `ssize_t` (usually `int32` or `int64`, depending on the platform).
* **Unsigned Integers:** These can only represent non-negative whole numbers.
    * `np.uint8`: Unsigned byte (0 to 255)
    * `np.uint16`: Unsigned integer (0 to 65535)
    * `np.uint32`: Unsigned integer (0 to 4294967295)
    * `np.uint64` or `np.uint`: Unsigned integer (0 to 18446744073709551615). This is typically equivalent to C `unsigned long`.
    * `np.uintc`: Identical to the C `unsigned int` (usually `uint32` or `uint64`, depending on the platform).
    * `np.uintp`: Unsigned integer used for indexing, typically the same as C `size_t` (usually `uint32` or `uint64`, depending on the platform).

**3. Floating-Point Numbers:**

* `np.float16` or `np.half`: Half-precision float (sign bit, 5 bits exponent, 10 bits mantissa)
* `np.float32` or `np.single`: Single-precision float (sign bit, 8 bits exponent, 23 bits mantissa)
* `np.float64` or `np.double` or `np.float_`: Double-precision float (sign bit, 11 bits exponent, 52 bits mantissa). This is the default floating-point type in NumPy and is typically equivalent to Python's `float`.
* `np.longdouble`: Extended-precision float. The number of bits is platform-dependent (could be 96 or 128 bits).

**4. Complex Numbers:**

* `np.complex64` or `np.csingle`: Complex number represented by two 32-bit floats (real and imaginary parts).
* `np.complex128` or `np.cdouble` or `np.complex_`: Complex number represented by two 64-bit floats (real and imaginary parts). This is the default complex type in NumPy and is typically equivalent to Python's `complex`.
* `np.clongdouble`: Complex number represented by two extended-precision floats.

**5. Strings:**

* `np.str_` or `np.unicode_`: Fixed-length Unicode string. The length is determined when the array is created. For example, `np.array(['hello', 'world'], dtype='U5')` creates an array of Unicode strings, each with a maximum length of 5 characters.
* `np.bytes_` or `np.string_`: Fixed-length byte string. Similar to Unicode strings but stores raw bytes.

**6. Objects:**

* `np.object_` or `np.object`: Allows storing Python objects in the array. This can be useful for arrays with elements of different types, but it loses the performance benefits of NumPy arrays with fixed data types.

**7. Datetime and Timedelta:**

* `np.datetime64`: Represents dates and times with various levels of precision.
* `np.timedelta64`: Represents differences between datetimes (time durations).

**8. Fixed-size chunks of memory (void):**

* `np.void`: Represents a fixed-size sequence of bytes. It can be useful for low-level operations or when dealing with structured data types.

You can specify the data type of a NumPy array when you create it using the `dtype` argument:

```python
import numpy as np

int_array = np.array([1, 2, 3], dtype=np.int32)
float_array = np.array([1.0, 2.5, 3.7], dtype=np.float64)
string_array = np.array(['apple', 'banana'], dtype='U6')
```

You can also check the data type of an existing array using the `.dtype` attribute:

```python
print(int_array.dtype)  # Output: int32
print(string_array.dtype) # Output: <U6
```

Choosing the appropriate data type is important for memory efficiency and performance. Using a smaller data type (e.g., `int16` instead of `int64`) can save memory if the values in your array are within the range of the smaller type. NumPy's vectorized operations are also generally faster when working with arrays of a specific, fixed data type.

**User Notes**

* Numpy arrays contain homogeneous data type
* Arithmetic operations follow the rules of matrix operations when it comes to shapes, although division is an exception
 