# NumPy Programming Practice

### Author: Arunima Chaurasia
### Based on the notebook of : Santosh Muthireddy and Divin Devaiah

# Requirements
* NumPy
* python3


## Objective
In this we focus on
* ndarray
* Arithmetic operations
* Indexing
* Slicing
* Matrix operations

## Introduction
NumPy is a python library for numerical computation, it stands for "Numerical Python." It provides support for large, multi-dimensional arrays and matrices, along with a collection of mathematical functions to operate on these arrays efficiently.

In [2]:
# Importing the library
import numpy as np

# Check the documentation of any function by '?' like np.arange?

#### Different ways to initialise and declare a NumPy array

In [3]:
# Function to print 1d array
def print_array_1d(arr):
    print("data: \n",arr)
    print("array type:",type(arr))
    print("data type:",type(arr[0]))
    print("shape:",arr.shape)
    print("--------------------")
    
# Function to print 2d array    
def print_array_2d(arr):
    print("data: \n",arr)
    print("array type:",type(arr))
    print("data type:",type(arr[0][0]))
    print("shape:",arr.shape)
    print("--------------------")

In [9]:
# 1D array, int
arr1 = np.array([1,2,3,4,5])
print_array_1d(arr1)

# 1D array, float
arr2 = np.array([0.1,0.5,0.9,2.3])
print_array_1d(arr2)

# 1D array, defining dtype
arr1 = np.array([1,2,3,4,5],dtype=float)
print_array_1d(arr1)

arr2 = np.array([0.1,0.5,0.9,2.3],dtype=int)
print_array_1d(arr2)

# type casting
arr3 = np.array(['1','2','3'])
print_array_1d(arr2.astype(float))
print_array_1d(arr3.astype(float))

data: 
 [1 2 3 4 5]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.int64'>
shape: (5,)
--------------------
data: 
 [0.1 0.5 0.9 2.3]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.float64'>
shape: (4,)
--------------------
data: 
 [1. 2. 3. 4. 5.]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.float64'>
shape: (5,)
--------------------
data: 
 [0 0 0 2]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.int64'>
shape: (4,)
--------------------
data: 
 [0. 0. 0. 2.]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.float64'>
shape: (4,)
--------------------
data: 
 [1. 2. 3.]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.float64'>
shape: (3,)
--------------------


In [None]:
# 2D array, int
arr1 = np.array([[1,2],[3,4],[5,6]])
print_array_2d(arr1)

# 2D array, float
arr2 = np.array([[0.1,0.5],[0.9,2.3]])
print_array_1d(arr2)

In [None]:
# Making 2D array from 1D array
# numpy reshape can be used to change the shape of the array
arr1 = np.array([1,2,3,4,5,6])
print_array_1d(arr1)
arr1 = np.reshape(arr1,(3,2))
print_array_2d(arr1)

In [None]:
# Create arrays with ones in 1D
arr3 = np.ones(6)
print_array_1d(arr3)

# Create arrays with ones in 2D
arr3 = np.ones((3,3))
print_array_2d(arr3)


In [None]:
# create arrays with zeros in 1D
arr3 = np.zeros(5)
print_array_1d(arr3)

# Create arrays with zeros in 2D
arr3 = np.zeros((3,3))
print_array_2d(arr3)


In [None]:
# Create identity matrix
arr3 = np.identity(4)
print_array_2d(arr3)

# Create matrix with ones in diagonal, may not be identity
# diagonal considered is main by default
arr3 = np.eye(4,4)
print_array_2d(arr3)


In [None]:
# By default the data type is float
arr3 = np.eye(2,4, dtype=int)
print_array_2d(arr3)

# Creates diagonal array and also retreives the diagonal using this function
arr3 = np.diag((3,3,4,5))
print_array_2d(arr3)

### Generate arrays with random values or intervals

In [None]:
# Using np.arange(), returns evenly spaced values in given interval
#Generating numpy array with step 1
arr1 = np.arange(6)
print_array_1d(arr1)

# Start = 2, stop = 6
arr1 = np.arange(2,6)
print_array_1d(arr1)

# Generating numpy array with step 0.2
arr1 = np.arange(0,6,0.2)
print_array_1d(arr1)

In [None]:
# Using np.linspace(), returns evenly spaced numbers over speified interval
#Generating numpy array
arr1 = np.linspace(0,6,num=10)
print_array_1d(arr1)

# endpoint includes the stop value as last value of the sequence if set to True(Default)
arr1 = np.linspace(0,6,num=10,endpoint=False)
print_array_1d(arr1)

arr1 = np.linspace(0,6,num=10,endpoint=False,retstep=True)
print(arr1)


In [None]:
#generating array from random generator

#setting the seed for reproducibility
np.random.seed(0)

#generating 1 dimentional array
arr1 = np.random.normal(0, 1, (5,))
print_array_1d(arr1)

arr2 = np.random.randint(low = 0, high = 100, size = (5,))
print_array_1d(arr2)

#generating 2d dimentional array
arr1 = np.random.normal(5, 10, (2,2))
print_array_2d(arr1)

arr2 = np.random.randint(low = 0, high = 100, size = (3,4))
print_array_1d(arr2)


In [None]:
# Generate 5 random numbers between 0 and 1 from uniform distribution
random_numbers = np.random.uniform(0, 1, 5)
print("Random numbers between 0 and 1:")
print_array_1d(random_numbers)

# Generate a 3x3 array of random numbers between 0 and 10
random_matrix = np.random.uniform(0, 10, (3, 3))
print("\nRandom 3x3 matrix between 0 and 10:")
print_array_2d(random_matrix)

## Slicing NumPy array

* It gives the 'view' of the array, not the copy of the data.
* Any modification of the sliced array will be reflected in the original array.
* Use '&' and '|' instead of 'and' and 'or' for indexing. 

In [31]:
start = 3
end = 7
arr1 = np.arange(10)
print(arr1)

[0 1 2 3 4 5 6 7 8 9]


In [32]:
#slicing till a point from start
print(arr1[:end])

[0 1 2 3 4 5 6]


In [33]:
#slicing from a point to end
print(arr1[start:])

[3 4 5 6 7 8 9]


In [34]:
#slicing between points
print(arr1[start:end])

[3 4 5 6]


In [35]:
#every number with n skips
n = 2
print(arr1[::n])

[0 2 4 6 8]


In [36]:
#reverse
print(arr1[::-1])

[9 8 7 6 5 4 3 2 1 0]


## Indexing numpy array

In [37]:
#Generate random array with integers
arr1 = np.random.randint(40,size=(4,3))
print(arr1)

[[14 35 12]
 [20 11  4]
 [ 6  4  3]
 [12 36 14]]


In [38]:
#Extract rows
n = 1
arr1[:n,:]

array([[14, 35, 12]])

In [39]:
#Extract columns
n=1
arr1[:,:n]

array([[14],
       [20],
       [ 6],
       [12]])

In [45]:
n = 3
m = 2
arr1[:n,:m]
# Last value
print(arr1[-1][-1])

# Last column
print(arr1[:, -1])

# All columns other than last column
print(arr1[:, :-1])

# Transpose
print(np.transpose(arr1))
print(arr1.T)

# Shape of the array
print("\nShape of the array :")
print(arr1.shape)

# np.size
print("\nnp.size :")
print(np.size(arr1))

# Reshape
print("\nReshape :")
print(np.reshape(arr1, (3, 2)))



14
[12  4  3 14]
[[14 35]
 [20 11]
 [ 6  4]
 [12 36]]
[[14 20  6 12]
 [35 11  4 36]
 [12  4  3 14]]
[[14 20  6 12]
 [35 11  4 36]
 [12  4  3 14]]

Shape of the array example:
(4, 3)

np.size example:
12


In [48]:
arr = np.array([1, 2, 3, 4, 5])

# New_axis
# Convert a 1D array into either a row vector or a column vector
print_array_2d(arr1)
print("new axis:\n",arr1[:, np.newaxis])
print("shape: ",arr1[:, np.newaxis].shape)

# Converting it into a row vector
row_vector = arr[np.newaxis, :]
print("Row vector:")
print(row_vector)
print("Shape of row vector:", row_vector.shape)

# Converting it into a column vector
column_vector = arr[:, np.newaxis]
print("\nColumn vector:")
print(column_vector)
print("Shape of column vector:", column_vector.shape)


data: 
 [[14 35 12]
 [20 11  4]
 [ 6  4  3]
 [12 36 14]]
array type: <class 'numpy.ndarray'>
data type: <class 'numpy.int64'>
shape: (4, 3)
--------------------
new axis:
 [[[14 35 12]]

 [[20 11  4]]

 [[ 6  4  3]]

 [[12 36 14]]]
shape:  (4, 1, 3)
Row vector:
[[1 2 3 4 5]]
Shape of row vector: (1, 5)

Column vector:
[[1]
 [2]
 [3]
 [4]
 [5]]
Shape of column vector: (5, 1)


In [52]:

# Creating sample arrays
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

# np.where
print("np.where :")
print(np.where(arr1 > 2, arr1, arr2))  # If arr1 element > 2, use arr1, else use arr2

# Masking -  certain elements in an array are marked as invalid or ignored for computation purposes.
print("\nMasking :")
mask = arr1 > 2
print(arr1[mask])

# Masked aggregation functions
# performed while considering only the valid (unmasked) elements, ignoring the masked elements.
# usually used to avoid computation on inf, Nan values in the array.
print("\nMasked Aggregation functions:")
masked_arr = np.ma.masked_array(arr1, mask=(arr1 < 3))
print("np.ma.sum:", np.ma.sum(masked_arr))
print("np.ma.mean:", np.ma.mean(masked_arr))

# Aggregation functions
print("\nAggregation functions :")
print("np.sum:", np.sum(arr1))
print("np.average:", np.average(arr1))
print("np.mean:", np.mean(arr1))
print("np.var:", np.var(arr1))
print("np.std:", np.std(arr1))

# Trigonometric functions
print("\nTrigonometric functions :")
print("np.cos:", np.cos(arr1))
print("np.sin:", np.sin(arr1))
print("np.arctan", np.arctan(arr1))
# difference between acrtan vs arctan2?


np.where :
[[7 8 3]
 [4 5 6]]

Masking :
[3 4 5 6]

Masked Aggregation functions:
np.ma.sum: 18
np.ma.mean: 4.5

Aggregation functions :
np.sum: 21
np.average: 3.5
np.mean: 3.5
np.var: 2.9166666666666665
np.std: 1.707825127659933

Trigonometric functions :
np.cos: [[ 0.54030231 -0.41614684 -0.9899925 ]
 [-0.65364362  0.28366219  0.96017029]]
np.sin: [[ 0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155 ]]
np.arctan [[0.78539816 1.10714872 1.24904577]
 [1.32581766 1.37340077 1.40564765]]


In [53]:
# Multiplying matrices
print("\nMatrix Multiplication examples:")
# computes the dot product of two arrays. In 2D - matrix multiplication, in 1D - inner product of vectors
# for scalars- works as */mutiply
print("Using np.dot:")
print(np.dot(arr1, arr2.T))  # Matrix multiplication using np.dot

# @ is shorthand for matmul
# in matlmul, multiplication by scalars is not allowed, use */dot instead
# follows (n,k),(k,m)->(n,m) rule for matrix multiplication
print("Using @ operator:")
print(arr1 @ arr2.T)  # Matrix multiplication using @ operator
print("Using np.matmul:")
print(np.matmul(arr1, arr2.T))  # Matrix multiplication using np.matmul

# Dividing matrices
# shorthand for np.divide
print("\nMatrix Division :")
print(arr2 / arr1)


Matrix Multiplication examples:
Using np.dot:
[[ 50  68]
 [122 167]]
Using @ operator:
[[ 50  68]
 [122 167]]
Using np.matmul:
[[ 50  68]
 [122 167]]

Matrix Division :
[[7.  4.  3. ]
 [2.5 2.2 2. ]]


In [66]:
# Array comparison
# important for floating point precision
# before starting, try to compare two floating point numbers
a = 0.1888888888888888889 + 0.4111111111111111111
b = 0.6
print(a == b)
print("\nArray comparison :")
arr3 = np.array([[1, 2, 3], [4, 5, 6]])
print("np.isclose:", np.isclose(arr1, arr3))
print("np.allclose:", np.allclose(arr1, arr3))
print("np.any:", np.any(arr1 == arr3))
print("np.all:", np.all(arr1 == arr3))
print("np.equal:", np.equal(arr1, arr3))

True

Array comparison :
np.isclose: [[ True  True  True]
 [ True  True  True]]
np.allclose: True
np.any: True
np.all: True
np.equal: [[ True  True  True]
 [ True  True  True]]


In [None]:

# Square root
print("\nSquare root :")
print(np.sqrt(arr1))

# Index of max and min
print("\nIndex of max and min in an array:")
print("np.argmax:", np.argmax(arr1))
print("np.argmin:", np.argmin(arr1))

# Axis operations
print("\nAxis operations :")
print("np.sum(axis=0):", np.sum(arr1, axis=0))  # Column-wise sum
print("np.unique:", np.unique(arr1))

# Append and insert
# difference between append and insert?
print("\nAppend and insert :")
print("np.append:", np.append(arr1, [[7, 8, 9]], axis=0))
print("np.insert:", np.insert(arr1, 1, [7, 8, 9], axis=0))

# Rounding
print("\nRounding :")
arr4 = np.array([1.23, 2.35, 3.67])
print(np.around(arr4))

In [None]:

# Stack operations
print("\nStack operations :")
print("np.column_stack:")
print(np.column_stack((arr1, arr2)))
print("np.vstack:")
print(np.vstack((arr1, arr2)))
print("np.hstack:")
print(np.hstack((arr1, arr2)))

## Practice Problem 

### You are working on a project to analyze temperature data collected from weather stations across different cities. The data is stored in a CSV file, where each row represents a day's temperature readings for a specific city. Each row contains the daily high and low temperatures recorded for that day. Your task is to use NumPy to analyze the temperature data and extract useful insights.

#### Tasks:

* Load the temperature data from the CSV file into a NumPy array.
* Calculate and print the mean, median, maximum, and minimum daily high and low temperatures across all cities.
* Identify and print the city(s) with the highest average daily high temperature and the lowest average daily low temperature.
* Determine and print the number of days where the temperature difference (high minus low) exceeds 10 degrees Celsius.
* Compute and print the percentage of days where the daily high temperature exceeds 30 degrees Celsius for each city.

In [100]:
data = np.genfromtxt('city.csv', delimiter=',', dtype=None, encoding=None )
data = np.array(data)[1:]
# print(data)
# mean

daily_high = data[:, 1].astype(float)
daily_low = data[:, 2].astype(float)

# Calculate mean, median, maximum, and minimum temperatures
mean_high = np.mean(daily_high)
median_high = np.median(daily_high)
max_high = np.max(daily_high)
min_high = np.min(daily_high)

mean_low = np.mean(daily_low)
median_low = np.median(daily_low)
max_low = np.max(daily_low)
min_low = np.min(daily_low)

cities = np.unique(data[:, 0])

# Calculate average daily high and low temperatures for each city
avg_high_by_city = []
avg_low_by_city = []

for city in cities:
    city_data = data[data[:, 0] == city]
    avg_high = np.mean(city_data[:, 1].astype(float))
    avg_low = np.mean(city_data[:, 2].astype(float))
    avg_high_by_city.append((city, avg_high))
    avg_low_by_city.append((city, avg_low))
    days = np.size(city_data[city_data[:,1].astype(float) > 30.0])
    percentage = days/city_data.shape[0]
    print(percentage, days, city_data.shape[0])

# Identify city with highest average daily high temperature
highest_avg_high = max(avg_high_by_city, key=lambda x: x[1])

# Identify city with lowest average daily low temperature
lowest_avg_low = min(avg_low_by_city, key=lambda x: x[1])

indices = np.where(daily_high - daily_low > 10)
print(data[indices])



1.0 3 3
0.0 0 3
0.0 0 3
1.0 3 3
0.0 0 3
2.0 6 3
1.0 3 3
0.0 0 3
0.0 0 3
3.0 9 3
0.0 0 3
0.0 0 3
[['New York' '28' '17']
 ['Los Angeles' '30' '18']
 ['Los Angeles' '28' '16']
 ['Los Angeles' '32' '20']
 ['Denver' '28' '15']
 ['Denver' '26' '14']
 ['Denver' '30' '18']
 ['Atlanta' '29' '18']
 ['Atlanta' '28' '17']
 ['Atlanta' '31' '20']
 ['Boston' '23' '12']
 ['Boston' '22' '11']]
