In [2]:
import pandas as pd 
import numpy as np 

## Python Basics

In [25]:
x = [1,2,3,4,5]
x*3  # This will concatenate two lists

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

In [None]:
# List allows mixed data types
# Example of a list with different data types
x = [1, True, 3.14, "hello"]
print(x)
print(type(x))
print([type(i) for i in x])

[1, True, 3.14, 'hello']
<class 'list'>
[<class 'int'>, <class 'bool'>, <class 'float'>, <class 'str'>]


In [None]:
# Set allows mixed data types
# Example of a set with different data types
x = {1, True, 3.14, "hello"}
print(x)
print(type(x))
for i in x:
    print(type(i))

{'hello', 1, 3.14}
<class 'set'>
<class 'str'>
<class 'int'>
<class 'float'>


In [10]:
x = {1: 1, True: True, 3.14: 3.14, "hello": "hello"}
print(x)
print(type(x))
for k,v in x.items():
    print(type(k), type(v))

{1: True, 3.14: 3.14, 'hello': 'hello'}
<class 'dict'>
<class 'int'> <class 'bool'>
<class 'float'> <class 'float'>
<class 'str'> <class 'str'>


## Numpy is a preferred library for numerical operations in python

In [30]:
# Numpy is faster than lists because it uses contiguous memory allocation
# Numpy is written in C, which is faster than Python
# NUmpy is using vectorized operations, which are faster than for loops

votes = np.array([1, 2, 3, 4, 5])
print(votes)
print(type(votes))
print(votes.dtype)
print(votes.shape)
print(votes.ndim)

[1 2 3 4 5]
<class 'numpy.ndarray'>
int64
(5,)
1


In [28]:
votes * 2  # Example of broadcasting in numpy
# This will multiply each element in the array by 2
print(votes * 2)
print(type(votes * 2))
print(votes/2)
print(votes + 2)

[ 2  4  6  8 10]
<class 'numpy.ndarray'>
[0.5 1.  1.5 2.  2.5]
[3 4 5 6 7]


In [31]:
# example of two dimensional list
two_d_list = [[1, 2, 3], [4, 5, 6]]
print(two_d_list)
print(type(two_d_list))
print([type(i) for i in two_d_list])
print([type(i) for sublist in two_d_list for i in sublist]) 

[[1, 2, 3], [4, 5, 6]]
<class 'list'>
[<class 'list'>, <class 'list'>]
[<class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>]


In [32]:
#example of two dimensional numpy array
two_d_array = np.array([[1, 2, 3], [4, 5, 6]])
print(two_d_array)
print(type(two_d_array))
print([type(i) for i in two_d_array])
print([type(i) for sublist in two_d_array for i in sublist])

[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>
[<class 'numpy.ndarray'>, <class 'numpy.ndarray'>]
[<class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>]


In [None]:
x = np.array([1, True, 3.14, "hello"])
print(x)
print(type(x))
print([type(i) for i in x])

['1' 'True' '3.14' 'hello']
<class 'numpy.ndarray'>
[<class 'numpy.str_'>, <class 'numpy.str_'>, <class 'numpy.str_'>, <class 'numpy.str_'>]


In [34]:
#example of broadcasting in numpy
# This will multiply each element in the array by 2
print(votes * 2)
print(type(votes * 2))
print(votes/2)
print(votes + 2)

[ 2  4  6  8 10]
<class 'numpy.ndarray'>
[0.5 1.  1.5 2.  2.5]
[3 4 5 6 7]


In [35]:
#example of broadcasting in lists
# This will concatenate two lists
x = [1, 2, 3, 4, 5] *3
print(x)  # This will concatenate the list three times
print(type(x))
print([type(i) for i in x])

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
<class 'list'>
[<class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>]


In [36]:
x = np.array([1,2,3,4,5])
y = np.array([[1,2,3,4,5]])
print(x.shape)
print(y.shape)
print(x.ndim)
print(y.ndim)


(5,)
(1, 5)
1
2


In [42]:
x = np.array([1,2,3.14,4])
print(x.dtype)
print(x.astype(int)), print(x.dtype)  # Convert to integer
print(x.astype(float)), print(x.dtype)  # Convert to float
print(x.astype(str)), print(x.dtype)  # Convert to string
print(x.astype(bool)), print(x.dtype)  # Convert to boolean

float64
[1 2 3 4]
float64
[1.   2.   3.14 4.  ]
float64
['1.0' '2.0' '3.14' '4.0']
float64
[ True  True  True  True]
float64


(None, None)

### Slicing in numpy arrays

In [50]:
x = np.array([1,2,3,4,5,8,9,10])
print(x[2:5])
print(x[6:2:-2])

[3 4 5]
[9 5]


In [51]:
#Example of slicing in numpy 2 dim arrays
two_d_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(two_d_array[0:2, 1:3])

[[2 3]
 [5 6]]


### Fancy indexing (Masking)

In [60]:
x = np.array([1, 2, 3, 4, 5])
print(type(x<3))
print(x[x<3])

<class 'numpy.ndarray'>
[1 2]


In [59]:
x[[2,4]]  # Fancy indexing (masking)
print(x[[2,4]])  # This will print the elements at index 2 and 4

[3 5]


In [73]:
x = np.array([11,21,31,41,51,61,71,81,91,100])

In [None]:
#code for printing elements at even indices
y = np.arange(len(x))

In [78]:
indices = np.arange(len(x))
x[indices%2==0]

array([11, 31, 51, 71, 91])

In [70]:
x[np.where(x%2 == 0)]  # This will return the indices of the elements that are even

array([ 2,  4,  6,  8, 10])

In [81]:
np.arange(21,72,2)

array([21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53,
       55, 57, 59, 61, 63, 65, 67, 69, 71])

In [87]:
x = np.array([[1,2,3],
     [4,5,6],
     [7,8,9]])

np.rot90(x, k=1, axes=(1, 0))


array([[7, 4, 1],
       [8, 5, 2],
       [9, 6, 3]])

In [88]:
a = np.array([100, 200, 300, 400])

b = np.array([300, 200, 100, 400])

print(a == b)

[False  True False  True]


In [98]:
arr = np.array([10, 20, 30, 40, 50])
arr.astype('float64')  # Convert to float
(arr > 10) & (arr < 40)

array([False,  True,  True, False, False])

In [None]:
def ratio(marks_arr):
    # Complete the missing code
    distinction = marks_arr[marks_arr >= 80] # Use masking to get the values
    first_div = marks_arr[(marks_arr >= 60) & (marks_arr < 80)] # Use masking to get the values
    
    distinction_count = len(distinction)
    first_div_count = len(first_div)
    
    ratio = distinction_count/(1.0*first_div_count)
    
    return round(ratio,2)

ratio(np.array([100, 90, 80, 70, 60, 50, 40, 30, 20, 10]))

1.5

In [None]:
v = np.array([100, 200, 300])
print(v[0:2])

[[100]
 [200]]


In [117]:
import numpy as np
p = np.array([[0], [10], [20]])
q = np.array([10, 11, 12]) 
v = p+q
print(v) 


[[10 11 12]
 [20 21 22]
 [30 31 32]]


In [121]:
np.flip(v, axis=0)

array([[30, 31, 32],
       [20, 21, 22],
       [10, 11, 12]])

### Aggregate Functions

In [3]:
import numpy as np

In [4]:
# Take a sample of 50 restaurants
sample_votes = np.array([775, 787, 918, 88, 166, 286, 2556, 324, 504, 402, 150, 164, 424, 918, 90, 133, 144, 93, 62, 180, 62, 148, 219, 506, 172, 415, 230, 1647, 4884, 133, 286, 540, 2556, 36, 244, 804, 679, 245, 345, 618, 1047, 627, 354, 244, 163, 808, 1720, 868, 520, 299])
sample_costs = np.array([800.0, 800.0, 800.0, 300.0, 600.0, 600.0, 600.0, 700.0, 550.0, 500.0, 600.0, 500.0, 450.0, 800.0, 650.0, 800.0, 700.0, 300.0, 400.0, 500.0, 600.0, 550.0, 600.0, 500.0, 750.0, 500.0, 650.0, 600.0, 750.0, 200.0, 500.0, 800.0, 600.0, 400.0, 300.0, 450.0, 850.0, 300.0, 400.0, 750.0, 450.0, 450.0, 800.0, 800.0, 800.0, 850.0, 400.0, 1200.0, 300.0, 300.0])

# Create a 2D array: rows = restaurants, columns = [votes, costs]
restaurants_data = np.column_stack((sample_votes, sample_costs))

print("2D Array (votes, costs):\n", restaurants_data)
print("Shape:", restaurants_data.shape)
print("Dimensions:", restaurants_data.ndim)  # 2D

2D Array (votes, costs):
 [[ 775.  800.]
 [ 787.  800.]
 [ 918.  800.]
 [  88.  300.]
 [ 166.  600.]
 [ 286.  600.]
 [2556.  600.]
 [ 324.  700.]
 [ 504.  550.]
 [ 402.  500.]
 [ 150.  600.]
 [ 164.  500.]
 [ 424.  450.]
 [ 918.  800.]
 [  90.  650.]
 [ 133.  800.]
 [ 144.  700.]
 [  93.  300.]
 [  62.  400.]
 [ 180.  500.]
 [  62.  600.]
 [ 148.  550.]
 [ 219.  600.]
 [ 506.  500.]
 [ 172.  750.]
 [ 415.  500.]
 [ 230.  650.]
 [1647.  600.]
 [4884.  750.]
 [ 133.  200.]
 [ 286.  500.]
 [ 540.  800.]
 [2556.  600.]
 [  36.  400.]
 [ 244.  300.]
 [ 804.  450.]
 [ 679.  850.]
 [ 245.  300.]
 [ 345.  400.]
 [ 618.  750.]
 [1047.  450.]
 [ 627.  450.]
 [ 354.  800.]
 [ 244.  800.]
 [ 163.  800.]
 [ 808.  850.]
 [1720.  400.]
 [ 868. 1200.]
 [ 520.  300.]
 [ 299.  300.]]
Shape: (50, 2)
Dimensions: 2


In [None]:
# total votes
print(np.sum(restaurants_data[:,0])) #using no.sum
print(restaurants_data[:,0].sum()) #using sum method of the instance of the numpy array

30583.0
30583.0


In [9]:
# Average cost of 10 restaurants
print(np.mean(restaurants_data[:10,1]))
print(restaurants_data[:10,1].mean())  # Using mean method of the instance of the numpy array

625.0
625.0


In [12]:
# min and max votes
print(np.min(restaurants_data[:,0])), print(np.max(restaurants_data[:,0]))
print()
print(restaurants_data[:,0].min()), print(restaurants_data[:,0].max())  # Using min and max methods of the instance of the numpy array 

36.0
4884.0

36.0
4884.0


(None, None)

### Logical Operators

-  & is vectorized 'and', it works with arrays
- 'and' only works with single value

In [20]:
#using np.where
print(sample_costs[np.where(sample_costs > 700)]) # This will return the indices of the elements that are greater than 700

# using boolean indexing
print(sample_costs[sample_costs > 700])

[ 800.  800.  800.  800.  800.  750.  750.  800.  850.  750.  800.  800.
  800.  850. 1200.]
[ 800.  800.  800.  800.  800.  750.  750.  800.  850.  750.  800.  800.
  800.  850. 1200.]


In [21]:
print(sample_costs[np.where((sample_costs > 700) & (sample_costs < 900))])  # This will return the indices of the elements that are greater than 700 and less than 900

[800. 800. 800. 800. 800. 750. 750. 800. 850. 750. 800. 800. 800. 850.]
