# Introduction to NumPy

In [52]:
import numpy as np

## Numpy Arrays

In [53]:
#Creating NumPy arrays
arr1 = np.array([1,2,3,4,5])
print(arr1)

arr2 = np.zeros(5)
print(arr2)

arr3 = np.ones((3,3))
print(arr3)

arr4 = np.arange(0,10,2)
print(arr4)



[1 2 3 4 5]
[0. 0. 0. 0. 0.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[0 2 4 6 8]


In [54]:
#Array attributes: shape, size, dtype
print(arr1.shape)

print(arr1.size)

print(arr1.dtype)

(5,)
5
int32


In [55]:
#Indexing and slicing arrays
print(arr1[0])

print(arr1[1:4])

1
[2 3 4]


In [56]:
#Array operations: artihmetic, aggregation, broadcasting
arr6 = arr1 + arr2
print(arr6)

print(np.sum(arr1))

arr7 = arr1 * 2
print(arr7)

[1. 2. 3. 4. 5.]
15
[ 2  4  6  8 10]


## Array Manipulation

In [57]:
arr8 = np.arange(9).reshape(3,3)
arr8

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [58]:
#Stacking and splitting arrays
arr9 = np.vstack((arr8, arr8))
print(arr9)

arr10, arr11 = np.split(arr9, 2)
print(arr10, arr11)

[[0 1 2]
 [3 4 5]
 [6 7 8]
 [0 1 2]
 [3 4 5]
 [6 7 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]] [[0 1 2]
 [3 4 5]
 [6 7 8]]


In [59]:
#Transposing arrays
arr12 = arr8.T
print(arr12)

[[0 3 6]
 [1 4 7]
 [2 5 8]]


In [60]:
#Universal functions (ufuncs)
arr13 = np.sin(arr1)
print(arr13)

[ 0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427]


## Advanced NumPy

In [61]:
#Fancy indexing
indices = np.array([0,2,4])
print(arr1[indices])

[1 3 5]


In [62]:
#Boolean indexing
bool_arr = arr1 > 3
print(arr1[bool_arr])

[4 5]


In [63]:
#Vectorized operations
arr14 = arr1 + 10
print(arr14)

[11 12 13 14 15]


In [64]:
#Broadcasting
arr15 = arr1 + np.array([[10],[20],[30],[40],[50]])
print(arr15)

[[11 12 13 14 15]
 [21 22 23 24 25]
 [31 32 33 34 35]
 [41 42 43 44 45]
 [51 52 53 54 55]]


In [65]:
arr15 = np.array([[1, 2, 3],
 [4, 5, 6],
 [7, 8, 9]])

print('Array:')
print(arr15)

row_idices = np.array([1,2])
column_indices = np.array([0,1])

selected_elements = arr15[(row_idices,column_indices)]

print('\nSelected Elements:')
print(selected_elements)

Array:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Selected Elements:
[4 8]


# Introduction to Pandas

## Working with series

In [66]:
import pandas as pd

s1 = pd.Series([1,2,3,4,5])

s2 = pd.Series(np.array([1,2,3,4,5]))

s3 = pd.Series({'a':1,'b':2,'c':3})

In [67]:
#Idexing and slicing Series
print(s3['a'])

print(s3[0])

print(s3[:2])

1
1
a    1
b    2
dtype: int64


  print(s3[0])


In [68]:
#Operations on Series
s4 = s1 + s2
print(s4)

s5 = s1 * 2
print(s5)

print(s1.sum())

0     2
1     4
2     6
3     8
4    10
dtype: int64
0     2
1     4
2     6
3     8
4    10
dtype: int64
15


In [69]:
#Handling missing data
s6 = s1.dropna()

s7 = s1.fillna(0)

print(s1.isnull())

0    False
1    False
2    False
3    False
4    False
dtype: bool


## Working with DataFrames

In [70]:
#Creating DataFrames

data = {'Name': ['Alice','Bob','Charlie'],
        'Age': [25,30,35]}

df1 = pd.DataFrame(data)
print(df1)

data = [['Alice',25],['Bob',30],['Charlie',35]]
df2 = pd.DataFrame(data, columns=['Name','Age'])
print(df2)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [71]:
#Indexing and slicing DataFrames
print(df1.loc[0, 'Name'])

print(df1.iloc[0,0])

print(df1[:2])

Alice
Alice
    Name  Age
0  Alice   25
1    Bob   30


In [72]:
#Basic Operations

df1_sorted = df1.sort_values(by='Age')
print(df1_sorted)

df1_filtered = df1[df1['Age'] > 30]
print(df1_filtered)

names = df1['Name']
print(names)

      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
2  Charlie   35
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object


In [73]:
#Data manipulation

df1['Gender'] = ['Female','Male','Male']
print(df1)

del df1['Gender']
print(df1)

df1['Age'] = df1['Age'] + 1
print(df1)

      Name  Age  Gender
0    Alice   25  Female
1      Bob   30    Male
2  Charlie   35    Male
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
      Name  Age
0    Alice   26
1      Bob   31
2  Charlie   36


## Data Cleaning and Preparation

In [74]:
#Handling missing values
df1_cleaned = df1.dropna()

df1_filled = df1.fillna(0)

print(df1.isnull().any())

Name    False
Age     False
dtype: bool


In [75]:
#Data normalization(numerical value only)
df1_normalized = (df1-df1.min()) / (df1.max() - df1.min())

df1_standardized = (df1 - df1.mean()) / df1.std()

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [None]:
#Data transformation
df1['Log_Age'] = np.log(df1['Age'])

df1['Gender'] = ['Female','Male','Male']
df1_encoded = pd.get_dummies(df1, columns=['Gender'])
print(df1_encoded)


      Name  Age   Log_Age  Gender_Female  Gender_Male
0    Alice   27  3.295837           True        False
1      Bob   32  3.465736          False         True
2  Charlie   37  3.610918          False         True
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Name     3 non-null      object 
 1   Age      3 non-null      int64  
 2   Log_Age  3 non-null      float64
 3   Gender   3 non-null      object 
dtypes: float64(1), int64(1), object(2)
memory usage: 228.0+ bytes


In [None]:
#Data aggragation and grouping
age_groups = df1.groupby('Age')
print(age_groups)

mean_age = age_groups['Age'].mean()
print(mean_age)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000021E38A5BF50>
Age
27    27.0
32    32.0
37    37.0
Name: Age, dtype: float64


## Advance Pandas

In [None]:
#Concatenating DataFrames

pd.DataFrame({'A': ['A0', 'A1', 'A2'],
 'B': ['B0', 'B1', 'B2']})

df2 = pd.DataFrame({'A': ['A3', 'A4', 'A5'],
 'B': ['B3', 'B4', 'B5']})

result_row = pd.concat([df1,df2])
print(result_row)

result_col = pd.concat([df1,df2], axis=1)
print(result_col)


      Name   Age   Log_Age  Gender    A    B
0    Alice  27.0  3.295837  Female  NaN  NaN
1      Bob  32.0  3.465736    Male  NaN  NaN
2  Charlie  37.0  3.610918    Male  NaN  NaN
0      NaN   NaN       NaN     NaN   A3   B3
1      NaN   NaN       NaN     NaN   A4   B4
2      NaN   NaN       NaN     NaN   A5   B5
      Name  Age   Log_Age  Gender   A   B
0    Alice   27  3.295837  Female  A3  B3
1      Bob   32  3.465736    Male  A4  B4
2  Charlie   37  3.610918    Male  A5  B5


In [None]:
#Merging DataFrames

left = pd.DataFrame({'key': ['K0', 'K1', 'K2'],
 'value': ['V0', 'V1', 'V2']})

right = pd.DataFrame({'key': ['K1', 'K2', 'K3'],
 'value': ['V1', 'V2', 'V3']})

inner_join = pd.merge(left, right, on='key', how='inner')
print(inner_join)

left_join = pd.merge(left, right, on='key', how='left')
print(left_join)

right_join = pd.merge(left, right, on='key', how='right')
print(right_join)

outer_join = pd.merge(left, right, on='key',how='outer')
print(outer_join)

  key value_x value_y
0  K1      V1      V1
1  K2      V2      V2
  key value_x value_y
0  K0      V0     NaN
1  K1      V1      V1
2  K2      V2      V2
  key value_x value_y
0  K1      V1      V1
1  K2      V2      V2
2  K3     NaN      V3
  key value_x value_y
0  K0      V0     NaN
1  K1      V1      V1
2  K2      V2      V2
3  K3     NaN      V3


In [None]:
#Joining DataFrames

left = pd.DataFrame({'value1': [1, 2, 3]}, index=['a', 'b', 'c'])
right = pd.DataFrame({'value2': [4, 5, 6]}, index=['a', 'b', 'd'])

join_df = left.join(right, how='inner')
print(join_df)

   value1  value2
a       1       4
b       2       5


# Technical Level Questions and Exercises

In [None]:
## Question: Multiply matrices A and B

A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

C = np.dot(A,B)
D = A @ B 

print(C)
print(D)

[[19 22]
 [43 50]]
[[19 22]
 [43 50]]


In [None]:
## Question: Matrix Inversion - Find the inverse of matrix A.

A_inv = np.linalg.inv(A)
print(A_inv)

[[-2.   1. ]
 [ 1.5 -0.5]]


In [None]:
## Question: Calculate the eigenvalues and eigenvectors of matrix A.

eigenvalues, eigenvectors = np.linalg.eig(A)
print(eigenvalues)
print(eigenvectors)

[-0.37228132  5.37228132]
[[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]


In [None]:
## Question: Solve the system of equations 3x + y = 9 and x + 2y = 8.

A = np.array([[3,1],[1,2]])
B = np.array([9,8])
x = np.linalg.solve(A,B)
print(x)

[2. 3.]


# NumPy Practice Questions:

In [None]:
## Q1: Array Creation: Create a NumPy array of integers from 1 to 10.

array = np.arange(1,11)
print(array)

[ 1  2  3  4  5  6  7  8  9 10]


In [None]:
## Q2: Array Operations: Perform element-wise addition, subtraction, multiplication, and division on two Numpy arrays.

array1 = np.array([1,2,3,4,5])
array2 = np.array([6,7,8,9,10])

addition = array1 + array2
print(addition)

subtraction = array1 - array2
print(subtraction)

multiplication = array1 * array2
print(multiplication)

division = array1 / array2
print(division)


[ 7  9 11 13 15]
[-5 -5 -5 -5 -5]
[ 6 14 24 36 50]
[0.16666667 0.28571429 0.375      0.44444444 0.5       ]


In [None]:
## Q3: Array Sclicing: Extract the first 3 elements from a NumPy array.
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

arr = arr[:4]
print(arr)

[1 2 3 4]


In [None]:
## Q4: Array Reshaping: Reshape a 1D NumPy array to a 2D array with 2 rows annd 3 columns.

arr = np.arange(0,6).reshape(2,3)
print(arr)

[[0 1 2]
 [3 4 5]]


In [None]:
## Q5 Array Agrregation: Calculate the mean, median, and standard deviation of a NumPy array.

arr = np.array([1,2,3,4,5,6,7,8,9,10])

mean = np.mean(arr)
print(mean)

standard_deviation = np.std(arr)
print(standard_deviation)

5.5
2.8722813232690143


# Pandas Practice Questions:

In [86]:
## Q1: DataFrame Creation: Create a Pandas DataFrame from a dictionary of lists, where each list represents a column.

dictionary = {'Name': ['Alice', 'Bob', 'Charlie',np.nan], 
              'Age': [25,30,35,0],
              'Gender': ['Female', 'Male', 'Male',np.nan], 
              'Height': [165, 170, 160,0]}

df = pd.DataFrame(dictionary)
print(df)

      Name  Age  Gender  Height
0    Alice   25  Female     165
1      Bob   30    Male     170
2  Charlie   35    Male     160
3      NaN    0     NaN       0


In [None]:
## Q2: DataFrame Operations: Add a new column to an existing DataFrame. Perform element-wise operations between two columns.

df['Weight'] = [135,150,180]

df['BMI'] = df['Weight'] / (df['Height']/100)**2
print(df)

      Name  Age  Gender  Height  Weight        BMI
0    Alice   25  Female     165     135  49.586777
1      Bob   30    Male     170     150  51.903114
2  Charlie   35    Male     160     180  70.312500


In [None]:
## Q3: Data Selection: Select rows based on a condition. Select specific columns from a DataFrame.

df_rows = df[df['Height'] >= 165]
print(df_rows)

df_columns = df[['Name','Age']]
print(df_columns)

    Name  Age  Gender  Height  Weight        BMI
0  Alice   25  Female     165     135  49.586777
1    Bob   30    Male     170     150  51.903114
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [80]:
## Q4: Data Aggregation: Group a DataFrame based on a categorical column and calculate the mean of each group.

df_grouped = df.groupby('Gender')[['Age','Height']].mean()
print(df_grouped)

         Age  Height
Gender              
Female  25.0   165.0
Male    32.5   165.0


In [88]:
## Q5: Data Cleaning: Handle missing values by either removing or replacing them with apprpriate values.

df_cleaned = df.dropna()
print(df_cleaned)

df_filled = df.fillna(0)
print(df_filled)

      Name  Age  Gender  Height
0    Alice   25  Female     165
1      Bob   30    Male     170
2  Charlie   35    Male     160
      Name  Age  Gender  Height
0    Alice   25  Female     165
1      Bob   30    Male     170
2  Charlie   35    Male     160
3        0    0       0       0
