# Import Pandas & NumPy

In [2]:
import numpy as np
import pandas as pd

# Check versions
print("NumPy version:", np.__version__)
print("Pandas version:", pd.__version__)

NumPy version: 1.24.3
Pandas version: 2.0.3


# NumPy Basics

In [4]:
# Create a simple NumPy array
arr = np.array([1, 2, 3, 4, 5])
print("NumPy Array:", arr)
print("")

# Array shape & size
print("Shape:", arr.shape)
print("Size:", arr.size)
print("")

# Reshape the array
arr_reshaped = arr.reshape(5, 1)
print("Reshaped array:\n", arr_reshaped)
print("")

# Generate random numbers
rand_arr = np.random.rand(3, 3)  # 3x3 matrix with random values
print("Random array:\n", rand_arr)
print("")

# Basic mathematical operations
arr2 = np.array([10, 20, 30, 40, 50])
print("Sum:", np.add(arr, arr2))
print("Mean:", np.mean(arr2))
print("Standard Deviation:", np.std(arr2))

NumPy Array: [1 2 3 4 5]

Shape: (5,)
Size: 5

Reshaped array:
 [[1]
 [2]
 [3]
 [4]
 [5]]

Random array:
 [[0.36413407 0.71516485 0.90659318]
 [0.72729927 0.22561783 0.87498638]
 [0.08222178 0.71172295 0.1819917 ]]

Sum: [11 22 33 44 55]
Mean: 30.0
Standard Deviation: 14.142135623730951


# NumPy (Extra Concepts)

In [8]:
# Broadcasting (Auto-expanding arrays for operations)
arr = np.array([1, 2, 3])
print(arr + 10)  # Each element gets +10 automatically
print("")

#  Stacking & Splitting Arrays
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Stack vertically & horizontally
print("vstack: ",np.vstack((a, b)))
print("hstack: ",np.hstack((a, b)))
print("")

# Split array
split_arr = np.array([10, 20, 30, 40, 50, 60])
print(np.split(split_arr, 3))  # Split into 3 parts
print("")

# Aggregation Functions
arr = np.array([5, 10, 15, 20])

print("Sum:", np.sum(arr))
print("Mean:", np.mean(arr))
print("Max:", np.max(arr))
print("Min:", np.min(arr))
print("Standard Deviation:", np.std(arr))
print("")

# Linear Algebra Operations (for ML)
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix multiplication
result = np.dot(A, B)
print("Matrix multiplication:\n", result)

# Inverse of a matrix
inverse = np.linalg.inv(A)
print("Inverse of A:\n", inverse)

[11 12 13]

vstack:  [[1 2 3]
 [4 5 6]]
hstack:  [1 2 3 4 5 6]

[array([10, 20]), array([30, 40]), array([50, 60])]

Sum: 50
Mean: 12.5
Max: 20
Min: 5
Standard Deviation: 5.5901699437494745

Matrix multiplication:
 [[19 22]
 [43 50]]
Inverse of A:
 [[-2.   1. ]
 [ 1.5 -0.5]]


# Pandas Basics

In [7]:
# Create a simple Pandas DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'Salary': [50000, 60000, 70000]}
df = pd.DataFrame(data)

# Display the DataFrame
print("DataFrame:\n", df)
print("")

# Get info about the dataset
print("\nDataset Info:")
print(df.info())
print("")

# Get basic statistics
print("\nDescriptive Statistics:")
print(df.describe())
print("")


# Select a column
print("\nAges:", df['Age'])
print("")

# Filter rows where Age > 28
filtered_df = df[df['Age'] > 28]
print("\nFiltered Data (Age > 28):\n", filtered_df)

DataFrame:
       Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   Salary  3 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 204.0+ bytes
None


Descriptive Statistics:
        Age   Salary
count   3.0      3.0
mean   30.0  60000.0
std     5.0  10000.0
min    25.0  50000.0
25%    27.5  55000.0
50%    30.0  60000.0
75%    32.5  65000.0
max    35.0  70000.0


Ages: 0    25
1    30
2    35
Name: Age, dtype: int64


Filtered Data (Age > 28):
       Name  Age  Salary
1      Bob   30   60000
2  Charlie   35   70000


# Pandas (Extra Concepts)

In [11]:
# Handling Missing Data
data = {
    'Name': ['Alice', 'Bob', np.nan, 'David'],
    'Age': [25, np.nan, 35, 40],
    'Salary': [50000, 60000, 70000, np.nan]
}

df = pd.DataFrame(data)
print(df)
print("")

# Fill missing values with mean
df['Age'].fillna(df['Age'].mean(), inplace=True)
df['Salary'].fillna(df['Salary'].median(), inplace=True)
df['Name'].fillna("Unknown", inplace=True)

print(df)
print("")

# Merging & Joining DataFrames
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 4], 'Salary': [50000, 60000, 70000]})

# Merge based on common ID
merged_df = pd.merge(df1, df2, on='ID', how='left')
print(merged_df)
print("")

# Sorting & Filtering
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'Salary': [50000, 60000, 70000, 80000]
})

# Sort by Salary (descending)
df_sorted = df.sort_values(by='Salary', ascending=False)
print("sorted: ",df_sorted)

# Filter people with Salary > 60000
high_salary_df = df[df['Salary'] > 60000]
print("filtered: ",high_salary_df)
print("")

# Pivot Tables
df = pd.DataFrame({
    'Department': ['HR', 'IT', 'IT', 'HR', 'Finance'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Salary': [50000, 60000, 70000, 55000, 80000]
})

pivot = df.pivot_table(values='Salary', index='Department', aggfunc='mean')
print(pivot)
print("")

# Apply Functions to Columns
def add_bonus(salary):
    return salary * 1.1  # 10% bonus

df['Salary_Bonus'] = df['Salary'].apply(add_bonus)
print(df)

    Name   Age   Salary
0  Alice  25.0  50000.0
1    Bob   NaN  60000.0
2    NaN  35.0  70000.0
3  David  40.0      NaN

      Name        Age   Salary
0    Alice  25.000000  50000.0
1      Bob  33.333333  60000.0
2  Unknown  35.000000  70000.0
3    David  40.000000  60000.0

   ID     Name   Salary
0   1    Alice  50000.0
1   2      Bob  60000.0
2   3  Charlie      NaN

sorted:        Name  Age  Salary
3    David   40   80000
2  Charlie   35   70000
1      Bob   30   60000
0    Alice   25   50000
filtered:        Name  Age  Salary
2  Charlie   35   70000
3    David   40   80000

            Salary
Department        
Finance      80000
HR           52500
IT           65000

  Department Employee  Salary  Salary_Bonus
0         HR    Alice   50000       55000.0
1         IT      Bob   60000       66000.0
2         IT  Charlie   70000       77000.0
3         HR    David   55000       60500.0
4    Finance      Eve   80000       88000.0
