<a href="https://colab.research.google.com/github/budhiacharya/AI-Lesson/blob/main/Assignment4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Assignment 4
# Setup cell
import pandas as pd
import numpy as np
from functools import reduce

In [3]:
# Task 1 - Create a DataFrame
# Build a small people table (Name, Age, City) and confirm the column dtypes.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 28],
    City=['New York', 'San Francisco', 'Los Angeles', 'Chicago'],
)
df = pd.DataFrame(data)

# Make sure Age is numeric; errors='coerce' turns unparseable entries into NaN
# instead of raising.
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')

print('DataFrame created:', df, sep='\n')
print('\nDtypes:', df.dtypes, sep='\n')

DataFrame created:
      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   35    Los Angeles
3    David   28        Chicago

Dtypes:
Name    object
Age      int64
City    object
dtype: object


In [4]:
# Task 2 - Row and Column Manipulation (drop 'City')
# drop() returns a new frame, so `df` itself is left untouched.
# errors='ignore' keeps the cell re-runnable: if 'City' is already absent the
# call simply returns the frame unchanged instead of raising KeyError, which
# removes the need for a try/except (and its unreachable sanity branch).
df_no_city = df.drop(columns=['City'], errors='ignore')

print('After dropping City:')
print(df_no_city)
print('Columns now:', list(df_no_city.columns))

After dropping City:
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35
3    David   28
Columns now: ['Name', 'Age']


In [5]:
# Task 3 - Handling Null Values:
# Create a DataFrame with nulls in each column
df_nulls = pd.DataFrame({
    'A': [1, None, 3],
    'B': [None, 2.5, None],
    'C': ['x', None, 'z']
})
print('Original with nulls:')
print(df_nulls)

# Strategy 1: Fill numeric with 0, strings with 'Unknown'
# Work on a copy so df_nulls stays available for the other strategies.
filled = df_nulls.copy()
filled['A'] = filled['A'].fillna(0)
filled['B'] = filled['B'].fillna(0)
filled['C'] = filled['C'].fillna('Unknown')
print('\nFilled by type:')
print(filled)

# Strategy 2: Forward/backward fill (good for ordered data)
# Use the dedicated ffill()/bfill() methods: fillna(method=...) is deprecated
# and emits a FutureWarning on recent pandas versions.
# The forward pass carries the last seen value downwards; the backward pass
# then fills any leading gaps that had nothing above them.
ffill_then_bfill = df_nulls.ffill().bfill()
print('\nForward then backward fill:')
print(ffill_then_bfill)

# Strategy 3: Column-wise specific values
# A gets a sentinel, B gets its own mean (NaNs are skipped by mean()), C gets a label.
specific = df_nulls.fillna({'A': 999, 'B': df_nulls['B'].mean(), 'C': 'NA'})
print('\nFill with specific values:')
print(specific)

Original with nulls:
     A    B     C
0  1.0  NaN     x
1  NaN  2.5  None
2  3.0  NaN     z

Filled by type:
     A    B        C
0  1.0  0.0        x
1  0.0  2.5  Unknown
2  3.0  0.0        z

Forward then backward fill:
     A    B  C
0  1.0  2.5  x
1  1.0  2.5  x
2  3.0  2.5  z

Fill with specific values:
       A    B   C
0    1.0  2.5   x
1  999.0  2.5  NA
2    3.0  2.5   z


  ffill_then_bfill = df_nulls.fillna(method='ffill').fillna(method='bfill')


In [7]:
# Task 4 - GroupBy and Describe
# Summary statistics of Value computed separately for each Category.
df_group = pd.DataFrame({
    'Category': list('ABABAC'),
    'Value': [10, 20, 15, 25, 30, 35],
})
value_by_category = df_group.groupby('Category')['Value']
desc = value_by_category.describe()
print('GroupBy describe on Value by Category:', desc, sep='\n')

GroupBy describe on Value by Category:
          count       mean        std   min    25%   50%    75%   max
Category                                                             
A           3.0  18.333333  10.408330  10.0  12.50  15.0  22.50  30.0
B           2.0  22.500000   3.535534  20.0  21.25  22.5  23.75  25.0
C           1.0  35.000000        NaN  35.0  35.00  35.0  35.00  35.0


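Category A has the highest count (3) and also the highest standard deviation (about 10.41), so its values are the most spread out. Category B has a much lower standard deviation (about 3.54) than A. Category C has only one observation, which is why its standard deviation is NaN.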

In [8]:
# Task 5 - Concatenation and Merging:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
df3 = pd.DataFrame({'C': [9, 10], 'D': [11, 12]})

# Stack df1 on top of df2 and renumber the rows 0..3.
vertical = pd.concat([df1, df2], ignore_index=True)
print('Vertical concat:', vertical, sep='\n')

# Place df3 beside the stacked frame, matching rows by index.
# df3 has only two rows, so rows 2 and 3 of C and D come out as NaN.
left_part = vertical.reset_index(drop=True)
right_part = df3.reset_index(drop=True)
merged = pd.concat([left_part, right_part], axis=1)
print('\nHorizontal merge result:', merged, sep='\n')
print('Shape should be (4, 4):', merged.shape)

Vertical concat:
   A  B
0  1  3
1  2  4
2  5  7
3  6  8

Horizontal merge result:
   A  B     C     D
0  1  3   9.0  11.0
1  2  4  10.0  12.0
2  5  7   NaN   NaN
3  6  8   NaN   NaN
Shape should be (4, 4): (4, 4)


In [9]:
# Task 6 - Tuples and Sets
fruits = ('apple', 'banana', 'cherry')
nums = {1, 2, 3, 4, 5}

print('Tuple:', fruits)
print('Set before add:', nums)

# "Adding" to a tuple never raises and never changes the tuple in place:
# `+=` builds a brand-new tuple and rebinds the name `fruits` to it.
# Keep a reference to the original object to show it is left untouched.
fruits_before = fruits
fruits += ('dragonfruit',)
assert fruits is not fruits_before
assert fruits_before == ('apple', 'banana', 'cherry')
print('New tuple after concatenation:', fruits)

# Add to set (mutable): add() modifies the existing set object in place.
nums.add(6)
print('Set after add:', nums)
print('Explanation: tuples are immutable (cannot change in place); sets are mutable and allow add/remove.')

Tuple: ('apple', 'banana', 'cherry')
Set before add: {1, 2, 3, 4, 5}
New tuple after concatenation: ('apple', 'banana', 'cherry', 'dragonfruit')
Set after add: {1, 2, 3, 4, 5, 6}
Explanation: tuples are immutable (cannot change in place); sets are mutable and allow add/remove.


In [10]:
# Task 7 - Dictionaries:
scores = dict(Alice=88, Bob=92, Charlie=79)
print('Original:', scores)

# A single update() call overwrites Bob's existing score and appends David
# as a new key at the end.
scores.update({'Bob': 95, 'David': 83})
print('Updated:', scores)

Original: {'Alice': 88, 'Bob': 92, 'Charlie': 79}
Updated: {'Alice': 88, 'Bob': 95, 'Charlie': 79, 'David': 83}


In [11]:
# Task 8 - Functions and Lambda:
def square(x):
    """Multiply x by itself.

    Accepts Python ints/floats and NumPy scalar numbers; anything else is
    rejected with a TypeError.
    """
    if isinstance(x, (int, float, np.number)):
        return x * x
    raise TypeError('square() expects a number')

# Anonymous-function counterpart of square(); performs no type checking.
square_lambda = lambda x: x * x

print('square(3)=', square(3))
print('square_lambda(4)=', square_lambda(4))

# Run both versions on an int and a float to show they agree.
for val in (2, 5.5):
    via_def = square(val)
    via_lambda = square_lambda(val)
    print(f'{val}^2 (def) =', via_def, '| (lambda) =', via_lambda)

square(3)= 9
square_lambda(4)= 16
2^2 (def) = 4 | (lambda) = 4
5.5^2 (def) = 30.25 | (lambda) = 30.25


In [12]:
# Task 9 - Iterators and Generators
class EvenIterator:
    """Class-based iterator over the first n even numbers: 0, 2, 4, ..."""

    def __init__(self, n):
        """Record how many values to produce; a negative count is rejected."""
        if n < 0:
            raise ValueError('n must be non-negative')
        self.n = n
        self.i = 0  # number of values handed out so far

    def __iter__(self):
        """Return self so the object can be used directly in a for loop."""
        return self

    def __next__(self):
        """Return the next even number, or raise StopIteration when done."""
        if self.i >= self.n:
            raise StopIteration
        position = self.i
        self.i = position + 1
        return position * 2

def even_gen(n):
    """Return a generator over the first n even numbers (starting from 0).

    Args:
        n: how many even numbers to produce.

    Returns:
        A lazy generator yielding 0, 2, 4, ... (n values in total).

    Raises:
        ValueError: if n is negative. The check runs when even_gen() is
            called, not when the result is first iterated, which matches
            EvenIterator raising at construction time.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    # Returning a generator expression (instead of using `yield` in this
    # function) lets the validation above run eagerly while the values are
    # still produced lazily.
    return (2 * i for i in range(n))

# Both implementations should produce the same five values.
evens_from_iterator = [*EvenIterator(5)]
evens_from_generator = [*even_gen(5)]
print('Iterator (first 5 evens):', evens_from_iterator)
print('Generator (first 5 evens):', evens_from_generator)

Iterator (first 5 evens): [0, 2, 4, 6, 8]
Generator (first 5 evens): [0, 2, 4, 6, 8]


In [13]:
# Task 10 - Map, Reduce, and Filter:
from functools import reduce
numbers = [1, 2, 3, 4, 5]

# map: apply a function to every element
squared = list(map(lambda value: value * value, numbers))
# reduce: fold the list into one value, starting from the multiplicative identity
product = reduce(lambda running, value: running * value, numbers, 1)
# filter: keep only the elements that pass the predicate
evens = list(filter(lambda value: value % 2 == 0, numbers))

for label, result in (
    ('Numbers:     ', numbers),
    ('Squared (map):', squared),
    ('Product (reduce):', product),
    ('Evens (filter):', evens),
):
    print(label, result)

Numbers:      [1, 2, 3, 4, 5]
Squared (map): [1, 4, 9, 16, 25]
Product (reduce): 120
Evens (filter): [2, 4]


In [14]:
# Task 11 - Object-Oriented Programming: Rectangle class
class Rectangle:
    """Rectangle defined by a positive length and width, stored as floats."""

    def __init__(self, length, width):
        """Validate both sides, then store them as floats.

        Raises:
            ValueError: if either side is zero or negative.
        """
        if any(side <= 0 for side in (length, width)):
            raise ValueError('length and width must be positive')
        self.length = float(length)
        self.width = float(width)

    def area(self):
        """Return length multiplied by width."""
        return self.length * self.width

    def perimeter(self):
        """Return the total length of all four sides."""
        return (self.length + self.width) * 2

    def __repr__(self):
        """Return a constructor-like description of the rectangle."""
        return 'Rectangle(length={}, width={})'.format(self.length, self.width)


# Build two rectangles (integer sides and a float side) and report on each.
r1 = Rectangle(3, 4)
r2 = Rectangle(5.5, 2)

for tag, rect in (('r1', r1), ('r2', r2)):
    print(rect)  # print() falls back to __repr__ since no __str__ is defined
    print(f'{tag} area:', rect.area(), f'| {tag} perimeter:', rect.perimeter())

Rectangle(length=3.0, width=4.0)
r1 area: 12.0 | r1 perimeter: 14.0
Rectangle(length=5.5, width=2.0)
r2 area: 11.0 | r2 perimeter: 15.0


In [16]:
# Task 12 - Pandas Data Analysis:
df_employees = pd.DataFrame(
    [
        ('John', 'IT', 55000),
        ('Jane', 'HR', 65000),
        ('Bob', 'IT', 70000),
        ('Alice', 'Finance', 60000),
        ('Charlie', 'HR', 58000),
    ],
    columns=['Name', 'Department', 'Salary'],
)

# 1) Mean salary per department, with the result column named AvgSalary
avg_salary = (
    df_employees
    .groupby('Department', as_index=False)
    .agg(AvgSalary=('Salary', 'mean'))
)
print('Average salary by department:', avg_salary, sep='\n')

# 2) Names of everyone earning strictly more than 60000
earns_over_60k = df_employees['Salary'].gt(60000)
high_earners = df_employees.loc[earns_over_60k, 'Name'].tolist()
print('\nEmployees with salary > 60000:', high_earners, sep='\n')

# 3) Add a Bonus column worth 10% of each salary
df_employees['Bonus'] = df_employees['Salary'].mul(0.10)
print('\nEmployees with Bonus column:', df_employees, sep='\n')

Average salary by department:
  Department  AvgSalary
0    Finance    60000.0
1         HR    61500.0
2         IT    62500.0

Employees with salary > 60000:
['Jane', 'Bob']

Employees with Bonus column:
      Name Department  Salary   Bonus
0     John         IT   55000  5500.0
1     Jane         HR   65000  6500.0
2      Bob         IT   70000  7000.0
3    Alice    Finance   60000  6000.0
4  Charlie         HR   58000  5800.0
