# Part I: NumPy Questions

## ✅ Task 1: Creating Arrays

In [None]:

import numpy as np

# Build the three demo arrays up front, then report on them.
arr_1d = np.arange(1, 11)                 # integers 1..10 (stop value is exclusive)
arr_2d = np.arange(1, 10).reshape(3, 3)   # values 1..9 arranged as a 3x3 grid
arr_3d = np.random.rand(3, 5, 3)          # random floats with shape (3, 5, 3)

# Show each array under its own heading.
for heading, array in (
    ("1D Array:\n", arr_1d),
    ("\n2D Array:\n", arr_2d),
    ("\n3D Array:\n", arr_3d),
):
    print(heading, array)

# Summarise shape, element count and element type of all three arrays.
demo_arrays = (arr_1d, arr_2d, arr_3d)
print("\nShapes:", *(a.shape for a in demo_arrays))
print("Sizes:", *(a.size for a in demo_arrays))
print("Data Types:", *(a.dtype for a in demo_arrays))


## ✅ Task 2: Array Indexing and Slicing

In [None]:

data = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90])

# Three slices of the same array: a prefix, a strided selection, and a
# negative-step slice that walks the array back to front.
first_three = data[0:3]
alternate = data[0::2]
reversed_array = data[::-1]

for label, selection in (
    ("First 3 elements:", first_three),
    ("Alternate elements:", alternate),
    ("Reversed array:", reversed_array),
):
    print(label, selection)


## ✅ Task 3: Mathematical Operations

In [None]:

# Two draws of five integers each from 1..20 (the upper bound 21 is exclusive).
A = np.random.randint(1, 21, size=5)
B = np.random.randint(1, 21, size=5)

for label, vector in (("Array A:", A), ("Array B:", B)):
    print(label, vector)

# Element-wise arithmetic between the two vectors.
elementwise_results = (
    ("\nAddition:", A + B),
    ("Subtraction:", A - B),
    ("Multiplication:", A * B),
    ("Division:", A / B),
)
for label, result in elementwise_results:
    print(label, result)

# Dot product of the two vectors.
dot_product = np.dot(A, B)
print("\nDot Product:", dot_product)

# Descriptive statistics of A.
statistics_of_a = (
    ("\nMean of A:", np.mean),
    ("Median of A:", np.median),
    ("Std Dev of A:", np.std),
    ("Variance of A:", np.var),
)
for label, statistic in statistics_of_a:
    print(label, statistic(A))

# Extremes of B together with the position where each occurs.
extremes_of_b = (
    ("\nMax of B:", np.max, np.argmax),
    ("Min of B:", np.min, np.argmin),
)
for label, pick_value, pick_index in extremes_of_b:
    print(label, pick_value(B), "at index", pick_index(B))


## ✅ Task 4: Reshaping and Transposing

In [None]:

arr = np.arange(1, 13)

# Twelve elements viewed as a 4x3 matrix and as a 2x2x3 cube; the -1 lets
# NumPy work out the last dimension from the element count.
arr_2d = arr.reshape(4, -1)
arr_3d = arr.reshape(2, 2, -1)

# Swapping the axes of the 4x3 matrix gives a 3x4 matrix.
transposed = arr_2d.T

for heading, array in (
    ("2D Reshaped (4x3):\n", arr_2d),
    ("\n3D Reshaped (2x2x3):\n", arr_3d),
    ("\nTransposed 2D Array:\n", transposed),
):
    print(heading, array)
print("New shape:", transposed.shape)


## ✅ Task 5: Boolean Masking and Filtering

In [None]:

# Fifteen random integers from 10..50 (the upper bound 51 is exclusive).
arr = np.random.randint(10, 51, size=15)
print("Original Array:\n", arr)

# Keep only the entries that are strictly greater than 25.
above_25_mask = arr > 25
greater_than_25 = arr[above_25_mask]
print("\n> 25:\n", greater_than_25)

# New array in which every entry below 30 becomes 0; `arr` itself is untouched.
below_30_mask = arr < 30
arr_modified = np.where(below_30_mask, 0, arr)
print("\n< 30 replaced with 0:\n", arr_modified)

# Summing a boolean mask counts its True entries.
divisible_by_5_mask = (arr % 5) == 0
count_div_5 = np.sum(divisible_by_5_mask)
print("\nCount divisible by 5:", count_div_5)


## ✅ Task 6: Built-in Functions

In [None]:

# Ten evenly spaced samples covering the closed interval [0, 1].
equally_spaced = np.linspace(0, 1, num=10)
print("Equally spaced (0 to 1):\n", equally_spaced)

# 4x4 matrix with ones on the main diagonal and zeros elsewhere.
identity_matrix = np.eye(4)
print("\n4x4 Identity Matrix:\n", identity_matrix)

# Draw twenty integers from 1..100, sort ascending, and take the last five
# entries of the sorted array, i.e. the five largest values.
random_vals = np.random.randint(1, 101, size=20)
sorted_vals = np.sort(random_vals)
top_5 = sorted_vals[len(sorted_vals) - 5:]
for heading, values in (
    ("\nSorted array:\n", sorted_vals),
    ("Top 5 values:", top_5),
):
    print(heading, values)


## ✅ Task 7: Generic Performance Task

In [None]:

import time

# Two 100x100 matrices filled with random floats.
A = np.random.rand(100, 100)
B = np.random.rand(100, 100)

# perf_counter() is the clock meant for measuring elapsed time: it is
# monotonic and high-resolution. time.time() follows the wall clock, which
# can be adjusted during the measurement and may be too coarse for a
# workload this short.
start = time.perf_counter()

# Matrix product, then its determinant.
C = np.matmul(A, B)
det = np.linalg.det(C)

# Attempt the inverse. Only the call that can raise sits inside `try`;
# LinAlgError is what np.linalg.inv raises when it cannot invert C.
try:
    inv = np.linalg.inv(C)
except np.linalg.LinAlgError:
    print("Matrix not invertible.")
else:
    print("Inverse calculated.")

end = time.perf_counter()

print("Time taken:", end - start, "seconds")
print("Determinant:", det)


# 🧠 Part II: Pandas Questions

## ✅ Task 1: Working with Series

In [None]:

import pandas as pd

# Five values labelled 'A' through 'E'.
data = [25, 30, 35, 40, 45]
series = pd.Series(data, index=list("ABCDE"))

# Leading three entries.
print("First 3 elements:", series.head(3))

# Central tendency and spread of the series.
summary_stats = (
    ("Mean:", series.mean()),
    ("Median:", series.median()),
    ("Standard Deviation:", series.std()),
)
for label, value in summary_stats:
    print(label, value)


## ✅ Task 2: Creating and Inspecting DataFrames

In [None]:

# One tuple per student, in column order (Name, Age, Gender, Marks).
student_rows = [
    ('Alice', 20, 'Female', 85),
    ('Bob', 22, 'Male', 78),
    ('Carol', 19, 'Female', 92),
    ('David', 21, 'Male', 74),
    ('Eve', 20, 'Female', 88),
]
df = pd.DataFrame(student_rows, columns=['Name', 'Age', 'Gender', 'Marks'])

# Peek at the top of the table.
print("First 2 rows:", df.head(2))

# Structure and numeric summary.
column_names = df.columns.tolist()
print("Column Names:", column_names)
print("Data Types:", df.dtypes)
print("Summary Stats:", df.describe())

# A student passes with 80 marks or more.
passed_flags = df['Marks'] >= 80
df['Passed'] = passed_flags
print("DataFrame with 'Passed' column:", df)


## ✅ Task 3: Data Selection and Filtering

In [None]:

# Two-column projection of the table.
name_and_marks = df[['Name', 'Marks']]
print("Name and Marks:", name_and_marks)

# Rows whose marks are strictly above 80.
above_80_mask = df['Marks'] > 80
print("Marks > 80:", df[above_80_mask])

# Row(s) holding the top mark; ties all appear because the filter compares
# every row against the maximum.
top_mark = df['Marks'].max()
highest = df[df['Marks'] == top_mark]
print("Student with Highest Marks:", highest)


## ✅ Task 4: Handling Missing Data

In [None]:

# Integer columns cannot hold a missing value, so make Age and Marks float
# explicitly before writing any. The implicit int -> float upcast that would
# otherwise happen on assignment triggers a deprecation warning in recent
# pandas releases.
df = df.astype({'Age': 'float64', 'Marks': 'float64'})

# Introduce missing values (None is stored as NaN in a float column).
df.loc[1, 'Marks'] = None
df.loc[4, 'Age'] = None

# Boolean table marking which cells are missing.
print("Missing values:", df.isnull())

# Fill missing Marks with the column mean. The result is assigned back to the
# column: calling fillna(..., inplace=True) on df['Marks'] operates on an
# intermediate object, which pandas has deprecated and which does not update
# `df` when Copy-on-Write is enabled.
df['Marks'] = df['Marks'].fillna(df['Marks'].mean())

# Drop rows whose Age is missing; `df` itself keeps those rows.
df_cleaned = df.dropna(subset=['Age'])

print("Cleaned DataFrame:", df_cleaned)


## ✅ Task 5: Grouping and Aggregation

In [None]:

# Per-gender averages of the two numeric columns.
numeric_columns = ['Age', 'Marks']
students_by_gender = df.groupby('Gender')
grouped_mean = students_by_gender[numeric_columns].mean()
print("Mean Age and Marks by Gender:", grouped_mean)

# How many rows carry each gender value.
gender_column = df['Gender']
gender_counts = gender_column.value_counts()
print("Student count by Gender:", gender_counts)


## ✅ Task 6: Reading and Writing Data

In [None]:

# Round-trip the table through a CSV file in the working directory.
csv_path = "students_data.csv"

# index=False keeps the row index out of the file.
df.to_csv(csv_path, index=False)

# Load the file back into a separate DataFrame and show its first five rows.
df_new = pd.read_csv(csv_path)
print("First 5 rows from CSV:", df_new.head(5))


## ✅ Task 7: General EDA with Public Dataset

In [None]:

# Using seaborn's built-in dataset for demo
import seaborn as sns
import matplotlib.pyplot as plt

# Fetch the "tips" example dataset through seaborn.
tips = sns.load_dataset("tips")

# Text overview: first rows, numeric summary, and per-column count of missing cells.
print("Dataset Overview:", tips.head())
print("Summary:", tips.describe())
missing_per_column = tips.isnull().sum()
print("Missing Values:", missing_per_column)

# Histogram of the total bill with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(tips['total_bill'], kde=True, ax=ax)
ax.set_title("Distribution of Total Bill")
ax.set_xlabel("Total Bill")
ax.set_ylabel("Frequency")
plt.show()