## Take-Home Assignment #2 (Due: 10/16)
Data Science Bootcamp_gl2996

### Setup

In [14]:
# Run once if the packages are missing (targets this kernel's environment): %pip install numpy pandas

In [15]:
# Core libraries for this assignment
import numpy as np
import pandas as pd


### Problem 1 (NumPy): Stack arrays vertically and horizontally
**Task:** Define two custom NumPy arrays `A` and `B`. Generate two new arrays by stacking `A` and `B` vertically and horizontally.

In [16]:
# Two equal-length 1D integer arrays used as stacking inputs
A = np.array([1, 4, 5, 6, 8, 10, 12])
B = np.array([3, 5, 7, 8, 10, 14, 16])

# vstack places each 1D input on its own row -> 2D result of shape (2, len)
v_stacked = np.vstack((A, B))

# hstack joins 1D inputs end to end -> 1D result of length len(A) + len(B)
h_stacked = np.hstack((A, B))

# Show the inputs first, then each stacked result preceded by its shape
for label, values in (("A:", A), ("B:", B)):
    print(label, values)

for heading, stacked in (
    ("\nVertical stack (vstack) shape:", v_stacked),
    ("\nHorizontal stack (hstack) shape:", h_stacked),
):
    print(heading, stacked.shape)
    print(stacked)


A: [ 1  4  5  6  8 10 12]
B: [ 3  5  7  8 10 14 16]

Vertical stack (vstack) shape: (2, 7)
[[ 1  4  5  6  8 10 12]
 [ 3  5  7  8 10 14 16]]

Horizontal stack (hstack) shape: (14,)
[ 1  4  5  6  8 10 12  3  5  7  8 10 14 16]


### Problem 2 (NumPy): Common elements (intersection)
**Task:** Find the common elements between `A` and `B`. (Hint: Intersection of two sets)

In [17]:
# np.intersect1d returns the sorted, unique values that occur in both inputs.
# assume_unique=False (the default) is spelled out because the inputs are not
# pre-deduplicated by this cell.
common = np.intersect1d(A, B, assume_unique=False)
print("Common elements between A and B:", common)


Common elements between A and B: [ 5  8 10]


### Problem 3 (NumPy): Extract values from A within a range
**Task:** Extract all numbers from `A` within a specific range, for example between 5 and 10. We treat this as the inclusive range [5, 10]. (Hint: `np.where()` or a boolean mask might be useful.)

In [18]:
# Element-wise test for membership in the inclusive range [5, 10]:
# an entry is kept only when it is both >= 5 and <= 10.
mask = np.logical_and(A >= 5, A <= 10)
in_range = A[mask]
print("Values in A with 5 ≤ x ≤ 10:", in_range)

# (Optional) Same selection by integer position: np.where on a 1D mask returns
# a one-element tuple holding the indices where the mask is True.
(idx,) = np.where(mask)
print("Indices:", idx)
print("Values via indices:", A[idx])


Values in A with 5 ≤ x ≤ 10: [ 5  6  8 10]
Indices: [2 3 4 5]
Values via indices: [ 5  6  8 10]


### Problem 4 (NumPy): Filter iris_2d with conditions
**Task:** Filter the rows of `iris_2d` that have `petallength` (3rd column) > 1.5 and `sepallength` (1st column) < 5.0.

Use the following:
```python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
```

In [19]:
# Read only the four numeric measurement columns (indices 0-3) as floats
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype=float, usecols=(0, 1, 2, 3))

# Name the two columns the filter relies on: sepallength is column 0,
# petallength is column 2
sepal_length = iris_2d[:, 0]
petal_length = iris_2d[:, 2]

# Keep rows with sepallength < 5.0 AND petallength > 1.5
cond = (sepal_length < 5.0) & (petal_length > 1.5)
filtered = iris_2d[cond]
print("Filtered shape:", filtered.shape)
print(filtered[:10])  # at most the first 10 matching rows


Filtered shape: (6, 4)
[[4.8 3.4 1.6 0.2]
 [4.8 3.4 1.9 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [4.9 2.4 3.3 1. ]
 [4.9 2.5 4.5 1.7]]


### Problem 5 (Pandas): Every 20th row starting from row 0
**Task:** From `df`, select the columns `'Manufacturer'`, `'Model'`, and `'Type'` for every 20th row, starting from the 1st row (row 0).

Dataset:
```python
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')
```

In [20]:
# Cars93 dataset (the variant that contains missing values)
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Columns requested by the task
cols = ['Manufacturer', 'Model', 'Type']

# Step through the rows 20 at a time beginning with row 0, keeping only `cols`
every_20th = df.loc[::20, cols]

# Renumber the selected rows 0..n-1; the original row labels are discarded
result_every_20th = every_20th.reset_index(drop=True)
result_every_20th


Unnamed: 0,Manufacturer,Model,Type
0,Acura,Integra,Small
1,Chrysler,LeBaron,Compact
2,Honda,Prelude,Sporty
3,Mercury,Cougar,Midsize
4,Subaru,Loyale,Small


### Problem 6 (Pandas): Impute missing values with column mean
**Task:** Replace missing values in `Min.Price` and `Max.Price` with their respective mean values.

Dataset:
```python
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')
```

In [21]:
# Fresh copy of the dataset so this cell does not depend on earlier edits to df
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/Cars93_miss.csv')

# Columns to impute
price_cols = ['Min.Price', 'Max.Price']

# Mean of each price column, computed per column (Series.mean skips NaN by default)
col_means = {col: df[col].mean() for col in price_cols}

# A dict passed to fillna fills each listed column with its own value and
# leaves every other column untouched
df = df.fillna(value=col_means)

# Verify no missing values remain in these columns
print(df[price_cols].isna().sum())


Min.Price    0
Max.Price    0
dtype: int64


### Problem 7 (Pandas): Row sum > 100
**Task:** Given `df = pd.DataFrame(np.random.randint(10, 40, 60).reshape(-1, 4))`, get the rows where the row-wise sum is greater than 100.

In [22]:
# Seed NumPy's legacy global RNG so the random DataFrame, and therefore the
# filtered rows, are identical on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# 60 random integers drawn from [10, 40), reshaped into 15 rows x 4 columns
df = pd.DataFrame(np.random.randint(10, 40, 60).reshape(-1, 4))

# Sum across the columns of each row (axis=1) and keep rows whose total exceeds 100
rows_gt_100 = df[df.sum(axis=1) > 100]
rows_gt_100


Unnamed: 0,0,1,2,3
4,29,24,19,32
6,39,23,39,34
12,25,30,32,19
13,25,38,29,12
14,23,30,26,31
