In [2]:
import numpy as np

In [3]:
# Two small integer arrays, each of shape (2, 4), reused by the
# joining examples that follow.
arr1 = np.array([
    [1, 2, 3, 4],
    [1, 2, 3, 4],
])
arr2 = np.array([
    [5, 6, 7, 8],
    [5, 6, 7, 8],
])

print('arr1:', arr1)
print('\n arr2:', arr2)

arr1: [[1 2 3 4]
 [1 2 3 4]]

 arr2: [[5 6 7 8]
 [5 6 7 8]]


### ```np.concatenate((a, b), axis=0)```

In [4]:
# Join along axis 0 (rows): arr2's rows are appended below arr1's rows,
# so two (2, 4) inputs give a (4, 4) result.
cat = np.concatenate([arr1, arr2], axis=0)
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [None]:
# Join along axis 1 (columns): arr2's columns are appended to the right of
# arr1's, so two (2, 4) inputs give a (2, 8) result.
cat = np.concatenate([arr1, arr2], axis=1)
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### ```np.vstack((a, b))``` 

In [6]:
# Vertical stack: arr1's rows on top, arr2's rows underneath.
cat = np.vstack([arr1, arr2])
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


### ```np.hstack((a, b))```


In [8]:
# Horizontal stack: arr1's columns on the left, arr2's columns on the right.
cat = np.hstack([arr1, arr2])
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### split an array 

In [10]:
# Seeded random state so the 6x6 sample printed below is reproducible.
rs = np.random.RandomState(seed=321)
arr = rs.random_sample((6, 6))  # 6x6 array of floats drawn from [0, 1)
print(arr)

[[0.88594794 0.07791236 0.97964616 0.24767146 0.75288472 0.52667564]
 [0.90755375 0.8840703  0.08926896 0.5173446  0.34362129 0.21229369]
 [0.36067344 0.27077517 0.76162502 0.4780419  0.09899468 0.27539478]
 [0.79442731 0.51397031 0.45329481 0.25515125 0.1139766  0.82431305]
 [0.3177535  0.15230703 0.21497959 0.91211032 0.04311515 0.37595241]
 [0.31796557 0.35403302 0.93335757 0.3885452  0.89593944 0.14550322]]


In [None]:
# vsplit cuts along the row axis; asking for 2 sections returns two
# equally sized pieces (the top half and the bottom half of arr).
varr1, varr2 = np.vsplit(arr, indices_or_sections=2)
print('varr1:\n', varr1)
print('\nvarr2:\n', varr2)

varr1:
 [[0.88594794 0.07791236 0.97964616 0.24767146 0.75288472 0.52667564]
 [0.90755375 0.8840703  0.08926896 0.5173446  0.34362129 0.21229369]
 [0.36067344 0.27077517 0.76162502 0.4780419  0.09899468 0.27539478]]

varr2:
 [[0.79442731 0.51397031 0.45329481 0.25515125 0.1139766  0.82431305]
 [0.3177535  0.15230703 0.21497959 0.91211032 0.04311515 0.37595241]
 [0.31796557 0.35403302 0.93335757 0.3885452  0.89593944 0.14550322]]


In [None]:
# hsplit cuts along the column axis; asking for 2 sections returns two
# equally sized pieces (the left half and the right half of arr).
harr1, harr2 = np.hsplit(arr, indices_or_sections=2)
print('harr1:\n', harr1)
print('\nharr2:\n', harr2)

harr1:
 [[0.88594794 0.07791236 0.97964616]
 [0.90755375 0.8840703  0.08926896]
 [0.36067344 0.27077517 0.76162502]
 [0.79442731 0.51397031 0.45329481]
 [0.3177535  0.15230703 0.21497959]
 [0.31796557 0.35403302 0.93335757]]

harr2:
 [[0.24767146 0.75288472 0.52667564]
 [0.5173446  0.34362129 0.21229369]
 [0.4780419  0.09899468 0.27539478]
 [0.25515125 0.1139766  0.82431305]
 [0.91211032 0.04311515 0.37595241]
 [0.3885452  0.89593944 0.14550322]]


# Exercises
Combining datasets—such as merging precipitation records with streamflow observations—is where NumPy's joining and splitting functions shine. These exercises simulate real-world data "wrangling" for hydrological modeling.

## Exercise 1: Merging Regional Rain Gauges (vstack, hstack)

**Goal:** Learn to combine data from different stations into a single matrix.

**Scenario:** You have two automated rain gauges in a watershed. Over a 5-hour storm, they record the following rainfall (mm): Station A: [2.1, 5.4, 0.0, 1.2, 4.4]; Station B: [1.8, 4.9, 0.2, 0.8, 3.9].

**Task:** 
* Stack Vertically: Use np.vstack() to create a $2 \times 5$ matrix where each row represents a station.
* Stack Horizontally: Use np.hstack() to create a single 1D array of 10 values, representing the total chronological sequence of Station A followed by Station B.
* Dimension Check: Print the .shape of both results to see how the stacking changed the structure.

In [8]:
# Hourly rainfall (mm) recorded by each gauge over the 5-hour storm.
Station_A = np.array([2.1, 5.4, 0.0, 1.2, 4.4])
Station_B = np.array([1.8, 4.9, 0.2, 0.8, 3.9])

# One row per station -> 2 x 5 matrix.
data_vertical = np.vstack([Station_A, Station_B])
# Station A's readings followed by Station B's -> flat array of 10 values.
data_horizontal = np.hstack([Station_A, Station_B])

# Dimension check: compare the structure produced by each kind of stacking.
print('Vertical:', data_vertical.shape)
print('Horizontal:', data_horizontal.shape)

Vertical: (2, 5)
Horizontal: (10,)


## Exercise 2: Connecting Drivers to Responses (concatenate)

**Goal:** Use np.concatenate to build a multi-feature dataset for analysis.

**Scenario:** To train a flood prediction model, you need to align your "input" (Precipitation) with your "output" (Streamflow).
```
precip = np.array([[10.2], [15.5], [2.1]])  # 3 hours of rain
flow = np.array([[45.0], [62.3], [58.1]])   # 3 hours of flow
```

**Task:**
* Concatenate: Join these two arrays along Axis 1 so that you have a $3 \times 2$ matrix. Each row should contain [precipitation, streamflow].
* Append Data: You just received a 4th hour of data: 
```
new_data = np.array([[0.0, 50.5]])
```
Concatenate this to your $3 \times 2$ matrix along Axis 0 to result in a $4 \times 2$ matrix.

In [9]:
# Column vectors with one row per hour.
precip = np.array([[10.2], [15.5], [2.1]])  # 3 hours of rain
flow = np.array([[45.0], [62.3], [58.1]])   # 3 hours of flow

# Side-by-side join (axis 1): every row becomes [precipitation, streamflow],
# giving a 3 x 2 matrix.
data = np.concatenate([precip, flow], axis=1)
print('Data:\n', data)

# Append the 4th hour as an extra row (axis 0), giving a 4 x 2 matrix.
new_data = np.array([[0.0, 50.5]])
data = np.concatenate([data, new_data], axis=0)
print('Data with new hour:\n', data)

Data:
 [[10.2 45. ]
 [15.5 62.3]
 [ 2.1 58.1]]
Data with new hour:
 [[10.2 45. ]
 [15.5 62.3]
 [ 2.1 58.1]
 [ 0.  50.5]]


There are two additional modules to explore at your leisure. 
