In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Indexing and slicing arrays

## Slicing and indexing trees

In [4]:
tree_census = np.load("data/tree_census.npy")

In [5]:
tree_census

array([[     3, 501451,     24,      0],
       [     4, 501451,     20,      0],
       [     7, 501911,      3,      0],
       ...,
       [  1198, 227387,     11,      0],
       [  1199, 227387,     11,      0],
       [  1210, 227386,      6,      0]], dtype=int64)

In [10]:
# Select the tenth block ID from block_ids
tenth_block_id = block_ids[9]

print(tenth_block_id)

501911


In [12]:
# Select five block IDs from block_ids starting with the tenth ID
block_id_slice = block_ids[9:14]

print(block_id_slice)

[501911 501911 501911 501909 501909]


## Stepping into 2D

In [14]:
tree_census.shape

(1000, 4)

In [15]:
# Create an array of the first 100 trunk diameters from tree_census
hundred_diameters = tree_census[99]
print(hundred_diameters)

[   134 503178      4      0]


In [18]:
hundred_diameters = tree_census[50:101,2]
print(hundred_diameters)

[ 5  4  5  8 51  7  4 15  3  8  6  6  3  4  3  2  3  3  6  5  5  5  5  9
  4  4  7  7  6  5  4  4  5  5  5  7  3  5  3  3  6  6  8  7  4  5  4  4
  4  4  6]


In [19]:
# Create an array of trunk diameters with even row indices from 50 to 100 inclusive
every_other_diameter = tree_census[50:101:2, 2]
print(every_other_diameter)

[ 5  5 51  4  3  6  3  3  3  6  5  5  4  7  6  4  5  5  3  3  6  8  4  4
  4  6]


## Sorting trees

In [20]:
tree_census

array([[     3, 501451,     24,      0],
       [     4, 501451,     20,      0],
       [     7, 501911,      3,      0],
       ...,
       [  1198, 227387,     11,      0],
       [  1199, 227387,     11,      0],
       [  1210, 227386,      6,      0]], dtype=int64)

In [21]:
# Extract trunk diameters information and sort from smallest to largest
sorted_trunk_diameters = np.sort(tree_census[:,2])

print(sorted_trunk_diameters)

[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  2  2  2  2  2  2
  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2  2
  2  2  2  2  2  2  2  2  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3
  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  3  4  4  4  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4  4
  4  4  4  4  4  4  4  4  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5
  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5

# Filtering arrays

## Filtering with masks

In [26]:
np.set_printoptions(threshold=np.inf)

In [30]:
tree_census

array([[     3, 501451,     24,      0],
       [     4, 501451,     20,      0],
       [     7, 501911,      3,      0],
       [     8, 501911,      3,      0],
       [     9, 501911,      4,      0],
       [    10, 501911,      4,      0],
       [    11, 501911,      4,      0],
       [    12, 501911,      4,      0],
       [    13, 501911,      4,      0],
       [    14, 501911,      3,      0],
       [    15, 501911,      3,      0],
       [    16, 501911,      4,      0],
       [    17, 501909,      2,      0],
       [    18, 501909,      2,      0],
       [    19, 501909,      3,      0],
       [    20, 501909,      4,      0],
       [    21, 501909,      4,      0],
       [    22, 501909,      4,      0],
       [    23, 501909,      0,      3],
       [    26, 501967,     14,      0],
       [    27, 501967,      3,      0],
       [    28, 501967,      4,      0],
       [    29, 501134,      7,      0],
       [    30, 501134,      8,      0],
       [    31, 

In [32]:
# Create an array which contains row data on the largest tree in tree_census
largest_tree_data = tree_census[tree_census[:,2] == 51]

print(largest_tree_data)

[[    61 501882     51      0]]


In [33]:
# Slice largest_tree_data to get only the block id
largest_tree_block_id = largest_tree_data[:,1]
print(largest_tree_block_id)

[501882]


In [35]:
# Create an array which contains row data on all trees with largest_tree_block_id
trees_on_largest_tree_block = tree_census[tree_census[:,1] == largest_tree_block_id]

print(trees_on_largest_tree_block)

[[    60 501882      8      0]
 [    61 501882     51      0]
 [    62 501882      7      0]
 [    63 501882      4      0]
 [    64 501882     15      0]
 [    65 501882      3      0]
 [    66 501882      8      0]
 [    67 501882      6      0]
 [    68 501882      6      0]
 [    69 501882      3      0]]


## Fancy indexing vs. np.where()

In [36]:
# Create the block_313879 array containing trees on block 313879
block_313879 = tree_census[tree_census[:,1] == 313879]

print(block_313879)

[[  1115 313879      3      0]
 [  1116 313879     17      0]]


In [37]:
# Create an array of row_indices for trees on block 313879
row_indices = np.where(tree_census[:,1] == 313879)

# Create an array which only contains data for trees on block 313879
block_313879 = tree_census[row_indices]
print(block_313879)

[[  1115 313879      3      0]
 [  1116 313879     17      0]]


## Creating arrays from conditions

In [39]:
# Create and print a 1D array of tree and stump diameters
trunk_stump_diameters = np.where(tree_census[:,2] == 0, tree_census[:,3], tree_census[:,2])

print(trunk_stump_diameters)

[24 20  3  3  4  4  4  4  4  3  3  4  2  2  3  4  4  4  3 14  3  4  7  8
  7  8  7  5  6  5  5 17 31 19 21 18  4  5  3  4  3  4 13 13 13  5  4  4
  4 11  5  4  5  8 51  7  4 15  3  8  6  6  3  4  3  2  3  3  6  5  5  5
  5  9  4  4  7  7  6  5  4  4  5  5  5  7  3  5  3  3  6  6  8  7  4  5
  4  4  4  4  6  5  3  4 12 12 12  5  6  6  6  6  6  5  5  6  7  7 25  5
  5  4  6  6  7 11  6 17 13 14 14 20 15 13  7  7 10 17 14  4  6  7  8  7
  7  6  7  5  2  2  2  2 26 25  2 15  6 20  5  9 15 13 15  3  2 13  6 12
 15 18 22 18 18 15 17  7  3  7  8  4 12 11 12  3  9 12 11 10  8  6  6  7
  7  3 15 12 12  4  5  5  5  4  4  5  4  9  2  4  4  6  5  5  2  5  5  4
  4  5  5  6 11  4  5  7  3 14 11 10  7 15 10  5  6 10 10  6  5  4  4  3
  5  4 14 12 11  8 14 12  9 12 11  7  8 10 10 12 11 12  5  5  6  9  9  8
  5  5  5  6  6 12 12 11 12  8  9  5  5  5  8  2  2  2 14 18 14 14 22 15
 19 14 18  7  7  7  8  8  5 10 14  2  2  2  2 11 12 12  3  3  3  3  3  6
  6  8  2  2 11 11 11  9 11 12 13  9 11  6  4  5  5

# Adding and removing data

## Adding rows

In [40]:
new_trees = np.array([[1211, 227386, 20, 0], [1212, 227386, 8, 0]])

In [41]:
new_trees

array([[  1211, 227386,     20,      0],
       [  1212, 227386,      8,      0]])

In [42]:
# Print the shapes of tree_census and new_trees
print(tree_census.shape, new_trees.shape)

(1000, 4) (2, 4)


In [46]:
# Add rows to tree_census which contain data for the new trees
updated_tree_census = np.concatenate((tree_census, new_trees), axis=0)
print(updated_tree_census)

[[     3 501451     24      0]
 [     4 501451     20      0]
 [     7 501911      3      0]
 [     8 501911      3      0]
 [     9 501911      4      0]
 [    10 501911      4      0]
 [    11 501911      4      0]
 [    12 501911      4      0]
 [    13 501911      4      0]
 [    14 501911      3      0]
 [    15 501911      3      0]
 [    16 501911      4      0]
 [    17 501909      2      0]
 [    18 501909      2      0]
 [    19 501909      3      0]
 [    20 501909      4      0]
 [    21 501909      4      0]
 [    22 501909      4      0]
 [    23 501909      0      3]
 [    26 501967     14      0]
 [    27 501967      3      0]
 [    28 501967      4      0]
 [    29 501134      7      0]
 [    30 501134      8      0]
 [    31 501134      7      0]
 [    32 501134      8      0]
 [    33 501134      7      0]
 [    34 501134      5      0]
 [    35 501134      6      0]
 [    36 501134      5      0]
 [    37 501966      5      0]
 [    38 501966     17      0]
 [    39

## Adding columns

In [50]:
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters.shape, tree_census.shape)

(1000,) (1000, 4)


In [49]:
# Reshape trunk_stump_diameters
reshaped_diameters = trunk_stump_diameters.reshape((1000, 1))
print(trunk_stump_diameters.shape)

(1000,)


In [51]:
# Concatenate reshaped_diameters to tree_census as the last column
concatenated_tree_census = np.concatenate((tree_census, reshaped_diameters), axis=1)
print(concatenated_tree_census)

[[     3 501451     24      0     24]
 [     4 501451     20      0     20]
 [     7 501911      3      0      3]
 [     8 501911      3      0      3]
 [     9 501911      4      0      4]
 [    10 501911      4      0      4]
 [    11 501911      4      0      4]
 [    12 501911      4      0      4]
 [    13 501911      4      0      4]
 [    14 501911      3      0      3]
 [    15 501911      3      0      3]
 [    16 501911      4      0      4]
 [    17 501909      2      0      2]
 [    18 501909      2      0      2]
 [    19 501909      3      0      3]
 [    20 501909      4      0      4]
 [    21 501909      4      0      4]
 [    22 501909      4      0      4]
 [    23 501909      0      3      3]
 [    26 501967     14      0     14]
 [    27 501967      3      0      3]
 [    28 501967      4      0      4]
 [    29 501134      7      0      7]
 [    30 501134      8      0      8]
 [    31 501134      7      0      7]
 [    32 501134      8      0      8]
 [    33 501

## Deleting with np.delete()

In [55]:
# Delete the stump diameter column from tree_census
tree_census_no_stumps = np.delete(tree_census, 3, axis=1)

# Save the indices of the trees on block 313879
private_block_indices = np.where(tree_census_no_stumps[:,1] == 313879)

print(tree_census_no_stumps.shape)

(1000, 3)


In [53]:
# Delete the rows for trees on block 313879 from tree_census_no_stumps
tree_census_clean = np.delete(tree_census_no_stumps, private_block_indices, axis=0)

# Print the shape of tree_census_clean
print(tree_census_clean.shape)

(998, 3)
