#### Pandas Part 74: Boolean Data Types and Index Objects

This notebook explores boolean data types with missing values and various Index object methods.

In [None]:
import pandas as pd
import numpy as np

##### 1. Boolean Data with Missing Values

The boolean dtype (with the alias "boolean") provides support for storing boolean data (True, False values) with missing values, which is not possible with a standard NumPy boolean array.

### Creating BooleanArray

You can create a BooleanArray using `pd.array()` with dtype="boolean".

In [None]:
# Build a nullable boolean array; the None entry is stored as a missing value
bool_array = pd.array([True, False, None], dtype="boolean")

# Show the array itself, its Python type and its pandas dtype, one per line
print(
    f"BooleanArray: {bool_array}",
    f"Type: {type(bool_array)}",
    f"Dtype: {bool_array.dtype}",
    sep="\n",
)

### BooleanArray vs NumPy Boolean Array

Let's compare BooleanArray with a standard NumPy boolean array.

In [None]:
# Ask NumPy for a bool array directly; the except branch reports any failure
try:
    np_bool_array = np.array([True, False, None], dtype=bool)
    print(f"NumPy boolean array: {np_bool_array}")
except Exception as err:
    print(f"Error: {err}")

# Without an explicit dtype the None is kept as-is, so inspect the inferred dtype
np_object_array = np.array([True, False, None])
print(
    f"NumPy array with None: {np_object_array}",
    f"Dtype: {np_object_array.dtype}",
    sep="\n",
)

# Casting to bool is where the missing value is lost: None becomes False
np_bool_array = np_object_array.astype(bool)
print(
    f"Converted to boolean: {np_bool_array}",
    f"Dtype: {np_bool_array.dtype}",
    sep="\n",
)

### Creating Series with Boolean Dtype

In [None]:
# A Series backed by the nullable "boolean" dtype keeps the None entry as missing
s = pd.Series([True, False, None], dtype="boolean")
print(s, f"Dtype: {s.dtype}", sep="\n")

### Kleene Logic for Boolean Operations

BooleanArray implements Kleene logic (three-value logic) for logical operations.

In [None]:
# Two nullable boolean Series used as operands in the logic examples below
s1 = pd.Series([True, False, True, None], dtype="boolean")
s2 = pd.Series([True, False, None, None], dtype="boolean")

# Label and display each operand, separated by a blank line
print("s1:", s1, "\ns2:", s2, sep="\n")

In [None]:
# Element-wise AND, OR and NOT on the nullable boolean Series s1 and s2
print("s1 & s2 (AND):", s1 & s2, sep="\n")
print("\ns1 | s2 (OR):", s1 | s2, sep="\n")
print("\n~s1 (NOT):", ~s1, sep="\n")

### Kleene Logic Truth Tables

Let's demonstrate the truth tables for Kleene logic.

In [None]:
# Kleene AND truth table: each row is the left operand, each column the right one.
values = [True, False, None]
print("AND Truth Table (Kleene Logic):")
# Header and separator are built from the same 5-character cells as the body,
# so the columns stay aligned with the row labels.
print(" | ".join(f"{label:<5}" for label in ["", "True", "False", "None"]))
print("-+-".join(["-" * 5] * 4))

and_rows = []
for a in values:
    cells = [str(a)]
    for b in values:
        result = pd.array([a], dtype="boolean") & pd.array([b], dtype="boolean")
        # Convert to str before padding: a bool combined with a width spec is
        # formatted as an integer and would be shown as 1/0 instead of True/False.
        cells.append(str(result[0]))
    row = " | ".join(f"{cell:<5}" for cell in cells)
    and_rows.append(row)
    print(row)

In [None]:
# Kleene OR truth table: each row is the left operand, each column the right one.
# Iterates over `values` (True, False, None) defined with the AND table.
print("OR Truth Table (Kleene Logic):")
# Header and separator are built from the same 5-character cells as the body,
# so the columns stay aligned with the row labels.
print(" | ".join(f"{label:<5}" for label in ["", "True", "False", "None"]))
print("-+-".join(["-" * 5] * 4))

for a in values:
    cells = [str(a)]
    for b in values:
        result = pd.array([a], dtype="boolean") | pd.array([b], dtype="boolean")
        # Convert to str before padding: a bool combined with a width spec is
        # formatted as an integer and would be shown as 1/0 instead of True/False.
        cells.append(str(result[0]))
    row = " | ".join(f"{cell:<5}" for cell in cells)
    print(row)

### Comparison Operations with BooleanArray

In [None]:
# Nullable boolean array used as the left-hand side of the comparison
bool_arr = pd.array([True, False, None], dtype="boolean")
print(f"BooleanArray: {bool_arr}")

# Element-wise equality against the scalar True; report the result, its type and dtype
result = bool_arr == True
print(
    f"Result of bool_arr == True: {result}",
    f"Result type: {type(result)}",
    f"Result dtype: {result.dtype}",
    sep="\n",
)

##### 2. Index Objects

Index objects are immutable arrays that implement various methods for data indexing and alignment.

### Creating Index Objects

In [None]:
# A simple Index of string labels; show it along with its type and dtype
idx = pd.Index(list('abc'))
print(
    f"Index: {idx}",
    f"Type: {type(idx)}",
    f"Dtype: {idx.dtype}",
    sep="\n",
)

### get_level_values Method

The `get_level_values` method returns an Index of values for the requested level. This is primarily useful for MultiIndex, but is also available on Index for compatibility.

In [None]:
# Request level 0 of idx (the Index created in the previous cell)
level_values = idx.get_level_values(level=0)
print(f"Level values: {level_values}")

### get_loc Method

The `get_loc` method gets the integer location, slice, or boolean mask for the requested label.

In [None]:
# Every label occurs once here, so the lookup yields a single integer position
unique_index = pd.Index(list('abc'))
loc = unique_index.get_loc('b')
print(f"Location of 'b' in unique_index: {loc}")

In [None]:
# Sorted labels with a repeated 'b': the duplicates are adjacent
monotonic_index = pd.Index(list('abbc'))
loc = monotonic_index.get_loc('b')
print(
    f"Location of 'b' in monotonic_index: {loc}",
    f"Type of location: {type(loc)}",
    sep="\n",
)

In [None]:
# Unsorted labels with a repeated 'b': the duplicates are not adjacent
non_monotonic_index = pd.Index(list('abcb'))
loc = non_monotonic_index.get_loc('b')
print(
    f"Location of 'b' in non_monotonic_index: {loc}",
    f"Type of location: {type(loc)}",
    sep="\n",
)

### get_loc with Method Parameter

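The `get_loc` method can use different methods for inexact matches. Note that the `method` and `tolerance` arguments of `get_loc` were deprecated in pandas 1.4 and removed in pandas 2.0; on current versions the equivalent lookup is `index.get_indexer([key], method=..., tolerance=...)[0]`, which returns `-1` when nothing matches.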

In [None]:
# Odd integers only, so even labels such as 4 are absent from the index
num_index = pd.Index([1, 3, 5, 7, 9])
print(f"Numeric index: {num_index}")

# An exact lookup of a missing label raises KeyError, which is reported here
try:
    loc = num_index.get_loc(4)
except KeyError as err:
    print(f"Error: {err}")
else:
    print(f"Location of 4: {loc}")

In [None]:
# Inexact lookups on num_index (the numeric Index created in the previous cell).
# Index.get_loc no longer accepts `method` (deprecated in pandas 1.4, removed in
# 2.0), so each lookup goes through Index.get_indexer, which takes a list of
# targets and returns an array of positions; [0] picks the single result.
# get_indexer returns -1 when nothing matches; the targets used here (4 and 6)
# lie inside the index range, so every lookup below has a match.

# Using 'pad'/'ffill' method (find the previous index value)
loc = int(num_index.get_indexer([4], method='pad')[0])
print(f"Location of 4 using 'pad' method: {loc}")
print(f"Value at this location: {num_index[loc]}")

# Using 'backfill'/'bfill' method (use next index value)
loc = int(num_index.get_indexer([4], method='backfill')[0])
print(f"\nLocation of 4 using 'backfill' method: {loc}")
print(f"Value at this location: {num_index[loc]}")

# Using 'nearest' method
loc = int(num_index.get_indexer([4], method='nearest')[0])
print(f"\nLocation of 4 using 'nearest' method: {loc}")
print(f"Value at this location: {num_index[loc]}")

# Using 'nearest' method with a tie (6 is equally far from 5 and 7)
loc = int(num_index.get_indexer([6], method='nearest')[0])
print(f"\nLocation of 6 using 'nearest' method: {loc}")
print(f"Value at this location: {num_index[loc]}")

### get_loc with Tolerance Parameter

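The `tolerance` parameter specifies the maximum allowed distance between the requested label and the index value it is matched to. If no index value lies within that distance, the inexact lookup fails instead of returning a far-away match.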

In [None]:
# Tolerance-limited lookups on num_index (the numeric Index created earlier).
# Index.get_loc no longer accepts `method`/`tolerance` (deprecated in pandas 1.4,
# removed in 2.0); Index.get_indexer supports both and signals "no match within
# tolerance" with -1 instead of raising KeyError.

# Using 'pad' method with tolerance
loc = int(num_index.get_indexer([4], method='pad', tolerance=0.5)[0])
if loc == -1:
    # The previous value (3) is 1 away from 4, which exceeds the 0.5 tolerance
    print("Error: 4")
else:
    print(f"Location of 4 using 'pad' method with tolerance 0.5: {loc}")

# Using 'pad' method with larger tolerance
loc = int(num_index.get_indexer([4], method='pad', tolerance=1.0)[0])
if loc == -1:
    print("Error: 4")
else:
    print(f"Location of 4 using 'pad' method with tolerance 1.0: {loc}")
    print(f"Value at this location: {num_index[loc]}")

### get_slice_bound Method

The `get_slice_bound` method calculates the slice bound that corresponds to a given label.

In [None]:
# Create an index
idx = pd.Index(['a', 'b', 'c', 'd', 'e'])
print(f"Index: {idx}")

# The `kind` argument of get_slice_bound was deprecated in pandas 1.4 and
# removed in 2.0, so only the label and the side are passed.

# Get left slice bound
left_bound = idx.get_slice_bound('c', side='left')
print(f"Left slice bound for 'c': {left_bound}")

# Get right slice bound
right_bound = idx.get_slice_bound('c', side='right')
print(f"Right slice bound for 'c': {right_bound}")

# Create a slice using these bounds
sliced_idx = idx[left_bound:right_bound]
print(f"Sliced index: {sliced_idx}")

### get_value Method

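The `get_value` method provides a fast lookup of a value from a 1-dimensional ndarray. Note that `Index.get_value` was deprecated in pandas 1.x and removed in pandas 2.0; on current versions use `series[key]` or `series.loc[key]` for the same lookup.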

In [None]:
# Create a Series
s = pd.Series([10, 20, 30], index=['a', 'b', 'c'])
print(s)

# Index.get_value(series, key) was removed in pandas 2.0; label-based lookup
# through .loc returns the same value and works on old and new versions alike.
value = s.loc['b']
print(f"\nValue for 'b': {value}")