# Pandas Part 75: More Index Methods

This notebook explores additional methods available on pandas Index objects.

In [None]:
import pandas as pd
import numpy as np

## 1. Index Groupby Method

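The `groupby` method groups the index labels by a given array of values and returns a dict that maps each distinct value to an `Index` of the labels belonging to that group.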

In [None]:
# Build an Index of fruit names to group.
idx = pd.Index(['apple', 'banana', 'cherry', 'date', 'elderberry'])
print(f"Index: {idx}")

# Group the labels by their first letter.
# `Index.groupby` returns a dict mapping each group key to an Index of the
# *labels* in that group (not integer positions). The values are therefore
# printed directly; using them to index `idx` positionally raises IndexError.
groups = idx.groupby([x[0] for x in idx])
print("\nGroups by first letter:")
for group_name, group_labels in groups.items():
    print(f"Group '{group_name}': {group_labels}")

In [None]:
# Group the labels by string length.
# The dict values returned by `Index.groupby` are already Indexes of labels,
# so they are printed as-is rather than being used to index `idx` again
# (positional indexing with string labels raises IndexError).
groups = idx.groupby([len(x) for x in idx])
print("Groups by string length:")
for group_name, group_labels in groups.items():
    print(f"Group {group_name}: {group_labels}")

## 2. holds_integer Method

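The `holds_integer` method checks whether the index holds integer labels, i.e. whether its inferred type is `'integer'` or `'mixed-integer'`. Note that `Index.holds_integer` is deprecated since pandas 2.0; `pandas.api.types.infer_dtype` is the recommended replacement.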

In [None]:
def holds_integer(index: pd.Index) -> bool:
    """Return True when ``index`` holds integer labels.

    Drop-in replacement for the deprecated ``Index.holds_integer()`` method
    (deprecated in pandas 2.0, where it emits a FutureWarning). It applies the
    same rule: the inferred type must be ``'integer'`` or ``'mixed-integer'``.

    Parameters
    ----------
    index : pd.Index
        The index to inspect.

    Returns
    -------
    bool
        True if the inferred type of the labels is integer-like.
    """
    # skipna=False mirrors how `Index.inferred_type` is computed.
    inferred = pd.api.types.infer_dtype(index, skipna=False)
    return inferred in ("integer", "mixed-integer")


# Create different types of indices
int_idx = pd.Index([1, 2, 3, 4, 5])
float_idx = pd.Index([1.0, 2.0, 3.0, 4.0, 5.0])
str_idx = pd.Index(['a', 'b', 'c', 'd', 'e'])

print(f"Integer index: {int_idx}")
print(f"holds_integer: {holds_integer(int_idx)}")

print(f"\nFloat index: {float_idx}")
print(f"holds_integer: {holds_integer(float_idx)}")

print(f"\nString index: {str_idx}")
print(f"holds_integer: {holds_integer(str_idx)}")

## 3. identical Method

The `identical` method is a stricter version of `equals`: it checks that two Index objects have equal elements, the same type, and the same attributes (such as `name`).

In [None]:
# Four indices to compare: two plain integer ones, a named integer one,
# and a float one holding the same numeric values.
idx1, idx2 = pd.Index([1, 2, 3]), pd.Index([1, 2, 3])
idx3 = pd.Index([1, 2, 3], name='numbers')
idx4 = pd.Index([1.0, 2.0, 3.0])

# Run every comparison against idx1 up front, then report.
same_as_idx2 = idx1.identical(idx2)
same_as_idx3 = idx1.identical(idx3)
same_as_idx4 = idx1.identical(idx4)

print(f"idx1: {idx1}, type: {type(idx1)}")
print(f"idx2: {idx2}, type: {type(idx2)}")
print(f"idx3: {idx3}, type: {type(idx3)}")
print(f"idx4: {idx4}, type: {type(idx4)}")

# Report whether each index is identical to idx1
print(f"\nidx1.identical(idx2): {same_as_idx2}")
print(f"idx1.identical(idx3): {same_as_idx3}")
print(f"idx1.identical(idx4): {same_as_idx4}")

## 4. insert Method

The `insert` method creates a new Index by inserting a new item at a specified location.

In [None]:
# Start from an Index that is missing the label 'c'
idx = pd.Index(['a', 'b', 'd', 'e'])
print(f"Original index: {idx}")

# Fill the gap between 'b' and 'd'
new_idx = idx.insert(loc=2, item='c')
print(f"After inserting 'c' at position 2: {new_idx}")

# Prepend a label (position 0)
new_idx = idx.insert(loc=0, item='start')
print(f"After inserting 'start' at position 0: {new_idx}")

# Append a label (position equal to the current length)
new_idx = idx.insert(loc=len(idx), item='end')
print(f"After inserting 'end' at the end: {new_idx}")

# A negative position counts from the end, so -1 lands before the last label
new_idx = idx.insert(loc=-1, item='before_last')
print(f"After inserting 'before_last' at position -1: {new_idx}")

## 5. intersection Method

The `intersection` method forms the intersection of two Index objects, returning a new Index with elements common to both.

In [None]:
# Two overlapping integer indices (they share 3 and 4)
idx1, idx2 = pd.Index([1, 2, 3, 4]), pd.Index([3, 4, 5, 6])
print(f"idx1: {idx1}")
print(f"idx2: {idx2}")

# Keep only the labels present in both
intersection = idx1.intersection(other=idx2)
print(f"\nIntersection: {intersection}")

In [None]:
# Same operation on string-labelled indices (they share 'c' and 'd')
idx1, idx2 = pd.Index(['a', 'b', 'c', 'd']), pd.Index(['c', 'd', 'e', 'f'])
print(f"idx1: {idx1}")
print(f"idx2: {idx2}")

# Keep only the labels present in both
intersection = idx1.intersection(other=idx2)
print(f"\nIntersection: {intersection}")

In [None]:
# Effect of the `sort` parameter on unsorted inputs
idx1, idx2 = pd.Index([5, 3, 1, 4, 2]), pd.Index([4, 3, 6, 5])
print(f"idx1: {idx1}")
print(f"idx2: {idx2}")

# Leaving `sort` unspecified uses its default, sort=False (no sorting)
intersection = idx1.intersection(other=idx2)
print(f"\nIntersection (sort=False): {intersection}")

# sort=None asks pandas to sort the result
intersection = idx1.intersection(other=idx2, sort=None)
print(f"Intersection (sort=None): {intersection}")

## 6. is_ Method

The `is_` method is a fast identity check similar to Python's `is` operator, except that it also returns `True` for views of the same Index.

In [None]:
# Three names: the original Index, an alias bound to the very same object,
# and an independent copy
idx1 = pd.Index([1, 2, 3])
idx2 = idx1
idx3 = idx1.copy()

print(f"idx1: {idx1}")
print(f"idx2 (reference to idx1): {idx2}")
print(f"idx3 (copy of idx1): {idx3}")

# Identity according to pandas' is_
alias_is_ = idx1.is_(idx2)
copy_is_ = idx1.is_(idx3)
print(f"\nidx1.is_(idx2): {alias_is_}")
print(f"idx1.is_(idx3): {copy_is_}")

# Identity according to Python's `is` operator, for comparison
alias_is = idx1 is idx2
copy_is = idx1 is idx3
print(f"idx1 is idx2: {alias_is}")
print(f"idx1 is idx3: {copy_is}")

## 7. is_categorical Method

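The `is_categorical` method checks if the Index holds categorical data. Note that `Index.is_categorical` is deprecated since pandas 2.0; checking `isinstance(idx.dtype, pd.CategoricalDtype)` is the recommended replacement.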

In [None]:
def is_categorical(index: pd.Index) -> bool:
    """Return True when ``index`` holds categorical data.

    Replacement for the deprecated ``Index.is_categorical()`` method
    (deprecated in pandas 2.0, where it emits a FutureWarning). The check is
    done on the dtype, which is the currently recommended approach.

    Parameters
    ----------
    index : pd.Index
        The index to inspect.

    Returns
    -------
    bool
        True if the index dtype is a ``pd.CategoricalDtype``.
    """
    return isinstance(index.dtype, pd.CategoricalDtype)


# Create a regular Index
idx1 = pd.Index(["Watermelon", "Orange", "Apple", "Watermelon"])
print(f"Regular index: {idx1}")
print(f"is_categorical: {is_categorical(idx1)}")

# Convert to categorical
idx2 = pd.Index(["Watermelon", "Orange", "Apple", "Watermelon"]).astype("category")
print(f"\nCategorical index: {idx2}")
print(f"is_categorical: {is_categorical(idx2)}")

# Create a numeric Index
idx3 = pd.Index([1, 3, 5, 7])
print(f"\nNumeric index: {idx3}")
print(f"is_categorical: {is_categorical(idx3)}")

## 8. to_numpy Method

The `to_numpy` method converts the Index to a NumPy array.

In [None]:
# One Index per kind of data: integers, floats, strings and categoricals
int_idx = pd.Index([1, 2, 3, 4, 5])
float_idx = pd.Index([1.0, 2.0, 3.0, 4.0, 5.0])
str_idx = pd.Index(['a', 'b', 'c', 'd', 'e'])
cat_idx = pd.Index(['a', 'b', 'a', 'c']).astype('category')

# Convert each Index to a NumPy array first ...
int_array = int_idx.to_numpy()
float_array = float_idx.to_numpy()
str_array = str_idx.to_numpy()
cat_array = cat_idx.to_numpy()

# ... then report the values, the container type and the element dtype
print(f"Integer index to NumPy: {int_array}")
print(f"Type: {type(int_array)}, dtype: {int_array.dtype}")

print(f"\nFloat index to NumPy: {float_array}")
print(f"Type: {type(float_array)}, dtype: {float_array.dtype}")

print(f"\nString index to NumPy: {str_array}")
print(f"Type: {type(str_array)}, dtype: {str_array.dtype}")

print(f"\nCategorical index to NumPy: {cat_array}")
print(f"Type: {type(cat_array)}, dtype: {cat_array.dtype}")

### to_numpy with copy Parameter

In [None]:
# Create an Index
idx = pd.Index([1, 2, 3, 4, 5])

# Convert to NumPy array with copy=False (default): pandas may hand back a
# view on the Index's own data instead of allocating a new buffer.
array1 = idx.to_numpy(copy=False)
print(f"Array with copy=False: {array1}")

# Convert to NumPy array with copy=True: always a fresh, independent buffer.
array2 = idx.to_numpy(copy=True)
print(f"Array with copy=True: {array2}")

# Object identity cannot tell a view from a copy: two separate `to_numpy`
# results are distinct array objects regardless of `copy`.
print(f"\narray1 is array2: {array1 is array2}")

# What `copy` actually controls is whether the result shares memory with the
# Index's underlying data, so check that instead.
print(f"array1 shares memory with idx: {np.shares_memory(array1, idx.values)}")
print(f"array2 shares memory with idx: {np.shares_memory(array2, idx.values)}")

### to_numpy with na_value Parameter

In [None]:
# An Index containing one missing entry (None)
idx = pd.Index([1, 2, None, 4, 5])
print(f"Index with None: {idx}")

# Without na_value the missing entry is kept as a missing value
array1 = idx.to_numpy()
# With na_value the missing entry is replaced by the given sentinel
array2 = idx.to_numpy(na_value=-999)

print(f"\nArray with default na_value: {array1}")
print(f"Array with na_value=-999: {array2}")

### to_numpy with Different dtypes

In [None]:
# Three consecutive days, timezone-aware (CET)
date_idx = pd.date_range(start='2023-01-01', periods=3, tz="CET")
print(f"Datetime index: {date_idx}")

# No dtype requested: let pandas choose the result dtype
array1 = date_idx.to_numpy()
print(f"\nArray with default dtype: {array1}")
print(f"Type: {type(array1)}, dtype: {array1.dtype}")

# Explicit object dtype; also show the type of an individual element
array2 = date_idx.to_numpy(dtype=object)
first_element_type = type(array2[0])
print(f"\nArray with dtype=object: {array2}")
print(f"Type: {type(array2)}, dtype: {array2.dtype}")
print(f"First element type: {first_element_type}")

# Explicit datetime64[ns] dtype
array3 = date_idx.to_numpy(dtype="datetime64[ns]")
print(f"\nArray with dtype=datetime64[ns]: {array3}")
print(f"Type: {type(array3)}, dtype: {array3.dtype}")