##### Data Manipulation and Types

In [1]:
import pandas as pd
import numpy as np

##### Elementwise Functions
Apply functions to individual elements

In [2]:
# Create sample DataFrame
df = pd.DataFrame({
    'one': [1.39, 0.34, 0.69, np.nan],
    'two': [1.77, 1.91, 1.47, 0.27],
    'three': [np.nan, -0.05, 1.22, -0.61]
}, index=['a', 'b', 'c', 'd'])

print("Sample DataFrame:")
print(df)

Sample DataFrame:
    one   two  three
a  1.39  1.77    NaN
b  0.34  1.91  -0.05
c  0.69  1.47   1.22
d   NaN  0.27  -0.61


In [3]:
# Apply custom function to each element
def get_str_length(x):
    return len(str(x))

print("String lengths of 'one' column:")
print(df['one'].map(get_str_length))

print("\nString lengths of all elements:")
print(df.applymap(get_str_length))

String lengths of 'one' column:
a    4
b    4
c    4
d    3
Name: one, dtype: int64

String lengths of all elements:
   one  two  three
a    4    4      3
b    4    4      5
c    4    4      4
d    3    4      5


  print(df.applymap(get_str_length))


In [4]:
# Map values using another Series
s1 = pd.Series(['six', 'seven', 'six', 'seven', 'six'],
               index=['a', 'b', 'c', 'd', 'e'])
s2 = pd.Series({'six': 6., 'seven': 7.})

print("Original Series:")
print(s1)
print("\nMapped values:")
print(s1.map(s2))

Original Series:
a      six
b    seven
c      six
d    seven
e      six
dtype: object

Mapped values:
a    6.0
b    7.0
c    6.0
d    7.0
e    6.0
dtype: float64


##### Reindexing
Conform data to match new labels

In [5]:
# Create sample Series
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
print("Original Series:")
print(s)

print("\nReindexed Series:")
print(s.reindex(['e', 'b', 'f', 'd']))

Original Series:
a    0.293166
b   -0.410848
c   -0.067568
d    0.320194
e    0.640690
dtype: float64

Reindexed Series:
e    0.640690
b   -0.410848
f         NaN
d    0.320194
dtype: float64


In [6]:
# Reindex DataFrame (both index and columns)
print("Original DataFrame:")
print(df)

print("\nReindexed DataFrame:")
print(df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one']))

Original DataFrame:
    one   two  three
a  1.39  1.77    NaN
b  0.34  1.91  -0.05
c  0.69  1.47   1.22
d   NaN  0.27  -0.61

Reindexed DataFrame:
   three   two   one
c   1.22  1.47  0.69
f    NaN   NaN   NaN
b  -0.05  1.91  0.34


##### Sorting
Sort by values or index

In [7]:
# Create sample DataFrame with MultiIndex
df_multi = pd.DataFrame({
    ('a', 'one'): [2, 1, 1, 1],
    ('a', 'two'): [1, 2, 3, 4],
    ('b', 'three'): [5, 3, 4, 2]
})

print("DataFrame with MultiIndex columns:")
print(df_multi)

DataFrame with MultiIndex columns:
    a         b
  one two three
0   2   1     5
1   1   2     3
2   1   3     4
3   1   4     2


In [8]:
# Sort by MultiIndex column
print("Sorted by ('a', 'two'):")
print(df_multi.sort_values(by=('a', 'two')))

Sorted by ('a', 'two'):
    a         b
  one two three
0   2   1     5
1   1   2     3
2   1   3     4
3   1   4     2


##### Data Types
Understanding pandas dtypes

In [9]:
# Create DataFrame with different types
df_types = pd.DataFrame({
    'integers': [1, 2, 3],
    'floats': [1.1, 2.2, 3.3],
    'strings': ['a', 'b', 'c'],
    'booleans': [True, False, True],
    'dates': pd.date_range('20250219', periods=3)
})

print("DataFrame with different types:")
print(df_types)
print("\nData types of each column:")
print(df_types.dtypes)

DataFrame with different types:
   integers  floats strings  booleans      dates
0         1     1.1       a      True 2025-02-19
1         2     2.2       b     False 2025-02-20
2         3     3.3       c      True 2025-02-21

Data types of each column:
integers             int64
floats             float64
strings             object
booleans              bool
dates       datetime64[ns]
dtype: object


In [10]:
# Create Series with categorical data
cat_series = pd.Series(['a', 'b', 'a', 'c'], dtype='category')
print("Categorical Series:")
print(cat_series)
print("\nCategories:")
print(cat_series.cat.categories)

Categorical Series:
0    a
1    b
2    a
3    c
dtype: category
Categories (3, object): ['a', 'b', 'c']

Categories:
Index(['a', 'b', 'c'], dtype='object')


In [11]:
# String operations with StringDtype
str_series = pd.Series(['apple', 'banana', 'cherry'], dtype='string')
print("String Series:")
print(str_series)
print("\nUppercase:")
print(str_series.str.upper())

String Series:
0     apple
1    banana
2    cherry
dtype: string

Uppercase:
0     APPLE
1    BANANA
2    CHERRY
dtype: string
