##### Indexing and Data Alignment

In [None]:
import pandas as pd
import numpy as np

##### Basic Indexing Operations
Different ways to select data from a DataFrame: by row label with `.loc` and by integer position with `.iloc`.

In [None]:
# Create sample DataFrame.
# Every Series carries the same row labels as the DataFrame. The DataFrame
# constructor aligns dict-of-Series input on index labels, so a Series left
# with the default 0..3 integer index would share no labels with
# ['a', 'b', 'c', 'd'] and its column would be filled entirely with NaN.
labels = ['a', 'b', 'c', 'd']
df = pd.DataFrame({
    'one': pd.Series([1., 2., 3., np.nan], index=labels),
    'flag': pd.Series([False, False, True, False], index=labels),
    'foo': 'bar'  # scalar value is broadcast to every row
}, index=labels)

print("Original DataFrame:")
print(df)

# .loc selects by index label and returns the row as a Series.
print("\nSelect row by label:")
print(df.loc['b'])

# .iloc selects by zero-based position, so 2 is the third row (label 'c').
print("\nSelect row by integer location:")
print(df.iloc[2])

##### Data Alignment
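Arithmetic between DataFrame objects automatically aligns them on both row index and column labels; any label present in only one operand yields NaN in the result.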

In [None]:
# Create two DataFrames with different shapes.
# A locally seeded generator keeps the printed numbers identical on every
# Restart Kernel -> Run All, without touching NumPy's global random state.
rng = np.random.default_rng(0)
df1 = pd.DataFrame(rng.standard_normal((10, 4)), columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(rng.standard_normal((7, 3)), columns=['A', 'B', 'C'])

print("DataFrame 1:")
print(df1.head())
print("\nDataFrame 2:")
print(df2.head())

# Addition aligns on row index and column labels: column 'D' and rows 7-9
# exist only in df1, so those positions are NaN in the sum. head() shows the
# first five rows, where only column 'D' is NaN.
print("\nResult of addition (note NaN values):")
print((df1 + df2).head())

In [None]:
# Time series data alignment.
# Seeded local generator so the displayed values are reproducible on re-run.
rng = np.random.default_rng(1)
index = pd.date_range('1/1/2000', periods=8)  # 8 consecutive daily timestamps
df_time = pd.DataFrame(rng.standard_normal((8, 3)), index=index, columns=list('ABC'))

print("Time series DataFrame:")
print(df_time)

# axis=0 matches the Series df_time['A'] against the row index, so each row's
# 'A' value is subtracted from every column in that row (column 'A' becomes 0).
print("\nSubtract column A using proper axis alignment:")
print(df_time.sub(df_time['A'], axis=0))

##### SQL-like Operations
Filtering and grouping data with pandas, mirroring the SQL `WHERE`, `IS NULL` / `IS NOT NULL`, and `GROUP BY` constructs.

In [None]:
# Load tips dataset from a CSV path relative to the working directory,
# then preview the first rows.
tips_path = 'data/tips.csv'
tips = pd.read_csv(tips_path)
print("Tips dataset:", tips.head(), sep="\n")

In [None]:
# WHERE clause equivalent: boolean masks act as the SQL predicates.
is_dinner = tips['time'] == 'Dinner'
is_over_five = tips['tip'] > 5.00

print("Dinner tips:")
dinner_preview = tips[is_dinner].head()
print(dinner_preview)

# Combining masks with & corresponds to SQL AND.
print("\nDinner tips over $5.00:")
big_dinner_tips = tips[is_dinner & is_over_five]
print(big_dinner_tips)

In [None]:
# NULL checking.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
frame = pd.DataFrame({
    'col1': ['A', 'B', np.nan, 'C', 'D'],
    'col2': ['F', np.nan, 'G', 'H', 'I']
})

print("Original DataFrame:")
print(frame)

# isna() is the pandas counterpart of SQL "IS NULL".
print("\nRows where col2 is NULL:")
print(frame[frame['col2'].isna()])

# notna() is the pandas counterpart of SQL "IS NOT NULL".
print("\nRows where col1 is NOT NULL:")
print(frame[frame['col1'].notna()])

In [None]:
# GROUP BY operations
# size() counts the rows in each group.
count_by_sex = tips.groupby('sex').size()
print("Count of tips by sex:", count_by_sex, sep="\n")

# Grouping on two keys yields a Series indexed by (sex, time).
mean_tip_by_sex_time = tips.groupby(['sex', 'time'])['tip'].mean()
print("\nAverage tip by sex and time:", mean_tip_by_sex_time, sep="\n")