In [None]:
import pandas as pd

##### DataFrame Basics
A DataFrame is a two-dimensional, labeled data structure whose columns can hold different data types, similar to a spreadsheet or a SQL table.

In [None]:
# Sample passenger table built from row records; column labels are given separately
df = pd.DataFrame(
    [
        ("Braund, Mr. Owen Harris", 22, "male"),
        ("Allen, Mr. William Henry", 35, "male"),
        ("Bonnell, Miss. Elizabeth", 58, "female"),
    ],
    columns=["Name", "Age", "Sex"],
)

In [None]:
# Display DataFrame structure
print("DataFrame:")
print(df)

print("\nDataFrame info:")
# DataFrame.info() writes its summary straight to stdout and returns None,
# so it is called directly; wrapping it in print() would add a stray "None" line.
df.info()

##### Working with Series
Selecting a single column from a DataFrame returns a Series object.

In [None]:
# Pull the "Age" column out of the DataFrame by its label
age_series = df["Age"]
print("Age Series from DataFrame:", age_series, sep="\n")

In [None]:
# Build a Series directly from a list, without a parent DataFrame
ages = pd.Series(data=[22, 35, 58], name="Age")
print("Standalone Series:", ages, sep="\n")

##### Series Truth Values and Gotchas

In [None]:
# Gotcha: a Series used directly as an `if` condition has no single truth
# value; the ValueError this triggers is caught and shown instead of crashing
try:
    if pd.Series([False, True, False]):
        print("I was true")
except ValueError as err:
    print(f"Error: {err}")

In [None]:
# Explicit checks make clear which question is being asked of the Series
s = pd.Series([False, True, False])
print(f"Is empty? {s.empty}")
print(f"Any True? {s.any()}")
print(f"All True? {s.all()}")

##### Basic Statistics

In [None]:
# Largest age, taken once from the DataFrame column and once from the standalone Series
print(f"Max age (DataFrame): {df['Age'].max()}")
print(f"Max age (Series): {ages.max()}")

In [None]:
# Summary statistics as reported by DataFrame.describe()
print("Statistics summary:", df.describe(), sep="\n")

##### Titanic Dataset Analysis

In [None]:
# Read the Titanic passenger data from the local CSV file
titanic = pd.read_csv("data/titanic.csv")

# Preview the top of the table
print("First rows:", titanic.head(), sep="\n")

In [None]:
# List the distinct values found in the passenger-class column
print(f"Unique classes: {titanic['Pclass'].unique()}")
print("\nClass is categorical, not numerical")

##### Grouping Operations

In [None]:
# Average fare for every (sex, passenger class) combination
print(
    "Mean fare by sex and class:",
    titanic.groupby(["Sex", "Pclass"])["Fare"].mean(),
    sep="\n",
)

In [None]:
# Two ways of counting entries per passenger class
print("Using value_counts():", titanic["Pclass"].value_counts(), sep="\n")

print("\nUsing groupby():", titanic.groupby("Pclass")["Pclass"].count(), sep="\n")

In [None]:
# NaN handling: size() reports the number of rows in each group, while
# count() reports the number of non-null entries per column in each group
print("Size vs Count:")
# Each result starts on its own line: printed on the same line as its label,
# the header row of the count() table is shifted and no longer lines up
# with the values beneath it.
print("With NaN:", titanic.groupby("Pclass").size(), sep="\n")
print("Without NaN:", titanic.groupby("Pclass").count(), sep="\n")

##### Air Quality Analysis

In [None]:
# Read the air-quality measurements from the local CSV file
air_quality = pd.read_csv("data/air_quality_long.csv")

# Preview the top of the table
print("First rows:", air_quality.head(), sep="\n")

In [None]:
# Average measured value for every (city, parameter) combination
print(
    "Mean by city and parameter:",
    air_quality.groupby(["city", "parameter"])["value"].mean(),
    sep="\n",
)

In [None]:
# Number of rows recorded for each station
# NOTE(review): assumes the CSV has a "station" column; the copy of this dataset
# used in the pandas tutorials appears to call it "location" - verify against the file header
print("Measurements per station:", air_quality["station"].value_counts(), sep="\n")

# Number of rows recorded for each measured parameter
print("\nMeasurements by parameter:", air_quality["parameter"].value_counts(), sep="\n")