In [None]:
"""
tutorial.py

An introductory Python script demonstrating:
- Basic Python syntax
- Reading and manipulating data with pandas
- Creating scatterplots with matplotlib

Assumes a file named 'penguins.csv' exists in a 'data' subdirectory of the
current directory.
"""

# -------------------------
# 1. Basic Python concepts
# -------------------------

# Variables are created by assignment; no type declaration is required.
# Each value is printed right after it is defined.
message = "Welcome to Python data analysis!"
print(message)

year = 2026
print("Year:", year)

In [None]:
# Lists hold an ordered sequence of values; positions are counted from 0.
numbers = list(range(1, 6))  # the integers 1 through 5: [1, 2, 3, 4, 5]
print("Numbers:", numbers)
print("First number:", numbers[0])  # index 0 is the first element

In [None]:
# Dictionaries map keys to values; here each key names one fact about a penguin.
penguin_info = dict(
    species="Adelie",
    island="Torgersen",
    body_mass_g=3750,
)
print("Penguin info:", penguin_info)

In [None]:
# Loops: visit each element of `numbers` in turn and print twice its value.
print("\nLooping over numbers:")
for value in numbers:
    doubled = value * 2
    print(doubled)

In [None]:
# Functions: a named, reusable piece of code that takes input and returns output
def kilograms_from_grams(grams):
    """Return the mass *grams* expressed in kilograms.

    One kilogram is 1000 grams, so the input is divided by 1000.
    """
    grams_per_kilogram = 1000
    return grams / grams_per_kilogram


print("3750 g in kg:", kilograms_from_grams(3750))

In [None]:
# -------------------------
# 2. Import libraries
# -------------------------

import pandas as pd

# Importing libraries doesn't produce output, unless there's an error

In [None]:
import WilliamDoane as wd # expected to produce an error!

In [None]:
# -------------------------
# 3. Read data with pandas
# -------------------------

# Load the penguin data from the CSV file into a DataFrame.
# The path is relative, so it is resolved against the working directory.
df = pd.read_csv("data/penguins.csv")

# Preview the top of the table; head(5) returns the first five rows.
print("\nFirst 5 rows of the dataset:")
print(df.head(5))

In [None]:
# Basic information about the dataset (columns, non-null counts, dtypes).
# DataFrame.info() prints its report itself and returns None, so it is
# called directly: wrapping it in print() would add a stray "None" line.
print("\nDataset info:")
df.info()

In [None]:
# Descriptive statistics computed by pandas for the numeric columns
print("\nSummary statistics:")
summary_stats = df.describe()
print(summary_stats)

In [None]:
# -------------------------
# 4. Data manipulation
# -------------------------

# Keep only the species label and the four body-measurement columns
measurement_columns = [
    "species",
    "bill_length_mm",
    "bill_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
]
measurements = df[measurement_columns]
print(measurements)

In [None]:
# Remove every row (axis=0) that has a missing value in any column.
# These are dropna()'s defaults, spelled out here for clarity.
measurements_clean = measurements.dropna(axis=0, how="any")

# Compare row counts to see how many rows were removed
print("\nRows before cleaning:", len(measurements))
print("Rows after cleaning:", len(measurements_clean))

In [None]:
# Create a new column holding body mass in kilograms.
# `measurements_clean` was derived from another DataFrame (column selection
# followed by dropna()), so assigning a column to it in place can raise
# pandas' SettingWithCopyWarning on pandas versions before 3.0. assign()
# returns a new DataFrame with the extra column, which avoids the ambiguity.
measurements_clean = measurements_clean.assign(
    body_mass_kg=measurements_clean["body_mass_g"] / 1000
)
print(measurements_clean)

In [None]:
# Filter rows with a boolean mask: True where the species is "Adelie"
is_adelie = measurements_clean["species"] == "Adelie"
adelie_penguins = measurements_clean[is_adelie]

print("\nNumber of Adelie penguins:", len(adelie_penguins))

In [None]:
# -------------------------
# 5. Basic plotting
# -------------------------

import matplotlib.pyplot as plt

# Scatterplot of all cleaned rows: flipper length on x, body mass on y
plt.figure()

flipper_lengths = measurements_clean["flipper_length_mm"]
body_masses = measurements_clean["body_mass_g"]
plt.scatter(flipper_lengths, body_masses)

# Label the axes and the figure, then display it
plt.xlabel("Flipper length (mm)")
plt.ylabel("Body mass (g)")
plt.title("Flipper Length vs Body Mass (All Penguins)")
plt.show()

In [None]:
# Scatterplot of bill dimensions with one scatter() call per species, so
# every species gets its own labelled entry in the legend.
plt.figure()

species_column = measurements_clean["species"]
for species_name in species_column.unique():
    # Rows belonging to the current species only
    species_rows = measurements_clean[species_column == species_name]
    bill_lengths = species_rows["bill_length_mm"]
    bill_depths = species_rows["bill_depth_mm"]
    plt.scatter(bill_lengths, bill_depths, label=species_name)

plt.xlabel("Bill length (mm)")
plt.ylabel("Bill depth (mm)")
plt.title("Bill Dimensions by Species")
plt.legend()  # uses the label= given to each scatter() call
plt.show()

In [None]:
# -------------------------
# 6. Wrap-up
# -------------------------

# Closing summary: both lines go out in one print call, separated by a newline
print(
    "\nAnalysis complete!",
    "We loaded data, cleaned it, explored it, and made scatterplots.",
    sep="\n",
)