In [None]:
import pandas as pd

# Load the wetlands seasonal bird diversity CSV from the local data/ folder
df = pd.read_csv(filepath_or_buffer='data/wetlands_seasonal_bird_diversity.csv')

# Preview the first 5 rows (5 is also the default for head())
df.head(n=5)

In [None]:
# Default plot(): one line plot per column with numeric data.
# With no arguments, all of those lines share a single set of axes
# and the DataFrame index is used for the x-axis.
df.plot()

# Line Plots

In [None]:
# General syntax for a line plot of one column against another.
# This is a template, not runnable code: 'x_values_column' and
# 'y_values_column' are placeholders, and running the call as-is raises
# KeyError because df has no columns with those names.
#
#   df.plot(x='x_values_column', y='y_values_column')

In [None]:
# Number of bird species registered each year during winter at CSM
df.plot(
    x='year',
    y='CSM_winter',
)

In [None]:
# Same CSM winter line plot, now customized with a title, axis labels,
# a green line, and the legend turned off (only one series is shown)
csm_winter_options = {
    'title': 'Bird species registered during winter at Carpinteria Salt Marsh',
    'xlabel': 'Year',
    'ylabel': 'Number of bird species',
    'color': 'green',
    'legend': False,
}
df.plot(x='year', y='CSM_winter', **csm_winter_options)

## Check-in

1. Plot a graph of the spring bird surveys at Mugu Lagoon with respect to the years. Include some basic customization.

2. Use the `isna()` method for `pandas.Series` and row selection to select the rows in which Mugu Lagoon has NAs during the spring survey.

### Multiple Line Plot

In [None]:
# Compare the spring and fall surveys at Tijuana Estuary on shared axes.
# The color dict is keyed by column name, so each season gets its own line color;
# its keys (in insertion order) also serve as the list of columns to plot.
tje_line_colors = {'TJE_spring': '#F48FB1', 'TJE_fall': '#AB47BC'}
df.plot(
    x='year',
    y=list(tje_line_colors),
    title='Seasonal bird surveys at Tijuana Estuary',
    xlabel='Year',
    ylabel='Number of bird species',
    color=tje_line_colors,
)

In [None]:
# Same two Tijuana Estuary series, but subplots=True draws each
# column in its own panel instead of overlaying them on one set of axes
tje_seasons = ['TJE_spring', 'TJE_fall']
tje_plot_options = dict(
    title='Seasonal bird surveys at Tijuana Estuary',
    xlabel='Year',
    ylabel='Number of bird species',
    color={'TJE_spring': '#F48FB1', 'TJE_fall': '#AB47BC'},
    subplots=True,
)
df.plot(x='year', y=tje_seasons, **tje_plot_options)

### Updating the index

In [None]:
# General syntax for updating the index of a DataFrame.
# This is a template, not runnable code: `new_index` is a placeholder
# that is never defined, so running the line as-is raises NameError.
#
#   df = df.set_index(new_index)

In [None]:
# Set `column_name` column in df as the new index (reassignment).
# Syntax template only: 'column_name' is a placeholder, and running the
# line as-is raises KeyError because df has no column with that name.
# set_index() returns a new DataFrame, so the result is assigned back to df.
#
#   df = df.set_index('column_name')

In [None]:
# Set `column_name` column in df as the new index (modify df in-place).
# Syntax template only: 'column_name' is a placeholder, and running the
# line as-is raises KeyError because df has no column with that name.
# With inplace=True the call returns None and changes df directly,
# so there is no reassignment.
#
#   df.set_index('column_name', inplace=True)

#### Example

In [None]:
# Make the year column the row index; reassigning keeps the change in df.
# Note: re-running this cell raises KeyError, because after the first run
# 'year' is the index and no longer a column.
df = df.set_index(keys='year')
df.head(n=5)

In [None]:
# Carpinteria Salt Marsh winter surveys: no x argument is needed here
# because the DataFrame index (use_index=True is the default) supplies the x-axis
df.plot(y='CSM_winter', use_index=True)

In [None]:
# Move the current index back into a regular column (drop=False is the default)
# and restore the default integer index; reassign to keep the change in df
df = df.reset_index(drop=False)
df.head(n=5)

## Check-in
Without running the code, give a step-by-step breakdown of what this code is doing:
```
df.set_index('year').loc[:,'SDW_winter':'TJE_fall'].plot()
```

Is this code modifying the data frame df? Why or why not?

Run the code and examine the graph. Review the data description. Do we have all the necessary information to make sure it makes sense to directly compare the surveys at these different sites?

## Method Chaining

In [None]:
# Method chaining on a single line, read left to right:
#   1. set_index('year') returns a new DataFrame indexed by year
#   2. .loc[:, 'SDW_winter':'TJE_fall'] keeps all rows and the columns from
#      SDW_winter through TJE_fall (label slices include both endpoints)
#   3. .plot() draws a line plot of the selected columns
# Nothing is assigned back, so df itself is not modified.
df.set_index('year').loc[:,'SDW_winter':'TJE_fall'].plot()

In [None]:
# Same chain as the one-line version, wrapped in parentheses so that
# each method call can sit on its own line for readability
(df.set_index('year')
  .loc[:,'SDW_winter':'TJE_fall']
  .plot()
)

In [None]:
# Equivalent to the chained version, but each intermediate result
# is stored in its own named variable before the next step
year_index_df = df.set_index('year')  # new DataFrame indexed by year; df is unchanged
subset_df = year_index_df.loc[:,'SDW_winter':'TJE_fall']  # all rows, columns SDW_winter through TJE_fall
subset_df.plot()

### Data exploration

In [None]:
# Load the Palmer penguins dataset directly from its GitHub repository
# (requires network access)
URL = 'https://raw.githubusercontent.com/allisonhorst/palmerpenguins/main/inst/extdata/penguins.csv'
penguins = pd.read_csv(filepath_or_buffer=URL)

# Preview the first 5 rows
penguins.head(n=5)

In [None]:
# Check column data types and NA values:
# info() lists each column's dtype and its non-null count,
# so columns with fewer non-null entries than rows contain NAs
penguins.info()

In [None]:
# Simple statistics about numeric columns
# (count, mean, std, min, quartiles, and max for each one)
penguins.describe()

In [None]:
# How many distinct values appear in each categorical column and in year
categorical_columns = ['species', 'island', 'sex', 'year']
penguins[categorical_columns].nunique()

In [None]:
# List the distinct values found in the species column
penguins.loc[:, 'species'].unique()

In [None]:
# Count how many rows belong to each species
penguins.loc[:, 'species'].value_counts()

### `kind` argument in `plot()`

## Scatter plot

In [None]:
# Scatter plot of body mass against flipper length, one point per row
penguins.plot(
    x='flipper_length_mm',
    y='body_mass_g',
    kind='scatter',
)

In [None]:
# Same scatter plot with a title, axis labels, and a custom point color;
# alpha < 1 makes points semi-transparent so overlapping points stay visible
scatter_labels = dict(
    title='Flipper length and body mass for Palmer penguins',
    xlabel='Flipper length (mm)',
    ylabel='Body mass (g)',
)
penguins.plot(
    kind='scatter',
    x='flipper_length_mm',
    y='body_mass_g',
    color='#ff3b01',
    alpha=0.4,
    **scatter_labels,
)

### Bar plots

In [None]:
# The 10 smallest body mass values, returned as a Series that keeps
# the row labels of the penguins they came from
smallest = penguins.loc[:, 'body_mass_g'].nsmallest(n=10)
smallest

In [None]:
# Bar plot of the selected body masses: one bar per entry in `smallest`,
# labeled on the x-axis by that entry's index value
smallest.plot(kind='bar')

In [None]:
# Full rows (all columns) for the 10 penguins with the smallest body mass
penguins.nsmallest(n=10, columns='body_mass_g')

### Histograms

In [None]:
# Using plot without subsetting data - a mess again:
# every numeric column is drawn as a histogram on the same axes,
# so columns with very different value ranges get squeezed together
penguins.plot(kind='hist')

In [None]:
# Distribution of flipper length measurements.
# Step 1: select the single column of interest
flipper_lengths = penguins['flipper_length_mm']

# Step 2: plot its histogram with a title, x-axis label, and grid lines
flipper_lengths.plot(
    kind='hist',
    title='Penguin flipper lengths',
    xlabel='Flipper length (mm)',
    grid=True,
)

### Check-in
1. Select the `bill_length_mm` and `bill_depth_mm` columns in the `penguins` dataframe and then update the `kind` parameter to `'box'` to make boxplots of the bill length and bill depth.



### Check-in
2. Create a simple histogram of the flipper length of female gentoo penguins.