Time to create some visualizations with Plotly! We're still using the [Aerial Waterfowl Survey Data](https://data.delaware.gov/Energy-and-Environment/Aerial-Waterfowl-Survey-Data/bxyv-7mgn).


import plotly.offline as offline
import plotly.graph_objs as go

# Switch Plotly to offline (in-notebook) mode, then draw a tiny throwaway
# figure to confirm that plots actually show up.
offline.init_notebook_mode()

test_trace = {'y': [4, 2, 3, 4]}
test_layout = {'title': 'Test Plot', 'font': {'size': 16}}
offline.iplot({'data': [test_trace], 'layout': test_layout})

In [None]:
# We're importing the Plotly libraries for the first time
import pandas as pd

import plotly.offline as offline
import plotly.graph_objs as go

# Plotly runs in offline mode here: figures are rendered inside the notebook
offline.init_notebook_mode()

# A minimal figure (one trace, a title and a font size) as a smoke test
smoke_figure = dict(
    data=[dict(y=[4, 2, 3, 4])],
    layout=dict(title='Test Plot', font=dict(size=16)),
)
offline.iplot(smoke_figure)

In [None]:
# Repeat the transformations from the first (01) file so that everyone
# starts from the same place.
url = "https://data.delaware.gov/api/views/bxyv-7mgn/rows.csv?accessType=DOWNLOAD"
waterfowl_df = pd.read_csv(url)

# Keep only the rows whose 'Month' is 'January' ...
is_january = waterfowl_df['Month'] == 'January'
waterfowl_df_january = waterfowl_df[is_january]

# ... and of those, drop the rows whose 'Time Period' is 'Late'.
not_late = waterfowl_df_january['Time Period'] != 'Late'
waterfowl_df_january_sub = waterfowl_df_january[not_late]

# Uncomment the line below to check how many rows each year has
#waterfowl_df_january_sub.groupby('Year').count()

In [None]:
# Preview the first five rows of the table and compare them with the data
# shown on the data portal!
waterfowl_df_january_sub.head(5)

In [None]:
# ...and the five rows at the very end of the table:
waterfowl_df_january_sub.tail(5)

In [None]:
# describe() summarises every numeric column in one call.
# - count: how many non-missing values the column has (missing values are
#   shown as NaN, "Not a Number")
# - also compare the mean, the median (the 50% row) and the max
summary_stats = waterfowl_df_january_sub.describe()
summary_stats

In [None]:
# Check the sums again. Remember, this will be for January of each year.
# numeric_only=True: pandas 2.0+ no longer drops text columns (such as
# 'Month') from sum() by default, so ask for the numeric columns explicitly
# to get the same table on every pandas version.
waterfowl_df_january_sub.groupby('Year').sum(numeric_only=True)

In [None]:
# Let's look at just 1979: keep only the 1979 rows, then sum them.
# numeric_only=True keeps text columns (such as 'Month') out of the sum,
# which pandas 2.0+ would otherwise include.
only_1979 = waterfowl_df_january_sub[waterfowl_df_january_sub['Year'] == 1979]
only_1979.groupby('Year').sum(numeric_only=True)

In [None]:
# Compare the above to this one: sum every year first, then take the row
# at position 4 (the fifth row) of the result.
# numeric_only=True keeps text columns (such as 'Month') out of the sum,
# which pandas 2.0+ would otherwise include; .iloc makes it explicit that
# the slice is by position, not by year label.
waterfowl_df_january_sub.groupby('Year').sum(numeric_only=True).iloc[4:5]

In [None]:
# ***********************
# Why do you think they are the same? (Hint: Look at the table that sums all years.)
# Which do you think is easier to read?
# Copy and paste your favorite of the two and assign to the variable below.
#
# Selecting the rows by Year == 1979 is used here rather than the positional
# [4:5] slice: the slice only lands on 1979 while 1979 happens to be the
# fifth year in the data, whereas the filter always picks the right year.
# numeric_only=True keeps text columns (such as 'Month') out of the sum on
# pandas 2.0+.
waterfowl_1979 = (
    waterfowl_df_january_sub[waterfowl_df_january_sub['Year'] == 1979]
    .groupby('Year')
    .sum(numeric_only=True)
)

# Print the variable
waterfowl_1979

In [None]:
# Only the bird columns are needed later on. Start by listing every column
# name in the 1979 table.
all_columns = waterfowl_1979.columns
all_columns

In [None]:
# 'Year' is missing from the column list. Why?
# Think back to the groupby('Year') we ran earlier.

# Have a look at the index of the dataframe:
table_index = waterfowl_1979.index
table_index

In [None]:
# **********************************

# There's the year!

# Get the bird names; we only need to skip the first column
birds = waterfowl_1979.columns[1:]

# Explore the first bird in the list
bird = birds[0]
bird_column = waterfowl_1979[bird]
print('bird:', bird)
print('full:', bird_column)
print('values:', bird_column.values)
print('first value:', bird_column.values[0])
# Calling int() directly on a one-element Series is deprecated in recent
# pandas releases, so pull the single value out with .iloc[0] first.
print('Set as an integer:', int(bird_column.iloc[0]))

#print('full:'); print(waterfowl_1979[birds[0]])

In [None]:
# Get the bird counts into a list
# Use a comprehension!
# Each column of waterfowl_1979 is a one-element Series; .iloc[0] pulls out
# that single value (int() on the Series itself is deprecated in recent
# pandas releases).
bird_counts = [int(waterfowl_1979[bird].iloc[0]) for bird in birds]

# The long way, for comparison:
#
#   bird_counts = []
#   for bird in birds:
#       bird_counts.append(int(waterfowl_1979[bird].iloc[0]))

bird_counts


In [None]:
# Uh oh, no need to chart the birds that weren't counted.
# Keep only the birds whose 1979 total is above zero.
# .iloc[0] pulls the single value out of each one-element column; calling
# int() on the Series itself is deprecated in recent pandas releases.

birds = [
    bird
    for bird in waterfowl_1979.columns[1:]
    if int(waterfowl_1979[bird].iloc[0]) > 0
]

bird_counts = [int(waterfowl_1979[bird].iloc[0]) for bird in birds]
bird_counts


In [None]:
# zip() walks two sequences in step, so each bird name gets paired with
# its count. Handy!
z = zip(birds, bird_counts)

for name_and_count in z:
    print(name_and_count)

In [None]:
# Draw a bar chart with Plotly: one bar per bird, as tall as its count


bar_trace = go.Bar(x=birds, y=bird_counts)
data = [bar_trace]

offline.iplot(data)

In [None]:
# ******************************************
# Still too many birds, and it would look better ordered.
# First, return to our comprehension and edit it below to keep only the
# birds with a count of more than 1000.
# .iloc[0] pulls the single value out of each one-element column; calling
# int() on the Series itself is deprecated in recent pandas releases.

birds = [
    bird
    for bird in waterfowl_1979.columns[1:]
    if int(waterfowl_1979[bird].iloc[0]) > 1000
]

bird_counts = [int(waterfowl_1979[bird].iloc[0]) for bird in birds]
bird_counts

In [None]:
# zip() already yields (bird, count) tuples, so collecting its output in a
# list gives us a list of tuples
bird_tuples = list(zip(birds, bird_counts))
bird_tuples

In [None]:
# Order the (bird, count) tuples from the largest count to the smallest
bird_tuples = sorted(bird_tuples, key=lambda pair: pair[1], reverse=True)

bird_tuples

In [None]:
# Same bar chart as before, this time fed from the sorted tuples

sorted_names = [name for name, _ in bird_tuples]
sorted_counts = [count for _, count in bird_tuples]
data = [go.Bar(x=sorted_names, y=sorted_counts)]

offline.iplot(data)

In [None]:
# Now let's chart the population of a bird over the years. First, let's create a dataframe
# of sums by year, similar to what we did for just 1979.
# numeric_only=True: pandas 2.0+ no longer drops text columns (such as
# 'Month') from sum() by default, so ask for the numeric columns explicitly
# to get the same table on every pandas version.
waterfowl_df_january_sub_by_year = waterfowl_df_january_sub.groupby('Year').sum(numeric_only=True)

waterfowl_df_january_sub_by_year

In [None]:
# ********************************

# Pick one bird to chart: 'Canada Goose'
# (swap in another column name, e.g. 'Mallard', to chart a different bird)
bird_name = 'Canada Goose'

# Select that bird's column from the table of yearly sums
single_bird = waterfowl_df_january_sub_by_year.loc[:, bird_name]

single_bird.head(5)

In [None]:
# A little more exploring: print the index of the selected column, and its
# first entry

bird_index = single_bird.index
print(bird_index)
print('first:', bird_index[0])

In [None]:
# Turn every entry of the index into a string; these become the x values
# of the line chart

years = list(map(str, single_bird.index))
years
    

In [None]:
# Turn the bird's yearly totals into a plain list of integers

bird_counts = list(map(int, single_bird))
bird_counts

In [None]:
# Make a line chart (a Scatter trace drawn with both markers and lines)

trace1 = go.Scatter(x=years, y=bird_counts, mode="markers+lines")

# A plain list of traces is all Figure needs. The go.Data wrapper is
# deprecated in current Plotly releases, and the multi-bird chart in this
# notebook already passes a plain list.
data = [trace1]

layout = go.Layout(title="First Plot", xaxis={'title': 'Year'}, yaxis={'title': bird_name})

figure = go.Figure(data=data, layout=layout)

offline.iplot(figure, filename='pyguide_1')


In [None]:
# Select the yearly sums of three birds at once by passing a list of
# column names
bird_names = ['Canada Goose', 'American Black Duck', 'Mallard']

three_birds = waterfowl_df_january_sub_by_year.loc[:, bird_names]

three_birds.head(5)

In [None]:
# Now let's plot the top three

bird_names = ['Canada Goose', 'American Black Duck', 'Mallard']

# Sum all three birds by year in a single pass instead of re-grouping the
# whole table once per bird inside the loop.
yearly_totals = waterfowl_df_january_sub[['Year'] + bird_names].groupby('Year').sum()

# Take the x values from the same table the counts come from, so the two
# always line up and this cell does not depend on a `years` variable left
# behind by an earlier cell.
years = [str(year) for year in yearly_totals.index]

data = []

for bird_name in bird_names:

    # One-column table of this bird's yearly totals
    single_bird = yearly_totals[[bird_name]]

    bird_counts = [int(total) for total in single_bird[bird_name]]

    # One line (with markers) per bird, labelled with the bird's name
    data.append(go.Scatter(x=years, y=bird_counts, mode="markers+lines", name=bird_name))

layout = go.Layout(title="Top three birds", xaxis={'title': 'Year'}, yaxis={'title': 'Number counted'})

figure = go.Figure(data=data, layout=layout)

offline.iplot(figure, filename='top_three')

### Done Part 2
Let's make some nicer-looking charts in Part 3.