---
title: "TidyTuesday Week 38 in Python"
output: html_notebook
---

In [None]:
%%R
library(reticulate)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [None]:
# Read the TidyTuesday 2019-09-17 national parks CSV straight from GitHub.
park_visits_py = pd.read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/national_parks.csv"
)

In [None]:
# Quick look at the raw data: first rows and column types.
park_visits_py.head()

park_visits_py.dtypes

# Yearly visitor totals for National Parks only.
# Rows whose 'year' is the literal string 'Total' are dropped first, which is
# what allows the remaining years to be converted to numbers at the end.
# Only 'visitors' is selected for the sum, so no other column is aggregated.
pv_grouped = (
    park_visits_py
    .loc[lambda df: df['unit_type'] == 'National Park']
    .loc[lambda df: df['year'] != 'Total']
    .groupby('year')['visitors']
    .sum()
    .reset_index()
    .assign(year=lambda df: pd.to_numeric(df['year']))
)

pv_grouped

pv_grouped.dtypes

In [None]:
# Quick area chart via the pandas plotting accessor (drawn with matplotlib)
plt.style.use('fivethirtyeight')
pv_grouped.plot.area(x='year', y='visitors', color='darkgreen', alpha=0.3)

In [None]:
#Matplotlib object-oriented, because: https://pbpython.com/effective-matplotlib.html

#Function for number format on axis
def numbers(x, pos):
    """Format an axis tick value as whole thousands ('K') or whole millions ('M').

    The ``(value, position)`` signature is the one
    ``matplotlib.ticker.FuncFormatter`` calls its wrapped function with.

    Args:
        x: The tick value.
        pos: The tick position; accepted for the formatter signature, not used.

    Returns:
        ``x`` scaled to millions with an 'M' suffix when its magnitude is at
        least one million, otherwise scaled to thousands with a 'K' suffix.
        The scaled number is printed with no decimals, so 0 becomes '0K'.
    """
    # Compare on magnitude so negative millions also get the 'M' suffix
    # instead of falling through to the thousands branch (e.g. '-3000K').
    if abs(x) >= 1000000:
        return '{:1.0f}M'.format(x*1e-6)
    return '{:1.0f}K'.format(x*1e-3)


plt.style.use('fivethirtyeight')

fig, ax = plt.subplots()
pv_grouped.plot.area(x='year', y='visitors', color='darkgreen', alpha=0.3, ax=ax)

# Blank axis labels; the title carries the message
ax.set(xlabel='', ylabel='', title='U.S. national parks have never been so popular')

# Show y ticks in K / M notation using the numbers() helper
formatter = FuncFormatter(numbers)
ax.yaxis.set_major_formatter(formatter)

# Build the legend, then hide it
ax.legend().set_visible(False)

plt.show()

In [None]:
# Same series drawn as a seaborn line chart
sns.set()
sns.relplot(
    x='year',
    y='visitors',
    kind='line',
    data=pv_grouped,
)
# An explicit plt.show() is needed for the figure to appear, at least in RStudio
plt.show()