# Best practices

Let's start with PEP 8, the Python style guide (https://peps.python.org/pep-0008/)

> Imports should be grouped in the following order:

> - standard library imports
> - related third party imports
> - local application/library specific imports

> You should put a blank line between each group of imports.
>
> Put any relevant `__all__` specification after the imports.



In [1]:
%matplotlib inline
%config InlineBackend.figure_format='retina' 

# Add this to python2 code to make life easier
from __future__ import absolute_import, division, print_function

from itertools import combinations
import string

from IPython.display import IFrame, HTML, YouTubeVideo
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib.pyplot import GridSpec
import seaborn as sns
import mpld3
import numpy as np
# don't do:
# from numpy import *

In [2]:
import pandas as pd
import os, sys
import warnings

# Plot styling for the rest of the notebook. The calls run in this order:
# seaborn's default theme first, then the figure-size override, then style and context.
sns.set();
# NOTE(review): presumably set after sns.set() so the theme call does not overwrite it — confirm.
plt.rcParams['figure.figsize'] = (12, 8)
sns.set_style("darkgrid")
# "poster" context with fonts scaled by a further factor of 1.3.
sns.set_context("poster", font_scale=1.3)

# Ignores all warnings from this point on (no category or module filter is given),
# so deprecation notices from the libraries above will not be shown either.
warnings.filterwarnings('ignore')

# Look at Pandas Dataframes

_this is italicized_

In [3]:
# Load the coal production CSV from the sibling data directory into `df`.
# NOTE(review): the head() output below shows an "Unnamed: 0" column, which suggests the
# file was written with its index included — consider index_col=0 (that would change the
# reported shape); confirm before changing.
df = pd.read_csv("../data/coal_prod_cleaned.csv")

In [4]:
# !conda install qgrid -y

In [5]:
df.head()

Unnamed: 0,MSHA_ID,Average_Employees,Company_Type,Labor_Hours,Mine_Basin,Mine_County,Mine_Name,Mine_State,Mine_Status,Mine_Type,Operating_Company,Operating_Company_Address,Operation_Type,Production_short_tons,Union_Code,Year
0,103295,18.0,Independent Producer Operator,39175.0,Appalachia Southern,Bibb,Seymour Mine,Alabama,Active,Surface,Hope Coal Company Inc,"P.O. Box 249, Maylene, AL 35114",Mine only,105082.0,,2008
1,103117,19.0,Operating Subsidiary,29926.0,Appalachia Southern,Cullman,"Mine #2, #3, #4",Alabama,"Active, men working, not producing",Surface,Twin Pines Coal Company Inc,"1874 County Road 15, Bremen, AL 35033",Mine only,10419.0,,2008
2,103361,20.0,Operating Subsidiary,42542.0,Appalachia Southern,Cullman,Cold Springs West Mine,Alabama,Active,Surface,Twin Pines Coal Company,"74 Industrial Parkway, Jasper, AL 35502",Mine only,143208.0,,2008
3,100759,395.0,Operating Subsidiary,890710.0,Appalachia Southern,Fayette,North River # 1 Underground Mi,Alabama,Active,Underground,Chevron Mining Inc,"3114 County Road 63 S, Berry, AL 35546",Mine and Preparation Plant,2923261.0,United Mine Workers of America,2008
4,103246,22.0,Independent Producer Operator,55403.0,Appalachia Southern,Franklin,Bear Creek,Alabama,Active,Surface,"Birmingham Coal & Coke Co., In","912 Edenton Street, Birmingham, AL 35242",Mine only,183137.0,,2008


In [6]:
# Check out http://nbviewer.ipython.org/github/quantopian/qgrid/blob/master/qgrid_demo.ipynb for more (including demo)

In [7]:
df.shape

(9042, 16)

In [18]:
# This broke w/ Notebooks 5.0 

In [8]:
import qgrid # Put imports at the top
qgrid.nbinstall(overwrite=True)

In [9]:
qgrid.show_grid(df[['MSHA_ID', 'Year', 'Mine_Name', 'Mine_State', 'Mine_County']], remote_js=True)

In [10]:
ls

00-Overview.ipynb
01-Tips-and-tricks.ipynb
02-Visualization-and-code-organization.ipynb
03-Pandas-and-Plotting.ipynb
04-SQL-Example.ipynb
05-interactive-splines.ipynb
06-R-stuff.ipynb
07-Some_basics.ipynb
08-More_basics.ipynb
09-Extras.ipynb
Data_Cleaning.ipynb
Untitled.ipynb
autoreload-example.ipynb


# Pivot Tables w/ pandas

http://nicolas.kruchten.com/content/2015/09/jupyter_pivottablejs/

In [11]:
# !conda install pivottablejs -y

In [12]:
# Rebinds `df`: from this cell on it holds the table read from mps.csv, not the coal data
# loaded earlier in the notebook.
# The encoding is passed explicitly as ISO-8859-1 instead of relying on the reader's default.
df = pd.read_csv("../data/mps.csv", encoding="ISO-8859-1")

In [13]:
df.head(10)

Unnamed: 0,Name,Party,Province,Age,Gender
0,"Liu, Laurin",NDP,Quebec,22.0,Female
1,"Mourani, Maria",Bloc Quebecois,Quebec,43.0,Female
2,"Sellah, Djaouida",NDP,Quebec,,Female
3,"St-Denis, Lise",NDP,Quebec,72.0,Female
4,"Fry, Hedy",Liberal,British Columbia,71.0,Female
5,"Turmel, Nycole",NDP,Quebec,70.0,Female
6,"Sgro, Judy",Liberal,Ontario,68.0,Female
7,"Raynault, Francine",NDP,Quebec,67.0,Female
8,"Davidson, Patricia",Conservative,Ontario,66.0,Female
9,"Smith, Joy",Conservative,Manitoba,65.0,Female


# Enhanced Pandas Dataframe Display

In [14]:
# Province, Party, Average, Age, Heatmap

In [15]:
from pivottablejs import pivot_ui

In [16]:
pivot_ui(df)

# Keyboard shortcuts

For help, `ESC` + `h`

In [17]:
# In command mode, Shift-J / Shift-K extends the selection down / up (to select multiple cells at once)
# Split the current cell at the cursor with Ctrl-Shift-Minus (-)

In [None]:
first = 1

In [None]:
second = 2

In [None]:
third = 3

### Different heading levels

With text and $\LaTeX$ support.

You can also get monospaced fonts by indenting 4 spaces:

    mkdir toc
    cd toc

Wrap with triple-backticks and language:

```bash
mkdir toc
cd toc
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
```

In [1]:
import numpy as np

In [None]:
# Completed call (the saved cell was a SyntaxError): 5 evenly spaced samples from 0 to 1,
# endpoints included. Keyword arguments are kept so the signature is visible in the example.
np.linspace(start=0, stop=1, num=5)

```SQL
SELECT *
FROM tablename
```

In [None]:
```sql

SELECT first_name,
       last_name,
       year_of_birth
FROM presidents
WHERE year_of_birth > 1800;

```

In [16]:
%%bash
# Print the working directory, then report each notebook's disk usage one file at a time.
pwd
for notebook in *.ipynb
do
    # Quote the expansion so a file name containing spaces stays a single argument.
    du -h "$notebook"
done
echo "break"
echo
# Same report from a single du invocation; the glob now matches the loop's (*.ipynb).
du -h *.ipynb

/Users/jonathan/github/jupyter-tips-and-tricks/deliver
4.0K	00-Overview.ipynb
 44K	01-Tips-and-tricks.ipynb
8.0K	02-Visualization-and-code-organization.ipynb
8.0K	03-Pandas-and-Plotting.ipynb
8.0K	04-SQL-Example.ipynb
 36K	05-interactive-splines.ipynb
212K	06-R-stuff.ipynb
 16K	07-Some_basics.ipynb
 20K	08-More_basics.ipynb
 12K	09-Extras.ipynb
 24K	Data_Cleaning.ipynb
break

4.0K	00-Overview.ipynb
 44K	01-Tips-and-tricks.ipynb
8.0K	02-Visualization-and-code-organization.ipynb
8.0K	03-Pandas-and-Plotting.ipynb
8.0K	04-SQL-Example.ipynb
 36K	05-interactive-splines.ipynb
212K	06-R-stuff.ipynb
 16K	07-Some_basics.ipynb
 20K	08-More_basics.ipynb
 12K	09-Extras.ipynb
 24K	Data_Cleaning.ipynb


## Other cell-magics

In [21]:
%%writefile ../scripts/temp.py
from __future__ import absolute_import, division, print_function

I'm not cheating!

Overwriting ../scripts/temp.py


In [22]:
!cat ../scripts/temp.py

from __future__ import absolute_import, division, print_function

I'm not cheating!

# Tab; shift-tab; shift-tab-tab; shift-tab-tab-tab-tab; and more!

In [2]:
def silly_absolute_value_function(xval):
    """Return the absolute value of ``xval``, computed the roundabout way.

    The input is squared and the square root of that square is returned,
    which discards the sign of a real input.

    Parameters
    ----------
    xval : real number or numpy.ndarray
        Value(s) to process; must support ``** 2.0``.

    Returns
    -------
    Whatever ``np.sqrt`` returns for the squared input: a NumPy float for a
    scalar, or an array for an array input.

    Notes
    -----
    Because the input is squared first, values of extreme magnitude can
    overflow or underflow before the square root is taken.
    """
    # Squaring removes the sign; the float exponent forces a float result.
    xval_sq = xval ** 2.0
    # The square root undoes the squaring and restores the original magnitude.
    xval_abs = np.sqrt(xval_sq)
    return xval_abs

In [3]:
silly_absolute_value_function?

In [25]:
silly_absolute_value_function??

In [None]:
# Pass a value: the function requires `xval`, so the bare call raised TypeError on Run All.
# Tooltips (Shift-Tab inside the parentheses) still work with an argument present.
silly_absolute_value_function(-3.0)

In [26]:
import numpy as np

In [28]:
# This doesn't work because ufunc
# NOTE(review): np.linspace is presumably an ordinary function rather than a ufunc — confirm
# whether `??` really fails to show its source here, and reword the line above if it does not.
np.linspace??

In [None]:
# Indent/dedent/comment
for _ in range(5):
    df["one"] = 1
    df["two"] = 2
    df["three"] = 3
    df["four"] = 4



## Multicursor magic

In [None]:
# Four parallel assignments, each adding a constant-valued column to `df`.
# NOTE(review): presumably kept as separate near-identical lines so they can be edited
# together with multiple cursors (per the section heading) — confirm before refactoring.
df["one_better_name"] = 1
df["two_better_name"] = 2
df["three_better_name"] = 3
df["four_better_name"] = 4