In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

%load_ext version_information
%load_ext rpy2.ipython

# Getting Started with Python

## Installation

### To install Anaconda for Python 3.5

Download and install Anaconda Python 3.5 from https://www.continuum.io/downloads

Open a terminal
```bash
conda update conda
conda update anaconda
```

### (OPTIONAL) Using Jupyter remotely

People with a valid Duke NetID can get access to running Jupyter and RStudio instances [here](https://vm-manage.oit.duke.edu/containers).

### (OPTIONAL) To install R

- If you want `conda` to manage your R packages

```bash
conda install -y -c r r-irkernel r-recommended r-essentials
```

- If you have an existing R installation that you want to use

Start R
```R
install.packages(c('rzmq','repr','IRkernel','IRdisplay'),
                 repos = c('http://irkernel.github.io/', getOption('repos')))
IRkernel::installspec()
```


### (OPTIONAL) Integrated Development Environments (IDE)

- [RStudio for R](https://www.rstudio.com)
- [Rodeo for Python](https://www.yhat.com/products/rodeo)


### Check

Open a terminal
```bash
jupyter notebook
```

See if the installed kernels are found in the drop-down menu.

## Live Demo of Jupyter Features

* Administration interface
    * Files
    * Running
    * Uploading notebooks
    * New notebook
* Notebook interface
    * Menu
    * Cells
    * Keyboard shortcuts
    * Getting help
* Notebook magics
    * `bash`
    * `R`
    * Other

## Using Markdown in Jupyter for Literate Programming

[Markdown Syntax](https://help.github.com/articles/basic-writing-and-formatting-syntax)

## Elements of Python

### Types

In [2]:
True, False, 1, 2, 3, 3.14, 2.78, "hello", 'world'

(True, False, 1, 2, 3, 3.14, 2.78, 'hello', 'world')

### Operators

In [3]:
# Arithmetic, logical (`and` / `or`) and comparison operators, collected in one tuple.
# `and` / `or` are Python's logical operators; `&` / `|` are the bitwise ones.
-1, 2+3, 7%3, 7/2, 7//2, 2**4, True and True, True and False, True or False, 3 <= 4

(-1, 5, 1, 3.5, 3, 16, True, False, True, True)

### Variables and Assignment

In [4]:
a = 3
b = 4
c = a + b

In [5]:
a, b, c

(3, 4, 7)

### Containers (Collections)

In [6]:
# Tuple literal, written with parentheses.
a_tuple = (1, 2, 3, 4)
# List literal, written with square brackets.
a_list = ['a', 'b', 'c', 'd']
# Set literal; the repeated 2s and 3s are collapsed, leaving {1, 2, 3}.
a_set = {1, 2, 2, 3, 3, 3}
# Dict literal mapping string keys to integer values.
a_dict = {'c': 1, 'b': 2, 'a': 3}

In [7]:
a_tuple

(1, 2, 3, 4)

In [8]:
a_list

['a', 'b', 'c', 'd']

In [9]:
a_set

{1, 2, 3}

In [10]:
a_dict

{'a': 3, 'b': 2, 'c': 1}

In [11]:
from collections import OrderedDict

In [12]:
a_ordereddict = OrderedDict([('c', 1), ('b', 2), ('a', 3)])
a_ordereddict

OrderedDict([('c', 1), ('b', 2), ('a', 3)])

### Indexing a container

In [13]:
a_tuple[0]

1

In [14]:
a_list[1:4]

['b', 'c', 'd']

In [15]:
a_dict['b']

2

In [16]:
a_ordereddict['c']

1

### Controlling program flow

In [17]:
'a' if 3 < 4 else 'b'

'a'

In [18]:
'a' if 4 < 3 else 'b'

'b'

In [19]:
# Draw a random score with np.random.uniform(60, 100). No seed is set in this
# cell, so the printed grade can differ from one run to the next.
score = np.random.uniform(60, 100)

# Print a letter grade: 'A' above 90, 'B' above 80 (up to 90), otherwise 'C'.
if score > 90:
    print('A')
elif score > 80:
    print('B')
else:
    print('C')

A


### Looping

In [20]:
list(range(10, 20, 2))

[10, 12, 14, 16, 18]

In [21]:
for i in range(10, 20, 2):
    print(i, i**2)

10 100
12 144
14 196
16 256
18 324


In [22]:
# Count upward from zero, printing each value, and stop once the limit is hit.
count, max_count = 0, 5
while count < max_count:
    print(count)
    count = count + 1

0
1
2
3
4


### List Comprehension

In [23]:
[x**2 for x in range(5)]

[0, 1, 4, 9, 16]

In [24]:
[x**2 for x in range(5) if x % 2 == 0]

[0, 4, 16]

### User-defined functions

In [25]:
def f(x):
    """Identity function: hand the argument back unchanged."""
    result = x
    return result

In [26]:
f(3.14)

3.14

In [27]:
def g(a, b):
    """Return the result of applying ``+`` to ``a`` and ``b``."""
    total = a + b
    return total

In [28]:
g(3, 4)

7

#### Default arguments

In [29]:
def h(a=0, b=1, c=2):
    """Return the weighted sum ``a + 2*b + 3*c``.

    All three parameters are optional and default to ``a=0``, ``b=1`` and
    ``c=2``, so ``h()`` evaluates to ``0 + 2*1 + 3*2 == 8``. Arguments may be
    passed positionally or by keyword.
    """
    return a + 2*b + 3*c

In [30]:
h()

8

In [31]:
h(1)

9

In [32]:
h(1, 2)

11

In [33]:
h(1, 2, 3)

14

In [34]:
h(c = 1, b = 2, a = 3)

10

### Using Libraries

In [35]:
import math

math.pi

3.141592653589793

In [36]:
import numpy as np

np.linspace(0, 1, 11)

array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ])

In [37]:
from numpy.random import rand

In [38]:
rand(4)

array([ 0.12365436,  0.57294035,  0.08970509,  0.70075327])

## Working with vectors and arrays

In [39]:
A = np.random.random((3,4))
A

array([[ 0.72558757,  0.62773131,  0.84824601,  0.23886442],
       [ 0.52841628,  0.71965435,  0.32039652,  0.29125637],
       [ 0.81117304,  0.96326656,  0.63946624,  0.51574475]])

### Indexing a matrix

In [40]:
A[0,0]

0.7255875679496957

In [41]:
A[2,3]

0.5157447477346595

In [42]:
A[1]

array([ 0.52841628,  0.71965435,  0.32039652,  0.29125637])

In [43]:
A[1, :]

array([ 0.52841628,  0.71965435,  0.32039652,  0.29125637])

In [44]:
A[:, 2]

array([ 0.84824601,  0.32039652,  0.63946624])

In [45]:
A[:2, 1:]

array([[ 0.62773131,  0.84824601,  0.23886442],
       [ 0.71965435,  0.32039652,  0.29125637]])

In [46]:
A[1:3, 1:3]

array([[ 0.71965435,  0.32039652],
       [ 0.96326656,  0.63946624]])

### Vectorized functions

In [47]:
A * 10

array([[ 7.25587568,  6.27731313,  8.48246012,  2.38864421],
       [ 5.28416285,  7.19654349,  3.20396517,  2.9125637 ],
       [ 8.11173037,  9.63266563,  6.39466244,  5.15744748]])

In [48]:
A.sum()

7.2298034265026363

In [49]:
A.sum(axis = 0)

array([ 2.06517689,  2.31065222,  1.80810877,  1.04586554])

In [50]:
A.sum(axis = 1)

array([ 2.44042931,  1.85972352,  2.92965059])

In [51]:
A.max(axis = 0)

array([ 0.81117304,  0.96326656,  0.84824601,  0.51574475])

In [52]:
A.T @ A

array([[ 1.46370278,  1.61712698,  1.30349727,  0.7455799 ],
       [ 1.61712698,  1.83983145,  1.37902178,  0.85634626],
       [ 1.30349727,  1.37902178,  1.2310923 ,  0.62573468],
       [ 0.7455799 ,  0.85634626,  0.62573468,  0.40787913]])

## Exercises

## Version information

In [53]:
%load_ext version_information
%version_information

The version_information extension is already loaded. To reload it, use:
  %reload_ext version_information


Software,Version
Python,3.5.2 64bit [GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]
IPython,5.0.0
OS,Darwin 15.6.0 x86_64 i386 64bit
Tue Aug 16 09:04:41 2016 EDT,Tue Aug 16 09:04:41 2016 EDT
