# Working with data frames

In [1]:
import numpy as np
import pandas as pd

## Creation

### Series

In [5]:
# A Series pairs each value with a label: the integers 0..3 keyed by 'A'..'D'.
s = pd.Series(data=np.arange(4), index=['A', 'B', 'C', 'D'])
s

A    0
B    1
C    2
D    3
dtype: int64

#### Indexing

In [6]:
# An integer slice here is positional and excludes the stop, so this returns the first two entries.
s[:2]

A    0
B    1
dtype: int64

In [7]:
# Slicing by label keeps both endpoints: 'B', 'C' and 'D' are all returned.
s['B':'D']

B    1
C    2
D    3
dtype: int64

### DataFrame

In [13]:
# The numbers 1..20 laid out row by row in a 4x5 grid.
data = np.arange(1, 21).reshape(4, 5)
# Rows are labelled 1-4 (not 0-3) and columns 'A'-'E'.
df = pd.DataFrame(data=data, index=range(1, 5), columns=list('ABCDE'))
df

Unnamed: 0,A,B,C,D,E
1,1,2,3,4,5
2,6,7,8,9,10
3,11,12,13,14,15
4,16,17,18,19,20


#### Indexing

In [14]:
# A single column label returns that column as a Series (note the Name/dtype footer in the output).
df['A']

1     1
2     6
3    11
4    16
Name: A, dtype: int64

In [16]:
# A list of column labels returns a DataFrame containing just those columns.
df[['A', 'C']]

Unnamed: 0,A,C
1,1,3
2,6,8
3,11,13
4,16,18


#### Using the `loc` indexing operator

Note: `loc` works on labels.

In [17]:
# Rows labelled 1 through 3; a label slice keeps the stop label, so row 3 is included.
df.loc[1:3]

Unnamed: 0,A,B,C,D,E
1,1,2,3,4,5
2,6,7,8,9,10
3,11,12,13,14,15


In [18]:
# Row labels 1 through 3 and column labels 'B' through 'D', both ends inclusive.
df.loc[1:3, 'B':'D']

Unnamed: 0,B,C,D
1,2,3,4
2,7,8,9
3,12,13,14


#### Using the `iloc` indexing operator

Note: `iloc` works on position indices (like `numpy`)

In [19]:
# Positions 1 and 2 (zero-based, stop excluded), which here are the rows labelled 2 and 3.
df.iloc[1:3]

Unnamed: 0,A,B,C,D,E
2,6,7,8,9,10
3,11,12,13,14,15


In [23]:
# Row positions 1-2 and column positions 1-2, i.e. rows labelled 2-3 and columns 'B'-'C'.
df.iloc[1:3, 1:3]

Unnamed: 0,B,C
2,7,8
3,12,13


## Other ways to get a data frame

### Read from file

In [24]:
%%file data.csv
a,b,c
1,2,3
4,5,6
7,8,9
10,11,12

Writing data.csv


In [26]:
# Load data.csv from the working directory; the header row supplies the column
# names and the rows receive a default 0-based index.
# NOTE: this rebinds `df`, so the 4x5 frame built earlier is no longer reachable
# under that name.
df = pd.read_csv('data.csv')
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9
3,10,11,12


### Read from web

In [28]:
import ssl
# WARNING: this swaps the ssl module's default HTTPS context factory for the
# unverified one. Both names are private (underscore-prefixed) attributes, and
# the change applies to the whole kernel session, not just the next request:
# HTTPS connections that rely on the default context stop checking certificates.
# Presumably a workaround for a local certificate error when fetching the page
# below -- confirm it is still needed, and do not copy this into real code.
ssl._create_default_https_context = ssl._create_unverified_context

There are many tables in the Wikipedia entry for Durham.

In [29]:
# read_html returns a list with one DataFrame per table it parses from the page.
# The result depends on the live article, so the number and order of tables can
# differ between runs.
dfs = pd.read_html('https://en.wikipedia.org/wiki/Durham,_North_Carolina')

In [35]:
# How many tables were parsed from the page on this run.
len(dfs)

15

In [34]:
# The table at position 4 was the employer list on this run; the position may
# shift if the article is edited, so check the output before relying on it.
dfs[4]

Unnamed: 0,Employer,No. of employees
0,Duke University & Duke Univ. Health System,34863
1,IBM,10000
2,Durham Public Schools,4600
3,GlaxoSmithKline,3700
4,Blue Cross & Blue Shield of NC,3200
5,City of Durham,2437
6,Fidelity Investments,2400
7,IQVIA,2400
8,RTI International,2300
9,Durham VA Medical Center,2162
