In [None]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

**DataFrame**

* A DataFrame represents a rectangular table of data and contains an ordered, named
collection of columns, each of which can be a different value type (numeric, string,
Boolean, etc.). The DataFrame has both a row and column index; it can be thought of
as a dictionary of Series all sharing the same index.


In [None]:
# Column-oriented input: each key is a column name, each list holds that column's values.
data = dict(
    state=["Ohio"] * 3 + ["Nevada"] * 3,
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)

In [None]:
# Build a DataFrame from the dict of equal-length lists; no index is given, so
# rows get the default integer labels. Uses the `pd.` prefix like every other
# constructor call in this notebook.
frame = pd.DataFrame(data)

In [None]:
# Render the whole frame: six rows labelled 0-5, columns in the dict's key order.
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


**For large DataFrames, the head method selects only the first five rows:**

In [None]:
# Preview the top of the frame; five rows is also head()'s default.
frame.head(n=5)

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


**Similarly, tail returns the last five rows:**

In [None]:
# Preview the bottom of the frame; five rows is also tail()'s default.
frame.tail(n=5)

Unnamed: 0,state,year,pop
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


**If you specify a sequence of columns, the DataFrame’s columns will be arranged in
that order (the example below also passes custom row labels through `index`):**

In [None]:
# Fix the column order explicitly and replace the default integer row labels
# with custom string labels (one label per row).
frame1 = pd.DataFrame(
    data,
    columns=["year", "state", "pop"],
    index=["k", "b", "c", "d", "e", "f"],
)

In [None]:
# Columns appear in the requested order and rows carry the custom labels.
frame1

Unnamed: 0,year,state,pop
k,2000,Ohio,1.5
b,2001,Ohio,1.7
c,2002,Ohio,3.6
d,2001,Nevada,2.4
e,2002,Nevada,2.9
f,2003,Nevada,3.2


**If you pass a column that isn’t contained in the dictionary, it will appear with missing
values in the result:**

In [None]:
# "debt" is not a key of `data`, so that column is created with missing values.
frame2 = pd.DataFrame(
    data,
    columns=["year", "state", "pop", "debt"],
)

In [None]:
# Confirm the column labels: the requested order is kept and "debt" is present
# even though `data` has no such key.
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

**A column in a DataFrame can be retrieved as a Series either by dictionary-like
notation or by using the dot attribute notation:**

In [None]:
# Dictionary-style access: works for any column name and returns the column as a Series.
frame2["state"]

Unnamed: 0,state
0,Ohio
1,Ohio
2,Ohio
3,Nevada
4,Nevada
5,Nevada


In [None]:
# Attribute-style access to the same kind of column Series. It only works when the
# column name is a valid Python identifier that does not clash with a DataFrame
# attribute: e.g. `frame2.pop` is the DataFrame.pop method, not the "pop" column,
# so use frame2["pop"] for that one.
frame2.year

Unnamed: 0,year
0,2000
1,2001
2,2002
3,2001
4,2002
5,2003


In [None]:
# Label-based row lookup: select the row whose index label is 1, returned as a
# Series keyed by column name.
frame2.loc[1]

Unnamed: 0,1
year,2001
state,Ohio
pop,1.7
debt,


In [None]:
# Position-based row lookup: select the second row (position 1). frame2 was built
# without an explicit index, so labels and positions coincide and this returns
# the same row as frame2.loc[1].
frame2.iloc[1]

Unnamed: 0,1
year,2001
state,Ohio
pop,1.7
debt,


**Columns can be modified by assignment. For example, the empty debt column could
be assigned a scalar value or an array of values:**

In [None]:
# Assigning a scalar fills the whole column with that value (mutates frame2 in place).
frame2["debt"] = 2

In [None]:
# Every row now shows debt == 2.
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,2
1,2001,Ohio,1.7,2
2,2002,Ohio,3.6,2
3,2001,Nevada,2.4,2
4,2002,Nevada,2.9,2
5,2003,Nevada,3.2,2


In [None]:
# Assigning a list sets the column row by row; the list supplies one value per
# row, so its length has to match the number of rows (six here).
frame2["debt"] = [1,2,3,4,5,6]

In [None]:
# debt now holds 1..6 in row order.
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,1
1,2001,Ohio,1.7,2
2,2002,Ohio,3.6,3
3,2001,Nevada,2.4,4
4,2002,Nevada,2.9,5
5,2003,Nevada,3.2,6


In [None]:
# A NumPy array works the same way as a list: np.arange(6) supplies 0..5, one value per row.
frame2["debt"] = np.arange(6)

In [None]:
# debt now holds 0..5 in row order.
frame2

Unnamed: 0,year,state,pop,debt
0,2000,Ohio,1.5,0
1,2001,Ohio,1.7,1
2,2002,Ohio,3.6,2
3,2001,Nevada,2.4,3
4,2002,Nevada,2.9,4
5,2003,Nevada,3.2,5


**The del keyword will delete columns like with a dictionary. As an example, I first add
a new column of Boolean values where the state column equals "Ohio":**

In [None]:
# Element-wise comparison yields a Boolean Series, stored as the new "eastern"
# column. New columns must be created with bracket syntax.
frame2["eastern"] = frame2["state"].eq("Ohio")

In [None]:
# The new Boolean column is True for the Ohio rows and False for the Nevada rows.
frame2

Unnamed: 0,year,state,pop,debt,eastern
0,2000,Ohio,1.5,0,True
1,2001,Ohio,1.7,1,True
2,2002,Ohio,3.6,2,True
3,2001,Nevada,2.4,3,False
4,2002,Nevada,2.9,4,False
5,2003,Nevada,3.2,5,False


In [None]:
# Remove the "eastern" column from frame2 in place, using the same syntax as
# deleting a key from a dict.
del frame2["eastern"]

In [None]:
# Verify that "eastern" is gone and the original four columns remain.
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

**Another common form of data is a nested dictionary of dictionaries:**

In [None]:
# Nested mapping: state -> {year -> population}. Nevada has no entry for 2000.
populations = {
    "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
    "Nevada": {2001: 2.4, 2002: 2.9},
}

In [None]:
# Outer dict keys (states) become columns and inner keys (years) become the row
# index; a year missing for a state is left as a missing value.
frame3 = pd.DataFrame(populations)

In [None]:
# Transpose for display: states become rows and years become columns.
frame3.T

Unnamed: 0,2000,2001,2002
Ohio,1.5,1.7,3.6
Nevada,,2.4,2.9


**Index Objects**

pandas’s Index objects are responsible for holding the axis labels (including a
DataFrame’s column names) and other metadata (like the axis name or names). Any array
or other sequence of labels you use when constructing a Series or DataFrame is
internally converted to an Index:

In [None]:
# Three integers (0, 1, 2) labelled "a", "b", "c"; the label list is converted
# to an Index internally.
obj = pd.Series(data=np.arange(3), index=list("abc"))

In [None]:
# Keep a reference to the Index object that holds obj's labels.
index = obj.index

In [None]:
# The label list passed to pd.Series is now an Index object.
index

Index(['a', 'b', 'c'], dtype='object')

In [None]:
# Slicing an Index returns another Index holding the selected labels.
index[1:]

Index(['b', 'c'], dtype='object')

In [None]:
# A single integer position returns the label itself rather than an Index.
index[1]

'b'

**Index objects are immutable and thus can’t be modified by the user:**

In [None]:
# Index objects are immutable: item assignment raises TypeError. The error is
# caught and printed so the demonstration is visible in the output while the
# notebook still completes under Restart Kernel -> Run All.
try:
    index[1] = "d"
except TypeError as err:
    print(f"TypeError: {err}")


In [None]:
# Build an Index directly from an integer array (0, 1, 2) so it can be reused as labels.
labels = pd.Index(np.arange(0, 3))

In [None]:
# Reuse the prebuilt Index object as the labels of a new Series.
obj2 = pd.Series(data=[1.3, 5.4, 8.7], index=labels)

In [None]:
# Display the Series; its labels 0-2 come from the `labels` Index.
obj2

Unnamed: 0,0
0,1.3
1,5.4
2,8.7
