##### Creating DataFrames

In [31]:
# Create a DataFrame
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [32]:
# Build a three-element Series whose values are labelled A, B and C
ser01 = Series(data=[100, 200, 300], index=list('ABC'))
# Display the Series
ser01

A    100
B    200
C    300
dtype: int64

In [33]:
# Wrap the Series in a single-column DataFrame via the constructor
df01 = DataFrame(data=ser01)
# Display the frame
df01

Unnamed: 0,0
A,100
B,200
C,300


In [34]:
# Create a DataFrame from scratch using a dictionary of equal-length lists;
# each key becomes a column and each list supplies that column's values.
data = {'State': ['CA', 'CA', 'PA'],
        'City': ['SF', 'LA', 'PHI'],
        'Airport': ['SFO', 'LAX', 'PHL'],
        'Population': [837000, 3880000, 8400000]}

# Pin the column order explicitly. Older pandas releases sorted dict keys
# alphabetically while newer ones keep the dict's insertion order, so
# without `columns=` the layout would depend on the installed version.
df02 = DataFrame(data, columns=['Airport', 'City', 'Population', 'State'])

# Show
df02

Unnamed: 0,Airport,City,Population,State
0,SFO,SF,837000,CA
1,LAX,LA,3880000,CA
2,PHL,PHI,8400000,PA


##### Selecting Columns and Rows

In [35]:
# Pull out the State column as a Series
df02.loc[:, 'State']

0    CA
1    CA
2    PA
Name: State, dtype: object

In [36]:
# Grab multiple columns by passing a list of column names.
# The result is a new DataFrame holding only those columns, in the order
# listed; a misspelled name raises KeyError instead of silently adding an
# all-NaN column.
df02[['City', 'State']]

Unnamed: 0,City,State
0,SF,CA
1,LA,CA
2,PHI,PA


In [37]:
# Select a row by index label with .loc (use .iloc for integer position).
# df02 has the default 0..n-1 index, so label 1 is also the second row.
df02.loc[1]

Airport           LAX
City               LA
Population    3880000
State              CA
Name: 1, dtype: object

In [38]:
# Display only the first row of the frame
df02.head(n=1)

Unnamed: 0,Airport,City,Population,State
0,SFO,SF,837000,CA


In [40]:
# Display only the last row of the frame
df02.tail(n=1)

Unnamed: 0,Airport,City,Population,State
2,PHL,PHI,8400000,PA


##### Show Summary Statistics

In [43]:
# Summarise the numeric columns: count, mean, std, min, quartiles and max
df02.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Population
count,3.0
mean,4372333.333333
std,3805461.382452
min,837000.0
25%,2358500.0
50%,3880000.0
75%,6140000.0
max,8400000.0


##### Transpose DataFrame

In [45]:
# Swap rows and columns so each original column becomes a row
df02.transpose()

Unnamed: 0,0,1,2
Airport,SFO,LAX,PHL
City,SF,LA,PHI
Population,837000,3880000,8400000
State,CA,CA,PA


##### Sort DataFrame

In [47]:
# Show the DataFrame in its original order, for comparison with the sorted views below
df02

Unnamed: 0,Airport,City,Population,State
0,SFO,SF,837000,CA
1,LAX,LA,3880000,CA
2,PHL,PHI,8400000,PA


In [49]:
# Order the columns alphabetically by header (A -> Z)
df02.sort_index(axis='columns', ascending=True)

Unnamed: 0,Airport,City,Population,State
0,SFO,SF,837000,CA
1,LAX,LA,3880000,CA
2,PHL,PHI,8400000,PA


In [50]:
# Order the columns in reverse alphabetical order by header (Z -> A)
df02.sort_index(axis='columns', ascending=False)

Unnamed: 0,State,Population,City,Airport
0,CA,837000,SF,SFO
1,CA,3880000,LA,LAX
2,PA,8400000,PHI,PHL


In [57]:
# Order the rows by their index labels, smallest first
df02.sort_index(axis='index', ascending=True)

Unnamed: 0,Airport,City,Population,State
0,SFO,SF,837000,CA
1,LAX,LA,3880000,CA
2,PHL,PHI,8400000,PA


In [56]:
# Order the rows by their index labels, largest first
df02.sort_index(axis='index', ascending=False)

Unnamed: 0,Airport,City,Population,State
2,PHL,PHI,8400000,PA
1,LAX,LA,3880000,CA
0,SFO,SF,837000,CA


In [60]:
# Sort rows by the values in the Airport column (ascending).
# DataFrame.sort() was deprecated and later removed from pandas;
# sort_values(by=...) is its replacement and returns a new, sorted frame.
df02.sort_values(by='Airport')

Unnamed: 0,Airport,City,Population,State
1,LAX,LA,3880000,CA
2,PHL,PHI,8400000,PA
0,SFO,SF,837000,CA


In [61]:
# Sort rows by the values in the Population column (ascending).
# DataFrame.sort() was deprecated and later removed from pandas;
# sort_values(by=...) is its replacement and returns a new, sorted frame.
df02.sort_values(by='Population')

Unnamed: 0,Airport,City,Population,State
0,SFO,SF,837000,CA
1,LAX,LA,3880000,CA
2,PHL,PHI,8400000,PA
