In [1]:
import webbrowser

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
#Now we'll learn DataFrames

#Let's get some data to play with. How about the NFL?
# Open the Wikipedia win-loss table in the default browser so that the rows
# can be copied to the clipboard for the next cell. (webbrowser is imported
# with the other modules in the first cell.)
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [3]:
# Copy the table rows from the opened page, then read them from the system clipboard.
# NOTE(review): the frame displayed in the next cell has multi-word team names
# and headers spread over several columns (e.g. Team='Chicago', Won='Bears'),
# presumably because the copied text was split on whitespace -- confirm, and
# consider passing an explicit sep if that is not intended.
nfl_frame = pd.read_clipboard()

In [4]:
# Display the frame that was just read from the clipboard
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division
0,1,Chicago,Bears,741,555,42,0.57,1920.0,1338,NFC,North,
1,2,Dallas,Cowboys,480,364,6,0.568,1960.0,850,NFC,East,
2,3,Green,Bay,Packers,720,547,37.0,0.566,1921,1304,NFC,North
3,4,Miami,Dolphins,429,335,4,0.561,1966.0,768,AFC,East,
4,5,New,England,Patriots,462,381,9.0,0.548,1960,852,AFC,East


In [5]:
# We can grab the column names with .columns
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First', 'NFL', 'Season',
       'Total', 'Games', 'Division'],
      dtype='object')

In [6]:
# Let's look at a few specific columns: passing an existing frame plus a list of
# column labels to DataFrame builds a new frame holding just those columns
wanted_columns = ['Team', 'Season', 'Total']
DataFrame(nfl_frame, columns=wanted_columns)

Unnamed: 0,Team,Season,Total
0,Chicago,1338,NFC
1,Dallas,850,NFC
2,Green,1921,1304
3,Miami,768,AFC
4,New,1960,852


In [7]:
# What if one of the requested labels ('Stadium') is not in the source frame?
# No error is raised: the new frame gets that column, left empty (see output)
columns_with_extra = ['Team', 'Season', 'Total', 'Stadium']
DataFrame(nfl_frame, columns=columns_with_extra)


Unnamed: 0,Team,Season,Total,Stadium
0,Chicago,1338,NFC,
1,Dallas,850,NFC,
2,Green,1921,1304,
3,Miami,768,AFC,
4,New,1960,852,


In [8]:
# List the column labels again: the two cells above only displayed new frames,
# so nfl_frame itself still has no 'Stadium' column
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First', 'NFL', 'Season',
       'Total', 'Games', 'Division'],
      dtype='object')

In [9]:
# We can retrieve an individual column with attribute access; the result is a Series
nfl_frame.Team

0    Chicago
1     Dallas
2      Green
3      Miami
4        New
Name: Team, dtype: object

In [10]:
# Bracket notation retrieves a column as well, and is the form to use when the
# column name contains spaces (attribute access cannot express such names)
nfl_frame['Total']

0      NFC
1      NFC
2    1,304
3      AFC
4      852
Name: Total, dtype: object

In [11]:
#We can retrieve rows through indexing
# .ix was deprecated in pandas 0.20 and removed in 1.0. The frame uses the
# default integer index, so .loc selects the same row by its index label.
nfl_frame.loc[3]

Rank               4
Team           Miami
Won         Dolphins
Lost             429
Tied             335
Pct.               4
First          0.561
NFL             1966
Season           768
Total            AFC
Games           East
Division         NaN
Name: 3, dtype: object

In [12]:
#We can also assign values to entire columns: a single value is repeated on
#every row, and the 'Stadium' column is created because it does not exist yet
nfl_frame['Stadium']="Levi's Stadium" #Careful with the ' here: the apostrophe is why the string uses double quotes

In [13]:
# Show the frame with the newly added 'Stadium' column
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division,Stadium
0,1,Chicago,Bears,741,555,42,0.57,1920.0,1338,NFC,North,,Levi's Stadium
1,2,Dallas,Cowboys,480,364,6,0.568,1960.0,850,NFC,East,,Levi's Stadium
2,3,Green,Bay,Packers,720,547,37.0,0.566,1921,1304,NFC,North,Levi's Stadium
3,4,Miami,Dolphins,429,335,4,0.561,1966.0,768,AFC,East,,Levi's Stadium
4,5,New,England,Patriots,462,381,9.0,0.548,1960,852,AFC,East,Levi's Stadium


In [14]:
#Putting numbers for stadiums
# An array assigned to a column must have one value per row. Size it from the
# frame itself: a hard-coded length only works when exactly that many rows
# were copied, and raises ValueError for any other row count.
nfl_frame["Stadium"] = np.arange(len(nfl_frame))

#Show
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division,Stadium
0,1,Chicago,Bears,741,555,42,0.57,1920.0,1338,NFC,North,,0
1,2,Dallas,Cowboys,480,364,6,0.568,1960.0,850,NFC,East,,1
2,3,Green,Bay,Packers,720,547,37.0,0.566,1921,1304,NFC,North,2
3,4,Miami,Dolphins,429,335,4,0.561,1966.0,768,AFC,East,,3
4,5,New,England,Patriots,462,381,9.0,0.548,1960,852,AFC,East,4


In [15]:
# List the column labels: 'Stadium' is now part of nfl_frame
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First', 'NFL', 'Season',
       'Total', 'Games', 'Division', 'Stadium'],
      dtype='object')

In [16]:
# Adding a Series to a DataFrame: first build a Series whose index labels
# (4 and 0) name the rows that each value belongs to
stadium_names = ["Levi's Stadium", "AT&T Stadium"]
row_labels = [4, 0]
stadiums = Series(stadium_names, index=row_labels)

In [17]:
#Now input into the nfl DataFrame
# Values are matched on index label, not on position: in the output below
# rows 0 and 4 receive a stadium name and the remaining rows are left empty
nfl_frame['Stadium']=stadiums

#Show
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division,Stadium
0,1,Chicago,Bears,741,555,42,0.57,1920.0,1338,NFC,North,,AT&T Stadium
1,2,Dallas,Cowboys,480,364,6,0.568,1960.0,850,NFC,East,,
2,3,Green,Bay,Packers,720,547,37.0,0.566,1921,1304,NFC,North,
3,4,Miami,Dolphins,429,335,4,0.561,1966.0,768,AFC,East,,
4,5,New,England,Patriots,462,381,9.0,0.548,1960,852,AFC,East,Levi's Stadium


In [48]:
#We can also delete columns
# del removes the column from nfl_frame itself (no new frame is created).
# NOTE: re-running this cell once 'Stadium' is gone raises KeyError.
del nfl_frame['Stadium']

# Show the frame without the 'Stadium' column
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division
0,1,Chicago,Bears,741,555,42,0.57,1920.0,1338,NFC,North,
1,2,Dallas,Cowboys,480,364,6,0.568,1960.0,850,NFC,East,
2,3,Green,Bay,Packers,720,547,37.0,0.566,1921,1304,NFC,North
3,4,Miami,Dolphins,429,335,4,0.561,1966.0,768,AFC,East,
4,5,New,England,Patriots,462,381,9.0,0.548,1960,852,AFC,East


In [49]:
# DataFrames can be constructed in many ways. One of them is a dictionary of
# equal-length lists: each key becomes a column label and each list fills that column
data = dict(
    City=['SF', 'LA', 'NYC'],
    Population=[837000, 3880000, 8400000],
)

city_frame = DataFrame(data)

#Show
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [40]:
#For the full list of ways to create DataFrames from various sources, see the pandas documentation.
# This opens the DataFrame reference page in the default browser.
# NOTE(review): the URL targets the "dev" documentation tree, which may have
# moved since this was written -- confirm the link still resolves.
website = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html'
webbrowser.open(website)

True