In [1]:
import numpy as np

import pandas as pd

from pandas import Series, DataFrame


In [2]:
# Launch the default web browser on the Wikipedia page that lists NFL
# win-loss records.
import webbrowser

website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [3]:
# Parse the tabular text currently on the system clipboard into a DataFrame.
# The table has to be copied before this cell is run -- presumably from the
# Wikipedia page opened in the previous cell.

nfl_frame = pd.read_clipboard()

In [4]:
# Display the frame that was read from the clipboard.
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [6]:
# .columns returns the frame's column labels as an Index object.
nfl_frame.columns

Index([u'Rank', u'Team', u'Won', u'Lost', u'Tied', u'Pct.',
       u'First NFL Season', u'Total Games', u'Division'],
      dtype='object')

In [7]:
# Attribute-style access returns a single column as a Series.
nfl_frame.Rank

0    1
1    2
2    3
3    4
4    5
Name: Rank, dtype: int64

In [9]:
# This also returns a Series
nfl_frame.Team

0          Dallas Cowboys
1       Green Bay Packers
2           Chicago Bears
3          Miami Dolphins
4    New England Patriots
Name: Team, dtype: object

In [12]:
# Bracket access is needed here: the column name contains spaces, so it
# cannot be written as an attribute.
nfl_frame['First NFL Season']

0    1960
1    1921
2    1920
3    1966
4    1960
Name: First NFL Season, dtype: int64

In [16]:
# Passing an existing frame together with columns=[...] builds a new frame
# holding only the listed columns; nfl_frame itself is not modified.
DataFrame(nfl_frame, columns=['Team', 'First NFL Season', 'Total Games'])

Unnamed: 0,Team,First NFL Season,Total Games
0,Dallas Cowboys,1960,866
1,Green Bay Packers,1921,1320
2,Chicago Bears,1920,1354
3,Miami Dolphins,1966,784
4,New England Patriots,1960,868


In [17]:
# Requesting a column that does not exist in the source frame ('Stadium')
# creates it with NaN in every row.
DataFrame(nfl_frame, columns=['Team', 'First NFL Season', 'Total Games', 'Stadium'])

Unnamed: 0,Team,First NFL Season,Total Games,Stadium
0,Dallas Cowboys,1960,866,
1,Green Bay Packers,1921,1320,
2,Chicago Bears,1920,1354,
3,Miami Dolphins,1966,784,
4,New England Patriots,1960,868,


In [21]:
# Rows can also be retrieved through indexing. Use the label-based .loc
# indexer: the older .ix indexer was deprecated in pandas 0.20 and removed in
# pandas 1.0, so calling .ix raises AttributeError on current versions.

nfl_frame.loc[3]


Rank                             4
Team                Miami Dolphins
Won                            439
Lost                           341
Tied                             4
Pct.                         0.563
First NFL Season              1966
Total Games                    784
Division                  AFC East
Name: 3, dtype: object

In [23]:
# pandas reports that .ix is deprecated and suggests using .loc instead;
# .loc selects the row by its index label.

nfl_frame.loc[3]

Rank                             4
Team                Miami Dolphins
Won                            439
Lost                           341
Tied                             4
Pct.                         0.563
First NFL Season              1966
Total Games                    784
Division                  AFC East
Name: 3, dtype: object

In [28]:
# Assigning a single value to a column gives every row that same value.

# Use double quotes for the string because it contains an apostrophe.
nfl_frame['Stadium'] = "Levi's Stadium"

In [30]:
# Display the frame; every row now carries the same 'Stadium' value.
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,Levi's Stadium
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Levi's Stadium
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Levi's Stadium
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Levi's Stadium
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,Levi's Stadium


In [32]:
# A column can also be filled from an array, one value per row. The array is
# sized from the frame itself rather than hard-coded to 5, because assigning
# an array whose length differs from the number of rows raises a ValueError
# (e.g. when a different number of rows was copied from the clipboard).

nfl_frame["Stadium"] = np.arange(len(nfl_frame))

nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,0
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,1
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,2
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,3
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,4


In [34]:
# A Series can be added to a DataFrame as well. This one holds two stadium
# names, explicitly labelled with index values 4 and 0 (in that order).
stadiums = Series(data=["Levi's Stadium", "AT&T Stadium"], index=[4, 0])

In [35]:
# Display the Series; its index labels are 4 and 0, in that order.
stadiums

4    Levi's Stadium
0      AT&T Stadium
dtype: object

In [37]:
# Assigning a Series aligns on index labels: rows 0 and 4 receive the stadium
# names and every other row is left as NaN. Note the lowercase name creates a
# new 'stadium' column alongside the existing 'Stadium' one.
nfl_frame["stadium"] = stadiums

nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium,stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,0,AT&T Stadium
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,1,
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,2,
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,3,
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,4,Levi's Stadium


In [41]:
# Columns can also be deleted with del, which modifies the frame in place.
# NOTE(review): the recorded output below no longer shows 'Stadium' either,
# although only 'stadium' is deleted here; the execution counts jump from 37
# to 41, so presumably an unrecorded cell removed it -- confirm on re-run.

del nfl_frame['stadium']

nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [46]:
# A DataFrame can also be built directly from a dict that maps each column
# name to the list of values for that column.
data = {
    'City': ['SF', 'LA', 'NYC'],
    'Population': [837000, 3880000, 8400000],
}
city_frame = DataFrame(data=data)
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [47]:
# For the full list of ways to create DataFrames from various sources, see the pandas documentation:
website = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html'
webbrowser.open(website)

True