In [23]:
import numpy as np

import pandas as pd

# Importing Series and DataFrame directly lets us use them without the pd. prefix,
# i.e. we can write Series(...) and DataFrame(...) instead of pd.Series(...) and pd.DataFrame(...).

from pandas import Series,DataFrame

In [24]:
# Let's get some data to make a DataFrame.

In [25]:
import webbrowser

# Page holding the NFL win-loss table used for the DataFrame examples.
# Use HTTPS so the page is requested over an encrypted connection.
website = 'https://en.wikipedia.org/wiki/NFL_win-loss_records'

# Side effect: opens the page in the system's default web browser.
# The call returns a bool, which is the value displayed under this cell.
webbrowser.open(website)

True

In [26]:
# Copy and read data using pandas in order to grab a dataframe.

In [11]:
# Manual step: open the website link above, select the first five rows of the
# table and copy them to the system clipboard before running this cell.
# pd.read_clipboard() then reads the clipboard contents into a DataFrame.
# NOTE(review): this cell depends on that manual copy, so it will not reproduce
# the same frame on a fresh run unless the clipboard holds the same rows.

nfl_frame = pd.read_clipboard()

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division
0,1,Dallas Cowboys,964,550,408,6,0.574,1960,NFC East
1,2,Green Bay Packers,1418,790,590,38,0.571,1921,NFC North
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North
3,4,New England Patriots,966,537,420,9,0.561,1960,AFC East
4,5,Chicago Bears,1452,786,624,42,0.556,1920,NFC North


In [22]:
# Grab the column names; .columns gives them back as a pandas Index object.

nfl_frame.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL season',
       'Division'],
      dtype='object')

In [21]:
# Select one column by its label; a single column comes back as a Series.
nfl_frame['First NFL season']

0    1960
1    1921
2    1996
3    1960
4    1920
Name: First NFL season, dtype: int64

In [28]:
# Grab multiple columns: pass a list of column labels inside the indexing brackets.
# Unlike re-wrapping the frame in DataFrame(..., columns=[...]), this raises a
# KeyError for a misspelled label instead of silently adding an all-NaN column.

nfl_frame[['Team', 'First NFL season', 'GP']]

Unnamed: 0,Team,First NFL season,GP
0,Dallas Cowboys,1960,964
1,Green Bay Packers,1921,1418
2,Baltimore Ravens,1996,434
3,New England Patriots,1960,966
4,Chicago Bears,1920,1452


In [30]:
# Ask for a column that does not exist ('Stadium'):

nfl_frame.reindex(columns=['Team', 'First NFL season', 'GP', 'Stadium'])

# No error is raised for the unknown label; the new column is filled with NaN (missing) values.

Unnamed: 0,Team,First NFL season,GP,Stadium
0,Dallas Cowboys,1960,964,
1,Green Bay Packers,1921,1418,
2,Baltimore Ravens,1996,434,
3,New England Patriots,1960,966,
4,Chicago Bears,1920,1452,


In [33]:
# Show the first n rows of the frame (n=3 here):

nfl_frame.head(3)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division
0,1,Dallas Cowboys,964,550,408,6,0.574,1960,NFC East
1,2,Green Bay Packers,1418,790,590,38,0.571,1921,NFC North
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North


In [63]:
# Show the last n rows of the frame (n=3 here):
# NOTE(review): the saved output of this cell already contains a 'Stadiums'
# column, although that column is only assigned further down in the notebook;
# the cells appear to have been executed out of order. Restart the kernel and
# run all cells to refresh the outputs.

nfl_frame.tail(3)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division,Stadiums
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North,
3,4,New England Patriots,966,537,420,9,0.561,1960,AFC East,King's Stands
4,5,Chicago Bears,1452,786,624,42,0.556,1920,NFC North,Greatbowl


In [64]:
# Select a single row by its index label with .loc (here the row labelled 3).
# The row is returned as a Series whose index is the frame's column names.
# .loc description: https://www.codecademy.com/resources/docs/pandas/dataframe/loc

nfl_frame.loc[3]

Rank                                   4
Team                New England Patriots
GP                                   966
Won                                  537
Lost                                 420
Tied                                   9
Pct.                               0.561
First NFL season                    1960
Division                        AFC East
Stadiums                   King's Stands
Name: 3, dtype: object

In [65]:
# Assign a value to an entire column: a single scalar is repeated for every row.
# The 'Stadium' column is created by this assignment if it is not present yet.

nfl_frame['Stadium'] = "Levi's Stadium"

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division,Stadiums,Stadium
0,1,Dallas Cowboys,964,550,408,6,0.574,1960,NFC East,Levi's Stadium,Levi's Stadium
1,2,Green Bay Packers,1418,790,590,38,0.571,1921,NFC North,,Levi's Stadium
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North,,Levi's Stadium
3,4,New England Patriots,966,537,420,9,0.561,1960,AFC East,King's Stands,Levi's Stadium
4,5,Chicago Bears,1452,786,624,42,0.556,1920,NFC North,Greatbowl,Levi's Stadium


In [66]:
# Assign an array to a column: one value per row, in row order.
# The array is sized from the frame rather than hard-coding 5, so the cell keeps
# working if a different number of rows was copied into nfl_frame.
nfl_frame['Stadium'] = np.arange(len(nfl_frame))

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division,Stadiums,Stadium
0,1,Dallas Cowboys,964,550,408,6,0.574,1960,NFC East,Levi's Stadium,0
1,2,Green Bay Packers,1418,790,590,38,0.571,1921,NFC North,,1
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North,,2
3,4,New England Patriots,966,537,420,9,0.561,1960,AFC East,King's Stands,3
4,5,Chicago Bears,1452,786,624,42,0.556,1920,NFC North,Greatbowl,4


In [67]:
# Stadium names keyed by the row label (0, 3 and 4) each one belongs to.
stadiums = Series({0: "Levi's Stadium", 3: "King's Stands", 4: 'Greatbowl'})

In [68]:
# Insert the Series into the DataFrame as the 'Stadiums' column.
# Values are matched on index labels: rows 0, 3 and 4 receive a stadium name,
# while rows 1 and 2 have no matching label and are left as missing values.

nfl_frame['Stadiums'] = stadiums

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division,Stadiums,Stadium
0,1,Dallas Cowboys,964,550,408,6,0.574,1960,NFC East,Levi's Stadium,0
1,2,Green Bay Packers,1418,790,590,38,0.571,1921,NFC North,,1
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North,,2
3,4,New England Patriots,966,537,420,9,0.561,1960,AFC East,King's Stands,3
4,5,Chicago Bears,1452,786,624,42,0.556,1920,NFC North,Greatbowl,4


In [69]:
# Delete a column in place with the del statement (removes 'Stadium' from nfl_frame itself).

del nfl_frame['Stadium']

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL season,Division,Stadiums
0,1,Dallas Cowboys,964,550,408,6,0.574,1960,NFC East,Levi's Stadium
1,2,Green Bay Packers,1418,790,590,38,0.571,1921,NFC North,
2,3,Baltimore Ravens,434,243,190,1,0.561,1996,AFC North,
3,4,New England Patriots,966,537,420,9,0.561,1960,AFC East,King's Stands
4,5,Chicago Bears,1452,786,624,42,0.556,1920,NFC North,Greatbowl


In [72]:
# A DataFrame can be built straight from a dictionary of lists: each key becomes
# a column name and each list supplies that column's values, so all lists must
# have the same length.

data = dict(
    City=['SF', 'LA', 'NYC'],
    Population=[837000, 3880000, 8400000],
)

city_frame = DataFrame(data)

city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [73]:
# For the full list of ways to create DataFrames from various sources, see the pandas documentation.
# The link points at the stable API reference over HTTPS instead of the old development-docs path.

website = 'https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html'

# Side effect: opens the documentation page in the system's default web browser.
webbrowser.open(website)

True