In [14]:
# Think of a dataframe as a spreadsheet
# Import numpy, pandas
# Import Series and DataFrame from Pandas
# Means don't need to use pd.Series and pd.DataFrame
# just use Series and DataFrame on their own
import numpy as np

import pandas as pd

from pandas import Series, DataFrame

In [3]:
# Grab a webpage
# Pythons built in method to grab a webpage
import webbrowser
# Define website
website='https://en.wikipedia.org/wiki/NFL_win-loss_records'
# Open webpage
webbrowser.open(website)

True

In [6]:
# Go the trhe site
# Highlight first 5 rows with headingsa
# Copy to clipboard - select, copy
# Use pd.read to grab it and put it in a data frame
# Pandas add the first Index column
nfl_frame = pd.read_clipboard()

In [7]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North


In [15]:
# Grab the column name of dataframe
nfl_frame.columns

Index([u'Rank', u'Team', u'Won', u'Lost', u'Tied*', u'Pct.', u'First Season', u'Total Games', u'Conference', u'Stadium'], dtype='object')

In [16]:
# Grab Rank column values
nfl_frame.Rank

0    1
1    2
2    3
3    4
4    5
Name: Rank, dtype: int64

In [17]:
# Grab Team column values
nfl_frame.Team

0       Dallas Cowboys
1        Chicago Bears
2    Green Bay Packers
3       Miami Dolphins
4     Baltimore Ravens
Name: Team, dtype: object

In [18]:
# Above only grabs from single string column name
# To drag from multiple worded column name
nfl_frame['First Season']

0    1960
1    1920
2    1921
3    1966
4    1996
Name: First Season, dtype: int64

In [19]:
# Grab multiple columns
# This creates a new data frame
# Stadium does not exist in the data frame so returns Nan
DataFrame(nfl_frame, columns=['Team','First Season','Total Games','Stadium'])

Unnamed: 0,Team,First Season,Total Games,Stadium
0,Dallas Cowboys,1960,894,0
1,Chicago Bears,1920,1357,1
2,Green Bay Packers,1921,1339,2
3,Miami Dolphins,1966,792,3
4,Baltimore Ravens,1996,326,4


In [20]:
# Retrieve TOP few rows
# Default returns first five rows
nfl_frame.head

<bound method DataFrame.head of    Rank               Team  Won  Lost  Tied*   Pct.  First Season  \
0     1     Dallas Cowboys  510   378      6  0.574          1960   
1     2      Chicago Bears  752   563     42  0.570          1920   
2     3  Green Bay Packers  741   561     37  0.567          1921   
3     4     Miami Dolphins  443   345      4  0.562          1966   
4     5   Baltimore Ravens  182   143      1  0.560          1996   

   Total Games Conference  Stadium  
0          894   NFC East        0  
1         1357  NFC North        1  
2         1339  NFC North        2  
3          792   AFC East        3  
4          326  AFC North        4  >

In [21]:
# Retrieve TOP three rows
nfl_frame.head(3)

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East,0
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North,1
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North,2


In [22]:
# Retrieve LAST three rows
nfl_frame.tail(3)

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference,Stadium
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North,2
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East,3
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North,4


In [23]:
# Return all values for third row (INDEX 3)
# Use ix

nfl_frame.ix[3]

Rank                         4
Team            Miami Dolphins
Won                        443
Lost                       345
Tied*                        4
Pct.                     0.562
First Season              1966
Total Games                792
Conference            AFC East
Stadium                      3
Name: 3, dtype: object

In [24]:
# Assign values to entire columns
# Grab NFL frame and Stadiums column
# Assign value "Levi's Stadium" as values in entire column
# Use double quotes as string contains a single quote
nfl_frame['Stadium'] = "Levi's Stadium"
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East,Levi's Stadium
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North,Levi's Stadium
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North,Levi's Stadium
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East,Levi's Stadium
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North,Levi's Stadium


In [25]:
# Grab NFL data frame
# Grab Stadium column
# Pass in numbers to Stadium column
nfl_frame ['Stadium'] = np.arange(5)
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East,0
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North,1
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North,2
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East,3
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North,4


In [28]:
# Add a series to a dataframe
# Make a new series called stadiums
# Pass a list
# Assign index of 4 to Levi's Stadium and 0 to ATT Stadium
# Show the new series called stadiums
stadiums = Series(["Levi's Stadium", "ATT Stadium"],index = [4,0])
stadiums

4    Levi's Stadium
0       ATT Stadium
dtype: object

In [29]:
# Import to NFL dataframe
# Adds the new series called stadiums to the NFL dataframe
# Adds the new column called Stadium
# Pandas will fill in the Null values (missing values)
nfl_frame['Stadium'] = stadiums
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference,Stadium
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East,ATT Stadium
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North,
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North,
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East,
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North,Levi's Stadium


In [30]:
# Delete entire column
del nfl_frame['Stadium']
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied*,Pct.,First Season,Total Games,Conference
0,1,Dallas Cowboys,510,378,6,0.574,1960,894,NFC East
1,2,Chicago Bears,752,563,42,0.57,1920,1357,NFC North
2,3,Green Bay Packers,741,561,37,0.567,1921,1339,NFC North
3,4,Miami Dolphins,443,345,4,0.562,1966,792,AFC East
4,5,Baltimore Ravens,182,143,1,0.56,1996,326,AFC North


In [34]:
# Pass a dictionary to a dataframe
# Create a dictionary with City, Population
# Show dictionary
data = {'City':['SF','LA','NYC'],'Population':[837000,3880000,8400000]}
data

{'City': ['SF', 'LA', 'NYC'], 'Population': [837000, 3880000, 8400000]}

In [35]:
# Create a new dataframe by passing in the dictionary
# Show the dataframe
# Pandas can take in a dictionary
# Many ways to create a dataframe in Pandas
city_frame = DataFrame(data)
city_frame


Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [38]:
# Check Pandas documentation for different ways to create dataframe

import webbrowser
website='http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html'
webbrowser.open(website)

True