Working with Series 

In [29]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

In [30]:
# Build a Series from a plain list; no index is given, so pandas supplies the default 0..n-1 labels.
series_data = Series(data=[1, 2, 3, 4, 5])

In [31]:
series_data


0    1
1    2
2    3
3    4
4    5
dtype: int64

In [32]:
# .values exposes the underlying data as a NumPy array (index labels are not included)
series_data.values

array([1, 2, 3, 4, 5], dtype=int64)

In [33]:
# .index returns the row labels; since none were supplied this is the default RangeIndex
series_data.index

RangeIndex(start=0, stop=5, step=1)

In [34]:
# Marks labelled by subject: the index carries subject names instead of 0..n-1.
marks = Series(
    data=[98, 95, 93, 75],
    index=['Maths', 'science', 'chemistry', 'hindi'],
)

In [35]:
# prints both the index and marks
marks


Maths        98
science      95
chemistry    93
hindi        75
dtype: int64

In [36]:
# With explicit labels, .index is an object-dtype Index holding the subject names
marks.index

Index([u'Maths', u'science', u'chemistry', u'hindi'], dtype='object')

In [37]:
# Element-wise comparison: yields a boolean Series carrying the same subject labels
marks > 80

Maths         True
science       True
chemistry     True
hindi        False
dtype: bool

In [38]:
# Boolean-mask selection: keep only the subjects whose mark is strictly above 80.
marks[marks.gt(80)]

Maths        98
science      95
chemistry    93
dtype: int64

In [39]:
# `in` on a Series tests membership among the index labels (here the subject names), not the values
'science' in marks


True

In [40]:
# Convert the Series to a plain dict mapping each index label (subject) to its value (mark)
marks_dict = marks.to_dict()

In [41]:
marks_dict

{'Maths': 98, 'chemistry': 93, 'hindi': 75, 'science': 95}

In [42]:
# Round trip: rebuild the Series from the dict, with the dict keys becoming the index labels.
# Note that this rebinds `marks`, replacing the Series created earlier.
marks = Series(data=marks_dict)

In [43]:
marks

Maths        98
chemistry    93
hindi        75
science      95
dtype: int64

In [44]:
 subjects = ['Maths','chemistry','hindi','science','social']

In [45]:
# Passing an explicit index looks each label up in the dict; 'social' has no entry,
# so it becomes NaN and the Series is displayed as float64.
marks = Series(data=marks_dict, index=subjects)

In [46]:
marks

Maths        98.0
chemistry    93.0
hindi        75.0
science      95.0
social        NaN
dtype: float64

In [47]:
# Boolean Series: True where the value is missing (only 'social' here).
marks.isnull()

Maths        False
chemistry    False
hindi        False
science      False
social        True
dtype: bool

In [20]:
# Boolean Series: True where a value is present (the complement of the missing-value mask).
marks.notnull()

Maths         True
chemistry     True
hindi         True
science       True
social       False
dtype: bool

In [21]:
marks

Maths        98.0
chemistry    93.0
hindi        75.0
science      95.0
social        NaN
dtype: float64

Working with Series Done
Working with DataFrames

In [84]:
import webbrowser
# Wikipedia page containing the NFL win-loss records table
websites = 'https://en.wikipedia.org/wiki/NFL_win%E2%80%93loss_records'
# Open the page in a browser so the table can be copied by hand;
# a later cell reads the copied text back with pd.read_clipboard().
webbrowser.open(websites)

True

In [49]:
# Parse whatever tabular text is currently on the system clipboard into a DataFrame.
# NOTE(review): this relies on a manual copy step, so the cell is not reproducible
# under Restart & Run All.
# NOTE(review): the displayed frame shows multi-word headers and team names split
# across columns (e.g. Team='Dallas', Won='Cowboys'; 'First', 'NFL', 'Season' as
# separate columns), which suggests the text was split on whitespace. Consider
# passing sep='\t' or loading the table with pd.read_html -- confirm against the
# actual clipboard contents.
nfl_records = pd.read_clipboard()

In [50]:
nfl_records

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division
0,1,Dallas,Cowboys,493,367,6,0.573,1960.0,866,NFC,East,
1,2,Green,Bay,Packers,730,553,37.0,0.567,1921,1320,NFC,North
2,3,Chicago,Bears,744,568,42,0.565,1920.0,1354,NFC,North,
3,4,Miami,Dolphins,439,341,4,0.563,1966.0,784,AFC,East,
4,5,New,England,Patriots,476,383,9.0,0.554,1960,868,AFC,East
5,6,New,York,Giants,684,572,33.0,0.543,1925,1289,NFC,East


In [51]:
# Column labels as parsed from the clipboard text
nfl_records.columns

Index([u'Rank', u'Team', u'Won', u'Lost', u'Tied', u'Pct.', u'First', u'NFL',
       u'Season', u'Total', u'Games', u'Division'],
      dtype='object')

In [52]:
# A single column can be selected by attribute access (nfl_records.Team)
# or by key (nfl_records['Team']); the result is a Series.
nfl_records.Team

0     Dallas
1      Green
2    Chicago
3      Miami
4        New
5        New
Name: Team, dtype: object

In [53]:
# Build a new frame restricted to the listed columns.
# NOTE(review): 'First NFL Season' and 'Total Games' are not column labels of
# nfl_records as parsed (its labels include 'First', 'NFL', 'Season', 'Total',
# 'Games' separately), so those two columns come back entirely NaN.
DataFrame(nfl_records,columns=['Team','First NFL Season','Total Games'])

Unnamed: 0,Team,First NFL Season,Total Games
0,Dallas,,
1,Green,,
2,Chicago,,
3,Miami,,
4,New,,
5,New,,


In [54]:
# head(n) returns the first n rows; n defaults to 5 when omitted
nfl_records.head(3)

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division
0,1,Dallas,Cowboys,493,367,6,0.573,1960.0,866,NFC,East,
1,2,Green,Bay,Packers,730,553,37.0,0.567,1921,1320,NFC,North
2,3,Chicago,Bears,744,568,42,0.565,1920.0,1354,NFC,North,


In [55]:
# Retrieve one row by its index label; the result is a Series keyed by column name.
# .loc replaces the deprecated .ix indexer, which was removed in pandas 1.0.
nfl_records.loc[3]

Rank               4
Team           Miami
Won         Dolphins
Lost             439
Tied             341
Pct.               4
First          0.563
NFL             1966
Season           784
Total            AFC
Games           East
Division         NaN
Name: 3, dtype: object

In [56]:
# Assigning a scalar to a new column broadcasts that value to every row.
# Note that this modifies nfl_records in place.
nfl_records['Stadium'] = "Levi's Stadium"

In [57]:
nfl_records


Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division,Stadium
0,1,Dallas,Cowboys,493,367,6,0.573,1960.0,866,NFC,East,,Levi's Stadium
1,2,Green,Bay,Packers,730,553,37.0,0.567,1921,1320,NFC,North,Levi's Stadium
2,3,Chicago,Bears,744,568,42,0.565,1920.0,1354,NFC,North,,Levi's Stadium
3,4,Miami,Dolphins,439,341,4,0.563,1966.0,784,AFC,East,,Levi's Stadium
4,5,New,England,Patriots,476,383,9.0,0.554,1960,868,AFC,East,Levi's Stadium
5,6,New,York,Giants,684,572,33.0,0.543,1925,1289,NFC,East,Levi's Stadium


In [58]:
# Stadium names keyed by the row labels (4 and 0) they should be attached to.
stadiums = Series(data=['Levis', 'At&t'], index=[4, 0])

In [59]:
stadiums 

4    Levis
0     At&t
dtype: object

In [60]:
# Assigning a Series aligns on index labels: rows 0 and 4 receive their values and
# every other row becomes missing. This overwrites the previous 'Stadium' column in place.
nfl_records['Stadium'] = stadiums

In [61]:
nfl_records

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division,Stadium
0,1,Dallas,Cowboys,493,367,6,0.573,1960.0,866,NFC,East,,At&t
1,2,Green,Bay,Packers,730,553,37.0,0.567,1921,1320,NFC,North,
2,3,Chicago,Bears,744,568,42,0.565,1920.0,1354,NFC,North,,
3,4,Miami,Dolphins,439,341,4,0.563,1966.0,784,AFC,East,,
4,5,New,England,Patriots,476,383,9.0,0.554,1960,868,AFC,East,Levis
5,6,New,York,Giants,684,572,33.0,0.543,1925,1289,NFC,East,


In [62]:
nfl_records


Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First,NFL,Season,Total,Games,Division,Stadium
0,1,Dallas,Cowboys,493,367,6,0.573,1960.0,866,NFC,East,,At&t
1,2,Green,Bay,Packers,730,553,37.0,0.567,1921,1320,NFC,North,
2,3,Chicago,Bears,744,568,42,0.565,1920.0,1354,NFC,North,,
3,4,Miami,Dolphins,439,341,4,0.563,1966.0,784,AFC,East,,
4,5,New,England,Patriots,476,383,9.0,0.554,1960,868,AFC,East,Levis
5,6,New,York,Giants,684,572,33.0,0.543,1925,1289,NFC,East,


In [63]:
# Source data for a DataFrame: each key names a column, each list holds that column's values.
data = dict(
    City=['sf', 'la', 'ch'],
    Population=[1234, 2345, 3456],
)

In [64]:
# Dict of equal-length lists -> DataFrame with one column per key and a default integer index.
city_population = DataFrame(data=data)

In [65]:
#Dataframe created 
city_population

Unnamed: 0,City,Population
0,sf,1234
1,la,2345
2,ch,3456


In [66]:
# Small letter-labelled Series used to explore Index behaviour in the following cells.
my_series = Series(data=[1, 2, 3, 4], index=list('ABCD'))
my_series

A    1
B    2
C    3
D    4
dtype: int64

In [67]:
# Keep a reference to the Series' Index object so it can be inspected on its own
my_index = my_series.index
my_index

Index([u'A', u'B', u'C', u'D'], dtype='object')

In [68]:
# An Index supports positional slicing; the result is again an Index
my_index[2:]

Index([u'C', u'D'], dtype='object')

In [28]:
# Index objects are immutable: item assignment raises a TypeError.
# The error is caught and displayed so that this demonstration does not
# abort a full top-to-bottom run of the notebook.
try:
    my_index[0] = 'S'
except TypeError as err:
    print('TypeError: %s' % err)


TypeError: Index does not support mutable operations

In [75]:
# reindex returns a new Series conformed to the given labels; 'E' and 'F' do not
# exist in my_series, so they are introduced as NaN (the values display as float64).
changed_my_series = my_series.reindex(list('ABCDEF'))

In [77]:
changed_my_series

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [79]:
#changed the index and filled the new value 
changed_my_series.reindex(['A','B','C','D','E','F','G'], fill_value = 0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [80]:
# Target labels 0..14, used below as the new index for a forward-fill reindex.
ranger = range(0, 15)

In [81]:
ranger

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [82]:
# Sparse Series: country names defined only at labels 0, 5 and 10.
# (Despite its name, new_index is a Series, not an Index.)
new_index = Series(data=['USA', 'Mexico', 'Canada'], index=[0, 5, 10])

In [83]:
# Expand to labels 0..14; method='ffill' carries the last known value forward
# into each gap (0-4 -> USA, 5-9 -> Mexico, 10-14 -> Canada).
new_index.reindex(index=ranger, method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object