In [28]:
# Passing a plain list of values is the simplest way to create a Series.
# pandas automatically assigns an index starting at zero and sets the name
# of the Series to None.
import pandas as pd

students = ["alice", "jack", "molly"]
pd.Series(data=students)

0    alice
1     jack
2    molly
dtype: object

In [29]:
# When a list of strings contains a None element, pandas inserts it as None
# and uses the object dtype for the underlying array.
students = ["alice", "jack", None]
pd.Series(data=students)


0    alice
1     jack
2     None
dtype: object

In [30]:
# With a list of numbers (integers or floats) that contains a None, pandas
# instead converts the None into the special floating-point value NaN,
# which stands for "Not a Number", and the Series dtype becomes float64.
students = [1, 2, None]
pd.Series(data=students)

0    1.0
1    2.0
2    NaN
dtype: float64

In [31]:
# NaN is not equivalent to None: comparing the two with == evaluates to False.
import numpy as np
np.nan == None

False

In [32]:
np.nan == np.nan
# Even NaN compared with itself is False, so we need special functions to test for the presence of NaN.

False

In [33]:
# np.isnan is the dedicated test for NaN; it evaluates to True here.
np.isnan(np.nan)

True

In [34]:
# A Series can also be created from a dictionary: the keys become the index
# labels and the values become the data.
student_score = dict(alice='physics', jack='science', molly='math')
s = pd.Series(data=student_score)
s

alice    physics
jack     science
molly       math
dtype: object

In [35]:

# The index of the Series holds the dictionary keys it was built from.
s.index

Index(['alice', 'jack', 'molly'], dtype='object')

In [36]:
# A list of tuples: each tuple is stored whole as a single Series element.
students = [
    ('alice', 'brown'),
    ('jack', 'white'),
    ('molly', 'green'),
]
pd.Series(data=students)

0    (alice, brown)
1     (jack, white)
2    (molly, green)
dtype: object

In [37]:
# Supply the values and the index labels as two separate lists.
s = pd.Series(
    data=['Physics', 'Science', 'Math'],
    index=['alice', 'jack', 'molly'],
)
s

alice    Physics
jack     Science
molly       Math
dtype: object

In [38]:
# Passing an explicit index together with a dictionary keeps only the listed
# labels. The resulting Series does not contain Jack, even though he is in the
# original data, but it does contain Sam, as a missing value.
student_score = dict(alice='physics', jack='science', molly='math')
s = pd.Series(data=student_score, index=['alice', 'molly', 'sam'])
s

alice    physics
molly       math
sam          NaN
dtype: object

In [39]:

# Rebuild the Series with four students so there are more entries to query.
student_score = dict(
    alice='physics',
    jack='science',
    molly='math',
    sam='history',
)
s = pd.Series(data=student_score)
s

alice    physics
jack     science
molly       math
sam      history
dtype: object

In [40]:
# To see the fourth entry, use the iloc attribute with position 3: s.iloc[3].

s.iloc[3]

'history'

In [41]:
# Remember that positions always start from zero. To see which class Molly has,
# use the loc attribute with the label 'molly': s.loc['molly'].
s.loc['molly']

'math'

In [42]:
# Keep in mind that iloc and loc are attributes, not methods, so you query them with
# square brackets rather than parentheses. The square brackets are called the indexing operator.


In [43]:
# The indexing operator can also be used on the Series itself. If you pass in an integer,
# the operator behaves as if you wanted to query via the iloc attribute, so s[3] is just
# like s.iloc[3]. If you pass in an object, it queries by label as if you had used the
# loc attribute, so s['molly'] is just like s.loc['molly'].
# NOTE(review): newer pandas releases deprecate this positional fallback for integer keys
# on a non-integer index - confirm against the installed version and prefer s.iloc[3].

s[3]

'history'

In [44]:
# A label passed to the indexing operator behaves like s.loc['molly'].
s['molly']

'math'

In [45]:
# pandas cannot determine automatically whether you intend to query by index
# position or by index label, so be careful when using the indexing operator
# on the Series itself. The safer option is to be explicit and use the iloc
# and loc attributes directly. Here the index labels are themselves integers.
class_code = {
    99: 'English',
    100: 'History',
    101: 'Chemistery',
    103: 'Physics',
}
s = pd.Series(data=class_code)

In [46]:
 # The index labels here are integers (99, 100, ...), so s[0] does not call s.iloc[0]
 # underneath as one might expect: it looks for the label 0, which does not exist, and
 # raises KeyError. The error is caught and printed so that it is still shown but the
 # notebook keeps running from top to bottom.
 try:
     s[0]
 except KeyError as err:
     print(f"KeyError: {err}")

KeyError: 0

In [None]:
# A very simple averaging routine: loop over the Series, accumulate a running
# total, then divide by the number of grades. This works, but it is slow.
grades = pd.Series(data=[90, 80, 70, 60])
total = 0
for grade in grades:
    total = total + grade
print(total / len(grades))

In [None]:
# Modern computers can do many tasks simultaneously, especially, but not only, tasks involving mathematics.
# pandas and the underlying NumPy library support a number of methods for computation, and vectorization
# in particular works with most of the functions in the NumPy library, including the sum function.
import numpy as np

total = np.sum(grades)
print(total/len(grades))

In [50]:
# Build a Series of 10,000 random integers drawn from the range 0 to 999.
numbers = pd.Series(np.random.randint(low=0, high=1000, size=10000))
numbers.head()  # show the first 5 values

0    355
1    991
2    274
3     24
4    773
dtype: int64

In [51]:
len(numbers)
# To time the computation we will use Jupyter's %%timeit cell magic.

10000

In [52]:
%%timeit -n 100

# Loop version: visit every value in Python, accumulate the sum, then divide
# by the number of values to get the mean.
total = 0
for number in numbers:
    total +=number
total/len(numbers)

1.31 ms ± 47.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [53]:
%%timeit -n 100

# Vectorized version: NumPy sums the whole Series in a single call.
total = np.sum(numbers)
total/len(numbers)

75.6 µs ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [54]:
# Now let's increase every item in the Series by 2. The += operator is applied
# to every value at once, with no explicit loop.
numbers +=2
numbers.head()

0    357
1    993
2    276
3     26
4    775
dtype: int64

In [55]:
# The slow alternative: walk the Series one (label, value) pair at a time and
# write each increased value back by its label. Series.items() yields the
# pairs (it replaces the removed Series.iteritems()), and the .at accessor
# sets a single value by label (it replaces the removed Series.set_value()).
for label, value in numbers.items():
    numbers.at[label] = value + 2
numbers.head()

0    359
1    995
2    278
3     28
4    777
dtype: int64

In [56]:
%%timeit -n 10

# Build a fresh Series of 1,000 random integers, then add 2 to each element
# one at a time through .loc. Series.items() is used to iterate because
# Series.iteritems() has been removed from current pandas.
s = pd.Series(np.random.randint(0,1000,1000))

for label, value in s.items():
    s.loc[label] = value+2

138 ms ± 1.72 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [49]:
%%timeit -n 10

# Vectorized counterpart: add 2 to all 1,000 values in one operation.
s = pd.Series(np.random.randint(low=0, high=1000, size=1000))
s += 2

249 µs ± 24 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [57]:
# The .loc attribute lets you add new data as well as modify data in place:
# if the label you pass in does not exist in the index, a new entry is created.
s = pd.Series(data=[1, 2, 3])

s.loc['History'] = 100
s

0            1
1            2
2            3
History    100
dtype: int64

In [58]:
# We see that mixed types for data values or index labels are no problem for pandas.
# Since 'History' is not in the original list of indices, s.loc['History'] essentially creates
# a new element in the Series, with the index label 'History' and the value 100.
# This was an example with unique index values.

In [60]:
# Now we will look at an example where the index values are not unique.
student_classes = pd.Series(
    data=dict(alice='physics', jack='science', molly='math', sam='history')
)
student_classes

alice    physics
jack     science
molly       math
sam      history
dtype: object

In [61]:
# Kelly takes three classes, so the label 'kelly' is repeated for each entry.
kelly_classes = pd.Series(
    data=['math', 'history', 'sport'],
    index=['kelly'] * 3,
)
kelly_classes

kelly       math
kelly    history
kelly      sport
dtype: object

In [62]:
# Combine both Series into one. pd.concat is used because Series.append has
# been removed from current pandas. Like append, it returns a new Series and
# does not modify student_classes or kelly_classes.
allstudents_classes = pd.concat([student_classes, kelly_classes])
allstudents_classes

alice    physics
jack     science
molly       math
sam      history
kelly       math
kelly    history
kelly      sport
dtype: object

In [63]:
# This is a common pattern in pandas: by default an operation returns a new object instead
# of modifying one in place, and it is one you should come to expect. The original
# student_classes Series is therefore unchanged (display it to confirm).
# Because the label 'kelly' appears three times in the combined Series, querying it
# with .loc returns a Series containing all three matching entries.
allstudents_classes.loc['kelly']

kelly       math
kelly    history
kelly      sport
dtype: object

In [65]:
# 'jack' appears only once in the index, so .loc returns the single value itself.
allstudents_classes.loc['jack']

'science'