In [1]:
import pandas as pd

# Map each student to the class they take, then wrap the mapping in a Series:
# the dict keys become the index labels and the dict values become the data.
students_classes = dict(
    Alice='Physics',
    Jack='Chemistry',
    Molly='English',
    Sam='History',
)
s = pd.Series(students_classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [5]:
# .iloc selects by integer position (0-based), so 3 is the fourth element
print(s.iloc[3])
# OR, for a single value, .iat performs the same positional lookup.
# Plain s[3] is avoided here: this Series has string labels, and treating an
# integer key as a position is a deprecated fallback in recent pandas versions.
print(s.iat[3])

History
History


In [6]:
# Label-based access: both spellings below look the value up by its index label.
label = 'Molly'

# Explicitly, through the .loc attribute
print(s.loc[label])

# Or by handing the label straight to the [] operator
print(s[label])

English
English


In [7]:
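# With an integer index, s[integer] is treated as a label lookup rather than a position,
# so it cannot be relied on for positional access: use .iloc for positions and .loc for labels.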

In [8]:
grades = pd.Series([90, 80, 70, 60])

# Accumulate the sum one element at a time with a plain Python loop,
# then divide by the number of grades to get the average.
total = 0
for mark in grades:
    total = total + mark
print(total / len(grades))

75.0


In [14]:
import numpy as np

# np.sum takes an iterable (here, our pandas Series) and returns the total in one call.
total = np.sum(grades)
print(total / grades.size)

75.0


In [13]:

# np.mean computes the average of the Series directly, in a single call.
average = np.mean(grades)
print(average)

75.0


In [15]:
# Let's now use Jupyter's magic functions to compare the execution speed of the
# previous three approaches on a large, randomly generated Series.
numbers = pd.Series(np.random.randint(low=0, high=1000, size=10000))

# Peek at the first five items with head() to make sure the values actually
# look random.
numbers.head()



0     76
1    161
2    889
3     13
4     69
dtype: int64

In [18]:
# The magic function we are going to use is called timeit.
# Cell magics are written with a %% prefix and must be on the first line of the cell
# (line magics use a single % instead).
# The -n option sets the number of loops to run in each timing repetition.


In [17]:
%%timeit -n 100
# Baseline: sum the Series with a plain Python loop, then divide by its length.
total = 0
for number in numbers:
    total+=number

total/len(numbers)

1.44 ms ± 71.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
%%timeit -n 100
# Same average, but with the summation delegated to np.sum.
total = np.sum(numbers)
total/len(numbers)

119 µs ± 33.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%timeit -n 100
# Same average again, computed in a single np.mean call
# (the result is stored in `total`, although it holds the mean rather than a sum).
total = np.mean(numbers)


77.9 µs ± 18.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
# Wow: the vectorized np.sum / np.mean versions are more than 10x faster than the Python loop.

In [23]:
# Let's look at the head of our series (its first few values)
numbers.head()

0     76
1    161
2    889
3     13
4     69
dtype: int64

In [24]:
# A related feature in pandas and numpy is called broadcasting. With broadcasting, you can
# apply an operation to every value in the series, changing the series. For instance, if we
# wanted to increase every random variable by 2, we could do so quickly using the += operator
# directly on the Series object.
# Note: because += updates `numbers` itself, running this cell again adds 2 once more.
numbers+=2
numbers.head()

0     78
1    163
2    891
3     15
4     71
dtype: int64

In [33]:
# Series.items() yields (label, value) pairs, so we can walk the Series one
# element at a time. (It replaces iteritems(), which was removed in pandas 2.0.)
for label, value in numbers.items():
    # Write the incremented value back at the same label through .loc
    numbers.loc[label] = value + 2
# And we can check the result of this computation
numbers.head()

0     80
1    165
2    893
3     17
4     73
dtype: int64

In [38]:
%%timeit -n 10
for label, value in numbers.iteritems():
    # now for the item which is returned, lets call set_value()

    numbers.loc[label]= value+2

594 ms ± 28.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [44]:
%%timeit -n 10
# Build a fresh Series of 10,000 random integers inside the timed body, so the
# measurement includes the construction cost as well as the addition.
s = pd.Series(np.random.randint(0,1000,10000))
# Broadcasting: a single += adds 2 to every element without writing an explicit loop.
s+=2

629 µs ± 175 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [45]:
# Amazing: broadcasting takes microseconds where the element-by-element loop took hundreds of milliseconds.

__Non-unique keys__

In [46]:
# Start from a small Series holding a few numbers.
s = pd.Series(data=[1, 2, 3])

# Assigning through .loc with a label that is not in the index yet (here the
# name of a university course) adds a new entry to the Series.
s.loc['History'] = 102

s

0            1
1            2
2            3
History    102
dtype: int64

In [47]:
# We see that mixed types for data values or index labels are no problem for Pandas. Since 
# "History" is not in the original list of indices, s.loc['History'] essentially creates a 
# new element in the series, with the index named "History", and the value of 102

In [48]:
# Up until now I've shown only examples of a Series where the index values were unique. I want
# to end this lecture by showing an example where index values are not unique, and this makes
# a pandas Series a little different conceptually than, for instance, a relational database.

# Let's create a Series of students (the index) and the courses they have taken (the data)
students_classes = pd.Series(
    ['Physics', 'Chemistry', 'English', 'History'],
    index=['Alice', 'Jack', 'Molly', 'Sam'],
)
students_classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [49]:
# Now let's build a Series for a new student, Kelly, listing every course she has
# taken. The data holds the course names, and every entry carries the same index
# label, 'Kelly'.
kelly_classes = pd.Series(
    data=['Philosophy', 'Arts', 'Math'],
    index=['Kelly'] * 3,
)
kelly_classes

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [50]:
# Finally, we can add all of the data in this new Series to the end of the first one with
# pd.concat(), which joins the Series in the list end to end.
# (Series.append() used to do this, but it was removed in pandas 2.0.)
all_students_classes = pd.concat([students_classes, kelly_classes])

# This creates a series which has our original people in it as well as all of Kelly's courses
all_students_classes

Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [51]:
# There are a couple of important considerations when appending one Series to another. First,
# pandas will take the Series and try to infer the best data types to use. In this example,
# everything is a string, so there are no problems here. Second, the operation doesn't change
# the underlying Series objects; it instead returns a new Series made up of the two joined
# together. This is a common pattern in pandas - by default returning a new object instead of
# modifying in place - and one you should come to expect. By displaying the original Series we
# can see that it hasn't changed.
students_classes

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [52]:
# Finally, we see that when we query the appended series for Kelly, we don't get a single value,
# but a series itself, because the label 'Kelly' appears three times in the index.
all_students_classes.loc['Kelly']

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object