In [1]:
names = ['dumbledore', 'beeblebrox', 'skywalker', 'hermione', 'leia']
capitalized_names = []
for name in names:
    capitalized_names.append(name.title())

# equals (do.. for)
capitalized_names = [name.title() for name in names]
capitalized_names

['Dumbledore', 'Beeblebrox', 'Skywalker', 'Hermione', 'Leia']

In [2]:
squares = [x**2 for x in range(9) if x % 2 == 0]
# to add else statements, move the conditionals to the beginning
squares = [x**2 if x % 2 == 0 else x + 3 for x in range(9)]

In [4]:
# example
names = ["Rick S", "Morty Smith", "Summer Smith", "Jerry Smith", "Beth Smith"]
first_names = [name.split(' ')[0] for name in names]
print(first_names)
# ['Rick', 'Morty', 'Summer', 'Jerry', 'Beth']

# example
multiples_3 = [i*3 for i in range(1,21)]
print(multiples_3)
# [3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60]

# example
scores = {
             "Rick": 70,
             "Morty Smith": 35,
             "Summer Smith": 82,
             "Jerry Smith": 23,
             "Beth Smith": 98
          }
passed = [name for name, score in scores.items() if score>=65]
print(passed)
# ['Rick', 'Summer Smith', 'Beth Smith']

['Rick', 'Morty', 'Summer', 'Jerry', 'Beth']
[3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60]
['Rick', 'Summer Smith', 'Beth Smith']


In [5]:
# Lambda Functions - lambda (arg1, arg2): do_a_thing_and_return_it
multiply = lambda x, y: x * y

# Equivalent of:
def multiply(x, y):
    return x * y

# Can call both of the above like:
multiply(4, 7)

28

In [6]:
# example of using map() to apply lambda function to a list
numbers = [
              [34, 63, 88, 71, 29],
              [90, 78, 51, 27, 45],
              [63, 37, 85, 46, 22],
              [51, 22, 34, 11, 18]
           ]
mean = lambda num_list: sum(num_list)/len(num_list)
averages = list(map(mean, numbers))
# or
averages = list(map(lambda num_list: sum(num_list)/len(num_list), numbers))
print(averages)
# [57.0, 58.2, 50.6, 27.2]
[57.0, 58.2, 50.6, 27.2]

[57.0, 58.2, 50.6, 27.2]


[57.0, 58.2, 50.6, 27.2]

In [7]:
# example of using filter() to apply lambda function to a list
cities = ["New York City", "Los Angeles", "Chicago", "Mountain View", "Denver", "Boston"]
is_short = lambda name: len(name) < 10
short_cities = list(filter(is_short, cities))
# or
short_cities = list(filter(lambda name: len(name) < 10, cities))
print(short_cities)

['Chicago', 'Denver', 'Boston']


In [8]:
# Generators
def my_range(x):
    i = 0
    while i < x:
        yield i
        i += 1
# since this returns an iterator, we can convert it to a list
# or iterate through it in a loop to view its contents
for x in my_range(5):
    print(x)
'''
0
1
2
3
4
'''

# You can create a generator in the same way you'd normally write a list comprehension, except with
# parentheses instead of square brackets.
# this list comprehension produces a list of squares
sq_list = [x**2 for x in range(10)]
# this generator produces an iterator of squares
sq_iterator = (x**2 for x in range(10))

# example
# generator function that works like the built-in function enumerate
lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", "Functions", "Scripting"]
def my_enumerate(iterable, start=0):
    i = start
    for element in iterable:
        yield i, element
        i = i + 1
for i, lesson in my_enumerate(lessons, 1):
    print("Lesson {}: {}".format(i, lesson))
'''
Lesson 1: Why Python Programming
Lesson 2: Data Types and Operators
Lesson 3: Control Flow
Lesson 4: Functions
Lesson 5: Scripting
'''

# example
# If you have an iterable that is too large to fit in memory in full
# (e.g., when dealing with large files), being able to take and use
# chunks of it at a time can be very valuable.
# Implementing a generator function, chunker, that takes in an
# iterable and yields a chunk of a specified size at a time.

def chunker(iterable, size):
    for i in range(0, len(iterable), size):
        yield iterable[i:i + size]
for chunk in chunker(range(25), 4):
    print(list(chunk))


0
1
2
3
4
Lesson 1: Why Python Programming
Lesson 2: Data Types and Operators
Lesson 3: Control Flow
Lesson 4: Functions
Lesson 5: Scripting
[0, 1, 2, 3]
[4, 5, 6, 7]
[8, 9, 10, 11]
[12, 13, 14, 15]
[16, 17, 18, 19]
[20, 21, 22, 23]
[24]


In [9]:
list_of_random_things = [1, 3.4, 'a string', True]

In [10]:
list_of_random_things[0];
list_of_random_things[-1]; #last element
list_of_random_things[len(list_of_random_things) - 1]; #last element

In [13]:
list_of_random_things[1:3] # returns [3.4, 'a string']
list_of_random_things[:2] # returns [1, 3.4]
list_of_random_things[1:] # returns all of the elements to the end of the list [3.4, 'a string', True]

[3.4, 'a string', True]

In [12]:
# in, not in
'this' in 'this is a string' # True
'in' in 'this is a string' # True
'isa' in 'this is a string' # False
5 not in [1, 2, 3, 4, 6] # True
5 in [1, 2, 3, 4, 6] # False

False

In [11]:
# Mutable and ordered
my_lst = [1, 2, 3, 4, 5]
my_lst[0] = 0
print(my_lst)

[0, 2, 3, 4, 5]


In [17]:
# length of list
len(list_of_random_things)

4

In [18]:
# returns the smallest element of the list
min(list_of_random_things)

# returns the greatest element of the list. This works because the the max function is defined in terms of the greater than comparison operator. The max function is undefined for lists that contain elements from different, incomparable types.
max(list_of_random_things)

TypeError: ignored

In [16]:
# returns a copy of a list in order from smallest to largest,
# leaving the list unchanged.
sorted(list_of_random_things)

TypeError: ignored

In [19]:
# join() returns a string consisting of the list elements joined by a separator string.
# Takes only a list of strings as an argument
name = "-".join(["Grace", "Kelly"])
print(name) # Grace-Kelly

Grace-Kelly


In [20]:
cities = ['new york city', 'mountain view', 'chicago', 'los angeles']
capitalized_cities = []
for city in cities:
    capitalized_cities.append(city.title())

In [21]:
letters = ['a', 'b', 'c', 'd']
letters.append('z')
print(letters) # ['a', 'b', 'c', 'd', 'z']
# Note: letters[i] = 'z'; wouldn't work, use append()

['a', 'b', 'c', 'd', 'z']


In [22]:
cities = ['new york city', 'mountain view', 'chicago', 'los angeles']
for index in range(len(cities)):
    cities[index] = cities[index].title()

In [23]:
items = ['first string', 'second string']
html_str = "<ul>\n"
for item in items:
    html_str += "<li>{}</li>\n".format(item)
html_str += "</ul>"
print(html_str)

<ul>
<li>first string</li>
<li>second string</li>
</ul>


In [24]:
# vowel string
vowelString = 'aeiou'
print(list(vowelString))

# vowel tuple
vowelTuple = ('a', 'e', 'i', 'o', 'u')
print(list(vowelTuple))

# vowel list
vowelList = ['a', 'e', 'i', 'o', 'u']
print(list(vowelList))

# All Print: ['a', 'e', 'i', 'o', 'u']

['a', 'e', 'i', 'o', 'u']
['a', 'e', 'i', 'o', 'u']
['a', 'e', 'i', 'o', 'u']


In [25]:
location = (13.4125, 103.866667)

In [26]:
print("Latitude:", location[0])
print("Longitude:", location[1])

Latitude: 13.4125
Longitude: 103.866667


In [27]:
# can also be used to assign multiple variables in a compact way
dimensions = 52, 40, 100

In [28]:
# tuple unpacking
length, width, height = dimensions
print("The dimensions are {} x {} x {}".format(length, width, height))

The dimensions are 52 x 40 x 100


In [29]:
numbers = [1, 2, 6, 3, 1, 1, 6]
unique_nums = set(numbers)
print(unique_nums) # {1, 2, 3, 6}

{1, 2, 3, 6}


In [30]:
fruit = {"apple", "banana", "orange", "grapefruit"}

In [31]:
print("watermelon" in fruit)

False


In [32]:
fruit.add("watermelon")
print(fruit)

{'orange', 'grapefruit', 'banana', 'watermelon', 'apple'}


In [33]:
print(fruit.pop())
print(fruit)

orange
{'grapefruit', 'banana', 'watermelon', 'apple'}


In [34]:
elements = {"hydrogen": 1, "helium": 2, "carbon": 6}

In [35]:
print(elements["helium"])

2


In [36]:
elements["lithium"] = 3 

In [37]:
# Just keys
for key in cast:
    print(key)
# Keys and values
for key, value in cast.items():
    print("Actor: {}    Role: {}".format(key, value))

NameError: ignored

In [38]:
# check whether a value is in a dictionary, the same way we check whether a value is in a list or set with the in keyword.
print("carbon" in elements) # True

True


In [39]:
# get() looks up values in a dictionary, but unlike square brackets, get returns None (or a default value of your choice) if the key isn't found.
# If you expect lookups to sometimes fail, get might be a better tool than normal square bracket lookups.
print(elements.get("dilithium")) # None
print(elements.get('kryptonite', 'There\'s no such element!'))
# "There's no such element!"

None
There's no such element!


In [40]:
n = elements.get("dilithium")
print(n is None) # True
print(n is not None) # False

True
False


In [41]:
a = [1, 2, 3]
b = a
c = [1, 2, 3]
print(a == b) # True
print(a is b) # True
print(a == c) # True
print(a is c) # False
# List a and list b are equal and identical.
# List c is equal to a (and b for that matter) since they have the same contents. But a and c (and b for that matter, again) point to two different objects, i.e., they aren't identical objects.
# That is the difference between checking for equality vs. identity.

True
True
True
False


In [42]:
elements = {"hydrogen": {"number": 1,
                         "weight": 1.00794,
                         "symbol": "H"},
              "helium": {"number": 2,
                         "weight": 4.002602,
                         "symbol": "He"}}
helium = elements["helium"]  # get the helium dictionary
hydrogen_weight = elements["hydrogen"]["weight"]  # get hydrogen's weight
oxygen = {"number":8,"weight":15.999,"symbol":"O"}  # create a new oxygen dictionary 
elements["oxygen"] = oxygen  # assign 'oxygen' as a key to the elements dictionary
print('elements = ', elements)

elements =  {'hydrogen': {'number': 1, 'weight': 1.00794, 'symbol': 'H'}, 'helium': {'number': 2, 'weight': 4.002602, 'symbol': 'He'}, 'oxygen': {'number': 8, 'weight': 15.999, 'symbol': 'O'}}


In [43]:
words =  ['great', 'expectations','the', 'adventures', 'of', 'sherlock','holmes','the','great','gasby','hamlet','adventures','of','huckleberry','fin'];
word_counter = {}
for word in words:
    word_counter[word] = word_counter.get(word,0)+1;
print(word_counter);
# Prints {'fin': 1, 'huckleberry': 1, 'hamlet': 1, 'holmes': 1, 'adventures': 2, 'sherlock': 1, 'expectations': 1, 'great': 2, 'the': 2, 'of': 2, 'gasby': 1}

{'great': 2, 'expectations': 1, 'the': 2, 'adventures': 2, 'of': 2, 'sherlock': 1, 'holmes': 1, 'gasby': 1, 'hamlet': 1, 'huckleberry': 1, 'fin': 1}


In [44]:
import numpy as np

In [45]:
# Create a 1D ndarray that contains only integers
x = np.array([1, 2, 3, 4, 5])
print('x = ', x) # x = [1 2 3 4 5]
print('x has dimensions:', x.shape) # x has dimensions: (5,)
print('The elements in x are of type:', x.dtype) # The elements in x are of type: int64

# Create a rank 2 ndarray that only contains integers
Y = np.array([[1,2,3],[4,5,6],[7,8,9], [10,11,12]])
print('Y has dimensions:', Y.shape) # Y has dimensions: (4, 3)
print('Y has a total of', Y.size, 'elements') # Y has a total of 12 elements
print('Y is an object of type:', type(Y)) # Y is an object of type: class 'numpy.ndarray'
print('The elements in Y are of type:', Y.dtype) # The elements in Y are of type: int64

x =  [1 2 3 4 5]
x has dimensions: (5,)
The elements in x are of type: int64
Y has dimensions: (4, 3)
Y has a total of 12 elements
Y is an object of type: <class 'numpy.ndarray'>
The elements in Y are of type: int64


In [46]:
# Specify the dtype when creating the ndarray
x = np.array([1.5, 2.2, 3.7, 4.0, 5.9], dtype = np.int64)

In [47]:
# Save the array into a file
np.save('my_array', x)

# Load the saved array from current directory
y = np.load('my_array.npy')

In [48]:
# Create ndarray using built-in functions
# 3 x 4 ndarray full of zeros
# np.zeros(shape)
X = np.zeros((3,4))

In [49]:
# a 3 x 2 ndarray full of ones
# np.ones(shape)
X = np.ones((3,2))

In [50]:
# 2 x 3 ndarray full of fives
# np.full(shape, constant value)
X = np.full((2,3), 5)

In [51]:
# Identity Matrix
# Since all Identity Matrices are square, the np.eye() function only takes a single integer as an argument
# 5 x 5 Identity matrix
X = np.eye(5)

In [52]:
# Diagonal Matrix
# 4 x 4 diagonal matrix that contains the numbers 10,20,30, and 50 on its main diagonal
X = np.diag([10,20,30,50])

In [53]:
# Arange
# rank 1 ndarray that has sequential integers from 0 to 9
# x = [0 1 2 3 4 5 6 7 8 9]
x = np.arange(10)

# rank 1 ndarray that has sequential integers from 4 to 9
# [start, stop)
# x = [4 5 6 7 8 9]
x = np.arange(4,10)

# rank 1 ndarray that has evenly spaced integers from 1 to 13 in steps of 3.
# np.arange(start,stop,step)
# x = [ 1 4 7 10 13]
x = np.arange(1,14,3)

In [54]:
# Linspace
# Even though the np.arange() function allows for non-integer steps,
# such as 0.3, the output is usually inconsistent, due to the finite
# floating point precision. For this reason, in the cases where
# non-integer steps are required, it is usually better to use linspace()
# becayse np.linspace() uses the number of elements we want in a
# particular interval, instead of the step between values.
# linspace returns N evenly spaced numbers over the closed interval [start, stop]
# np.linspace(start, stop, N)
# x = [ 0. 2.77777778 5.55555556 8.33333333 11.11111111 13.88888889 16.66666667 19.44444444 22.22222222 25. ]
x = np.linspace(0,25,10)

In [55]:
# Reshape
# np.reshape(ndarray, new_shape)
# converts the given ndarray into the specified new_shape
x = np.arange(20)
x = np.reshape(x, (4,5))
# or
x = np.arange(20).reshape(4, 5) # does the same thing as above
# and the same thing with with linshape
y = np.linspace(0,50,10, endpoint=False).reshape(5,2)
# One great feature about NumPy, is that some functions can also be
# applied as methods. This allows us to apply different functions in
# sequence in just one line of code

In [56]:
# Slicing
# ndarray[start:end]
# ndarray[start:]
# ndarray[:end]
# ndarray[<start>:<stop>:<step>]

# In methods one and three, the end index is excluded [,)
X = np.arange(20).reshape(4, 5)

# select all the elements that are in the 2nd through 4th rows and in the 3rd to 5th columns
Z = X[1:4,2:5]
# or
Z = X[1:,2:5]

# elements = a_list[<start>:<stop>:<step>]
# select all the elements in the 3rd row
v = X[2,:] # v = [10 11 12 13 14]
# select all the elements in the 3rd column
q = X[:,2] # q = [ 2 7 12 17]
# select all the elements in the 3rd column but return a rank 2 ndarray
R = X[:,2:3]
'''
[[ 2]
 [ 7]
 [12]
 [17]]
'''
# Note: Slicing creates a view, not a copy
# when we make assignments, such as: Z = X[1:4,2:5]
# the slice of the original array X is not copied in the variable Z.
# Rather, X and Z are now just two different names for the same ndarray.
# We say that slicing only creates a view of the original array.
# This means if we make changes to Z, X changes as well.

'\n[[ 2]\n [ 7]\n [12]\n [17]]\n'

In [57]:
 Random
# 3 x 3 ndarray with random floats in the half-open interval [0.0, 1.0).
# np.random.random(shape)
X = np.random.random((3,3))
# np.random.randint(start, stop, size = shape)
# [start, stop)
X = np.random.randint(4,15,size=(3,2))

# create ndarrays with random numbers that satisfy certain statistical properties
# 1000 x 1000 ndarray of random floats drawn from normal (Gaussian)
# distribution with a mean of zero and a standard deviation of 0.1.
# np.random.normal(mean, standard deviation, size=shape)
X = np.random.normal(0, 0.1, size=(1000,1000))

NameError: ignored

In [58]:
# Mutability
# Change ndarray
x[3] = 20
X[0,0] = 20

In [59]:
# Delete
# np.delete(ndarray, elements, axis)
x = np.array([1, 2, 3, 4, 5])
# delete the first and fifth element of x
x = np.delete(x, [0,4])

Y = np.array([[1,2,3],[4,5,6],[7,8,9]])
# delete the first row of Y
w = np.delete(Y, 0, axis=0)
# delete the first and last column of Y
v = np.delete(Y, [0,2], axis=1)

In [60]:
# Append
# np.append(ndarray, elements, axis)
# append the integer 6 to x
x = np.append(x, 6)
# append the integer 7 and 8 to x
x = np.append(x, [7,8])
# append a new row containing 7,8,9 to y
v = np.append(Y, [[10,11,12]], axis=0)
# append a new column containing 9 and 10 to y
q = np.append(Y,[[13],[14],[15]], axis=1)

In [61]:
# Insert
# np.insert(ndarray, index, elements, axis)
# inserts the given list of elements to ndarray right before
# the given index along the specified axis
x = np.array([1, 2, 5, 6, 7])
Y = np.array([[1,2,3],[7,8,9]])
# insert the integer 3 and 4 between 2 and 5 in x. 
x = np.insert(x,2,[3,4])
# insert a row between the first and last row of Y
w = np.insert(Y,1,[4,5,6],axis=0)
# insert a column full of 5s between the first and second column of Y
v = np.insert(Y,1,5, axis=1)

In [62]:
# Stacking
# NumPy also allows us to stack ndarrays on top of each other,
# or to stack them side by side. The stacking is done using either
# the np.vstack() function for vertical stacking, or the np.hstack()
# function for horizontal stacking. It is important to note that in
# order to stack ndarrays, the shape of the ndarrays must match.
x = np.array([1,2])
Y = np.array([[3,4],[5,6]])
z = np.vstack((x,Y)) # [[1,2], [3,4], [5,6]]
w = np.hstack((Y,x.reshape(2,1))) # [[3,4,1], [5,6,2]]

In [63]:
# Copy
# if we want to create a new ndarray that contains a copy of the
# values in the slice we need to use the np.copy()
# create a copy of the slice using the np.copy() function
Z = np.copy(X[1:4,2:5])
#  create a copy of the slice using the copy as a method
W = X[1:4,2:5].copy()

In [64]:
# Extract elements along the diagonal
d0 = np.diag(X)
# As default is k=0, which refers to the main diagonal.
# Values of k > 0 are used to select elements in diagonals above
# the main diagonal, and values of k < 0 are used to select elements
# in diagonals below the main diagonal.
d1 = np.diag(X, k=1)
d2 = np.diag(X, k=-1)

In [65]:
# Find Unique Elements in ndarray
u = np.unique(X)

In [67]:
# Boolean Indexing
X = np.arange(25).reshape(5, 5)
print('The elements in X that are greater than 10:', X[X > 10])
print('The elements in X that less than or equal to 7:', X[X <= 7])
print('The elements in X that are between 10 and 17:', X[(X > 10) & (X < 17)])

# use Boolean indexing to assign the elements that
# are between 10 and 17 the value of -1
X[(X > 10) & (X < 17)] = -1


The elements in X that are greater than 10: [11 12 13 14 15 16 17 18 19 20 21 22 23 24]
The elements in X that less than or equal to 7: [0 1 2 3 4 5 6 7]
The elements in X that are between 10 and 17: [11 12 13 14 15 16]


In [68]:
# Set Operations
x = np.array([1,2,3,4,5])
y = np.array([6,7,2,8,4])
print('The elements that are both in x and y:', np.intersect1d(x,y))
print('The elements that are in x that are not in y:', np.setdiff1d(x,y))
print('All the elements of x and y:',np.union1d(x,y))

The elements that are both in x and y: [2 4]
The elements that are in x that are not in y: [1 3 5]
All the elements of x and y: [1 2 3 4 5 6 7 8]


In [69]:
# Sorting
# When used as a function, it doesn't change the original ndarray
s = np.sort(x)
# When used as a method, the original array will be sorted
x.sort()

# sort x but only keep the unique elements in x
s = np.sort(np.unique(x))

# sort the columns of X
s = np.sort(X, axis = 0)

# sort the rows of X
s = np.sort(X, axis = 1)

In [70]:
# NumPy allows element-wise operations on ndarrays as well as
# matrix operations. In order to do element-wise operations,
# NumPy sometimes uses something called Broadcasting.
# Broadcasting is the term used to describe how NumPy handles
# element-wise arithmetic operations with ndarrays of different shapes.
# For example, broadcasting is used implicitly when doing arithmetic
# operations between scalars and ndarrays.
x = np.array([1,2,3,4])
y = np.array([5.5,6.5,7.5,8.5])
np.add(x,y)
np.subtract(x,y)
np.multiply(x,y)
np.divide(x,y)

# in order to do these operations the shapes of the ndarrays
# being operated on, must have the same shape or be broadcastable
X = np.array([1,2,3,4]).reshape(2,2)
Y = np.array([5.5,6.5,7.5,8.5]).reshape(2,2)
np.add(X,Y)
np.subtract(X,Y)
np.multiply(X,Y)
np.divide(X,Y)
# apply mathematical functions to all elements of an ndarray at once
np.exp(x)
np.sqrt(x)
np.power(x,2)


array([ 1,  4,  9, 16])

In [71]:
# Statistical Functions
print('Average of all elements in X:', X.mean())
print('Average of all elements in the columns of X:', X.mean(axis=0))
print('Average of all elements in the rows of X:', X.mean(axis=1))
print()
print('Sum of all elements in X:', X.sum())
print('Standard Deviation of all elements in X:', X.std())
print('Median of all elements in X:', np.median(X))
print('Maximum value of all elements in X:', X.max())
print('Minimum value of all elements in X:', X.min())

Average of all elements in X: 2.5
Average of all elements in the columns of X: [2. 3.]
Average of all elements in the rows of X: [1.5 3.5]

Sum of all elements in X: 10
Standard Deviation of all elements in X: 1.118033988749895
Median of all elements in X: 2.5
Maximum value of all elements in X: 4
Minimum value of all elements in X: 1


In [72]:
# Broadcasting
# NumPy is working behind the scenes to broadcast 3 along the ndarray
# so that they have the same shape. This allows us to add 3 to each
# element of X with just one line of code.
print(4*X)
print(4+X)
print(4-X)
print(4/X)
# NumPy is able to add 1 x 3 and 3 x 1 ndarrays to 3 x 3 ndarrays
# by broadcasting the smaller ndarrays along the big ndarray so that
# they have compatible shapes. In general, NumPy can do this provided
# that the smaller ndarray can be expanded to the shape of the larger
# ndarray in such a way that the resulting broadcast is unambiguous.
x = np.array([1,2,3])
Y = np.array([[1,2,3],[4,5,6],[7,8,9]])
Z = np.array([1,2,3]).reshape(3,1)
print(x + Y)
print(Z + Y)

[[ 4  8]
 [12 16]]
[[5 6]
 [7 8]]
[[3 2]
 [1 0]]
[[4.         2.        ]
 [1.33333333 1.        ]]
[[ 2  4  6]
 [ 5  7  9]
 [ 8 10 12]]
[[ 2  3  4]
 [ 6  7  8]
 [10 11 12]]


In [73]:
import pandas as pd

In [74]:
groceries = pd.Series(data = [30, 6, 'Yes', 'No'], index = ['eggs', 'apples', 'milk', 'bread'])

In [75]:
print('Groceries has shape:', groceries.shape)
print('Groceries has dimension:', groceries.ndim)
print('Groceries has a total of', groceries.size, 'elements')
print('The data in Groceries is:', groceries.values)
print('The index of Groceries is:', groceries.index)

Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of 4 elements
The data in Groceries is: [30 6 'Yes' 'No']
The index of Groceries is: Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')


In [76]:
# check whether an index label exists in Series
x = 'bananas' in groceries

In [77]:
# Accessing Elements
# using index labels:
# single index label
print('How many eggs do we need to buy:', groceries['eggs'])
# access multiple index labels
print('Do we need milk and bread:\n', groceries[['milk', 'bread']]) 
# use loc to access multiple index labels
print('How many eggs and apples do we need to buy:\n', groceries.loc[['eggs', 'apples']]) 

# access elements in Groceries using numerical indices:
# use multiple numerical indices
print('How many eggs and apples do we need to buy:\n',  groceries[[0, 1]]) 
# use a negative numerical index
print('Do we need bread:\n', groceries[[-1]]) 
# use a single numerical index
print('How many eggs do we need to buy:', groceries[0]) 
# use iloc (stands for integer location) to access multiple numerical indices
print('Do we need milk and bread:\n', groceries.iloc[[2, 3]])
# Since we can access elements in various ways, in order to remove
# any ambiguity to whether we are referring to an index label
# or numerical index, Pandas Series have two attributes,
# .loc and .iloc to explicitly state what we mean. The attribute
# .loc stands for location and it is used to explicitly state that
# we are using a labeled index. Similarly, the attribute .iloc stands
# for integer location and it is used to explicitly state that we are
# using a numerical index.

How many eggs do we need to buy: 30
Do we need milk and bread:
 milk     Yes
bread     No
dtype: object
How many eggs and apples do we need to buy:
 eggs      30
apples     6
dtype: object
How many eggs and apples do we need to buy:
 eggs      30
apples     6
dtype: object
Do we need bread:
 bread    No
dtype: object
How many eggs do we need to buy: 30
Do we need milk and bread:
 milk     Yes
bread     No
dtype: object


In [78]:
# access using Boolean Indexes
time_light[time_light<40]

NameError: ignored

In [79]:
# Change Elements
groceries['eggs'] = 2

In [80]:
# Delete Elements
# doesn't change the original Series being modified
groceries.drop('apples')
# delete items from Series in place by setting keyword inplace to True
groceries.drop('apples', inplace = True)

In [81]:
# Arithmetic Operations
# we can perform element-wise arithmetic operations on Pandas Series
fruits = pd.Series(data = [10, 6, 3,], index = ['apples', 'oranges', 'bananas'])
fruits + 2 # Adds 2 to all elements in the series
fruits - 2
fruits * 2
fruits / 2
# apply mathematical functions from NumPy to all elements of a Series
np.exp(fruits)
np.sqrt(fruits)
np.power(fruits,2)
# only apply arithmetic operations on selected items in Series
fruits['bananas'] + 2
fruits.iloc[0] - 2
fruits[['apples', 'oranges']] * 2
# you can apply arithmetic operations on a Series of mixed data
# type provided that the arithmetic operation is defined for all
# data types in the Series, otherwise you will get an error

apples     20
oranges    12
dtype: int64

In [82]:
# understanding axes
df.sum()       
# sums “down” the 0 axis (rows)
df.sum(axis=0) 
# equivalent (since axis=0 is the default)
df.sum(axis=1) 
# sums “across” the 1 axis (columns)

NameError: ignored

In [83]:
# Loading Data into DF
df = pd.read_csv('marauders_map.csv')

# limit which rows are read when reading in a file
pd.read_csv(‘df.csv’, nrows=10)        
# only read first 10 rows

pd.read_csv(‘df.csv’, skiprows=[1, 2]) 
# skip the first two rows of data

# randomly sample a DataFrame
train = df.sample(frac=0.75, random_column_y=1) 
# will contain 75% of the rows

test = df[~df.index.isin(train.index)] 
# will contain the other 25%

# change the maximum number of rows and columns printed (‘None’ means unlimited)
pd.set_option(‘max_rows’, None) 
# default is 60 rows

pd.set_option(‘max_columns’, None) 
# default is 20 columns
print df
# reset options to defaults
pd.reset_option(‘max_rows’)
pd.reset_option(‘max_columns’)

# change the options temporarily (settings are restored when you exit the ‘with’ block)
with pd.option_context(‘max_rows’, None, ‘max_columns’, None):
    print df

SyntaxError: ignored