# DATA SCIENCE CLASS REFRESH

# *PYTHON COLLECTIONS MODULE*

## Python defaultdict type

In [2]:
#Checking if defaultdict is a subclass of dict

from collections import defaultdict
issubclass(defaultdict, dict)

True

In [5]:
def_dict = defaultdict(list) # passes list to .default_factory

def_dict['one'] = 1   # adds a key-value pair
def_dict['missing'] # access a missing key returns an empty list

#defaultdict automatically creates a list of vaalues for missing key
def_dict['missing2'].append(4) #modify a missing key

def_dict

defaultdict(list, {'one': 1, 'missing': [], 'missing2': [4]})

In [6]:
#Example

dd_dict = defaultdict(list)
dd_dict['segun'] = 'UNILAG'
dd_dict['tolu'].append('FUTA')

dd_dict

defaultdict(list, {'segun': 'UNILAG', 'tolu': ['FUTA']})

NOTE: defaultdict can be used to do the following:
    1. Group Items 
    2. Group Unique Items (Avoid repitition)
    3. Counting Items

In [9]:
#Grouping Items with defaultdict type: Consider a faculty with the following depts and students

faculty = [
    ('met', 'segun'),
    ('mech', 'alimi'),
    ('systems', 'zainab'),
    ('met', 'mohammed'),
    ('mech', 'era')
    ]
from collections import defaultdict
dd = defaultdict(list) # .default_factory is set to list.
for dept, student in faculty: #for loop is to iterate through the faculty list
    dd[dept].append(student) #creates the keys for the dept, initializes them to an empty list, and then appends the students to each dept.

print(dd)

defaultdict(<class 'list'>, {'met': ['segun', 'mohammed'], 'mech': ['alimi', 'era'], 'systems': ['zainab']})


In [8]:
#Grouping Unique Items (Avoiding repitition)

faculty = [
    ('met', 'segun'),
    ('mech', 'alimi'),
    ('mech', 'alimi'),
    ('systems', 'zainab'),
    ('met', 'mohammed'),
    ('met', 'mohammed'),
    ('mech', 'era')
    ]
from collections import defaultdict
dd = defaultdict(set) # .default_factory is set to set.
for dept, student in faculty: #for loop is to iterate through the faculty list
    dd[dept].add(student)

print(dd)

defaultdict(<class 'set'>, {'met': {'mohammed', 'segun'}, 'mech': {'era', 'alimi'}, 'systems': {'zainab'}})


In [10]:
#Counting Items

from collections import defaultdict

faculty = [
    ('met', 'segun'),
    ('mech', 'alimi'),
    ('systems', 'zainab'),
    ('met', 'mohammed'),
    ('mech', 'era')
    ]

dd = defaultdict(int) #.default_factory is set to int. Calling int() with no argument, returns the value 0
for dept, _ in faculty: #note the underscore in the code/ the dataset needs to be clean (no repeated data).
    dd[dept] += 1

print(dd)

defaultdict(<class 'int'>, {'met': 2, 'mech': 2, 'systems': 1})


In [11]:
#Counting items like in

from collections import defaultdict

river = 'mississippi'
dd = defaultdict(int)
for letter in river:
    dd[letter] += 1

print(dd)

defaultdict(<class 'int'>, {'m': 1, 'i': 4, 's': 4, 'p': 2})


In [12]:
#Accumulating Values

"""
products = Books, Tutorials, Courses
for month July, August, September respectively
"""

incomes = [('Books', 1250.00),
           ('Books', 1300.00),
           ('Books', 1420.00),
           ('Tutorials', 560.00),
           ('Tutorials', 630.00),
           ('Tutorials', 750.00),
           ('Courses', 2500.00),
           ('Courses', 2430.00),
           ('Courses', 2750.00),]

from collections import defaultdict

dd = defaultdict(float) #.default_factory is set to float.

# for loop to accumulate the incomes for each product
for product, cost in incomes:
    dd[product] += cost

# for loop to print the total income for each product    
for product, cost in dd.items():
    print('The total income from ' + product + ' is $' + str(dd[product]))


The total income from Books is $3970.0
The total income from Tutorials is $1940.0
The total income from Courses is $7680.0


## Python's Counter

In [13]:
#we use Counter() to count in a lot of ways

from collections import Counter
river = Counter('mississippi')
print(river)

Counter({'i': 4, 's': 4, 'p': 2, 'm': 1})


In [14]:
#using element() function in Counter

from collections import Counter
count = Counter({'ade':4, 'segun':5})
print(list(count.elements()))

['ade', 'ade', 'ade', 'ade', 'segun', 'segun', 'segun', 'segun', 'segun']


In [15]:
#using most_common() function in Counter
from collections import Counter
note = 'If you want elements sorted from largest to smallest, you can specify a reverse True parameter. And instead of comparing the elements themselves, you can compare the results of a function that you specify with key'
word_counts = note.split() #converts note to a list of strings
word_count = Counter(word_counts) #gives a count of words in the list

#print the 5 most common words and their counts
for word, count in word_count.most_common(5):
    print(word, count)


you 4
elements 2
can 2
specify 2
a 2


## List Comprehensions

In [16]:
#even_numbers
even_numbers = [x for x in range(10) if x % 2 == 0]
even_numbers

[0, 2, 4, 6, 8]

In [17]:
#squares
squares = [x**2 for x in range(10)]
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [18]:
#even_squares
even_squares = [x**2 for x in range(0,10,2)]
even_squares

[0, 4, 16, 36, 64]

In [19]:
#even_squares
even_squares = [x**2 for x in range(10) if x % 2 == 0]
even_squares

[0, 4, 16, 36, 64]

In [22]:
#square_dict
square_dict = {x: x**2 for x in range(6)}
square_dict

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

In [23]:
#if you dont need the value from the list, its conventional to use an underscore as variable (it has same length as even_numbers)
zeroes = [0 for _ in even_numbers]
zeroes

[0, 0, 0, 0, 0]

In [26]:
#A list comprehension can include multiple for s
pairs = [(x,y)
        for x in range(10)
        for y in range(10)] #100 pairs (0,0), (0,1)...(9,8), (9,9)

increasing_pairs = [(x,y)
                    for x in range(10)
                    for y in range(x + 1, 10)]
# only parts wih x < y; range(lo, hi)


## Generators


Generators are used;
- for better memory management and utilization
- to produce infinite items
- used to pipeline a number of operations

Generator functions;
- make use of 'yield' keyword (Unlike 'return' for normal functions)
- run when 'next()' method is called (unlike normal funcs which run when name of the method is called)
- produces items one at a time and only when required ( unlike normal funcs that produce all items at once)


In [4]:
# using generators and the next() method

def new(dict):
    for key, value in dict.items():
        yield key, value

a = {1: 'Hello', 2: 'Pythonista'}
b = new(a)
print(b) # to show this is a generator object
next(b) # to produce the first item

<generator object new at 0x0000021716B62A48>


(1, 'Hello')

In [5]:
#calling next again prints the next key, value pairs
next(b)

(2, 'Pythonista')

In [6]:
#calling next again with no item left in the dict gives a StopIteration error
next(b)

StopIteration: 

In [7]:
#you can use a for loop with generators to print values
def exam():
    n = 4
    yield n
    n = n * n
    yield n

v = exam()
for x in v:
    print(x)

4
16


In [9]:
#Generators
my_generator = (x*x for x in range(3))
for i in my_generator:
    print(i)

0
1
4


In [10]:
#you cannot perform the for i in my_generator more than once. This code wont run.
for q in my_generator:
    print(q)

In [33]:
def infinite_sequence():
    num = 0
    while True:
        yield num
        num += 1
        
gen = infinite_sequence()
next(gen) #the next func is to check the next iteration
next(gen)

1

In [16]:
#using generators to yield the fibonacci series. 

#The fibonnaci series is a series of numbers where in each number also called the Fibonacci number is the sum of two preceeding numbers

def fibonnaci():
    x,y = 0,1
    while True:
        yield x
        x,y =y, x+y    
#setting a limit of 100
b = fibonnaci()
for a in b:
    if a > 100:
        break
    print(a)

0
1
1
2
3
5
8
13
21
34
55
89


## Python's random()

In [38]:
import random
a = random.random()#returns floating point numbers in the range 0 - 1.0
print(a)

0.3873304111293543


In [55]:
#randrange() can be used to print random numbers in ranges with steps
randrange(0,10,2)

4

In [57]:
#randint() can be used to print random integers
randint(1,6)

1

In [61]:
#choice() function for random selection of an item from a list
from random import choice
friends = ['Muhammed', 'Ikayh', 'Habib', 'Ijebu']
selection = choice(friends)
selection

'Habib'

In [63]:
#sample()function
from random import sample
mlist = [1,2,3,4,5,6,7,8,9]
print(sample(mlist,3))

[8, 3, 6]


In [65]:
#choices()
from random import choices
mlist = [1,2,3,4,5,6,7,8,9]
print(choices(mlist,k=3))

[9, 4, 3]


In [69]:
#random.seed()
import random
random.seed(5)
print(random.random())

random.seed(5)
print(random.random())

0.6229016948897019
0.6229016948897019


In [70]:
#random.shuffle()
mlist = [1,2,3,4,5,6,7,8,9]
random.shuffle(mlist)
print(mlist)

[5, 3, 9, 8, 2, 4, 7, 1, 6]


In [71]:
#random.uniform(start, end)
import random
print(random.uniform(10.5,11.0))

10.685896677781153


## OBJECT-ORIENTED PROGRAMMING

In [None]:
def make_album(artist, album):
    info = {
        'Name': artist.title(),
        'Album': album.title(),
        }
    for key, value in info.items():
        message = key + ' = ' + value.title()
        print(message)

while True:
    print("\nWELCOME TO JUMANJI\nEnter 's' to quit")
    name = input("Who's your favorite artiste?\n")
    if name == 's':
        break
    work = input("Which album do you love most?\n")
    if work == 's':
        break
   
    ad = make_album(name, work)
    print(ad)


WELCOME TO JUMANJI
Enter 's' to quit
Who's your favorite artiste?
WIZKID
Which album do you love most?
AYO
Name = Wizkid
Album = Ayo
None

WELCOME TO JUMANJI
Enter 's' to quit
