### Collections Module

###### Counter

In [1]:
from collections import Counter

In [2]:
l = [1,1,1,1,2,2,3,3,3,3,3,5,5,5,5]

In [3]:
Counter(l)

Counter({3: 5, 1: 4, 5: 4, 2: 2})

In [4]:
mylist = ['a','a',2,2,3,5,5]

In [5]:
Counter(mylist)

Counter({'a': 2, 2: 2, 5: 2, 3: 1})

In [6]:
Counter('aaaaabbbhfisfudshhhhdhhuu')

Counter({'h': 7, 'a': 5, 'b': 3, 'u': 3, 'f': 2, 's': 2, 'd': 2, 'i': 1})

In [9]:
sentence = 'this is the babY sound of ajay Atul. Bring it on babY'

In [10]:
Counter(sentence.split())

Counter({'babY': 2,
         'this': 1,
         'is': 1,
         'the': 1,
         'sound': 1,
         'of': 1,
         'ajay': 1,
         'Atul.': 1,
         'Bring': 1,
         'it': 1,
         'on': 1})

In [11]:
letters = 'aaaabbbbbbbbbcccdddefjgiiii'

In [12]:
c = Counter(letters)

In [13]:
c

Counter({'b': 9,
         'a': 4,
         'i': 4,
         'c': 3,
         'd': 3,
         'e': 1,
         'f': 1,
         'j': 1,
         'g': 1})

In [14]:
c.most_common()

[('b', 9),
 ('a', 4),
 ('i', 4),
 ('c', 3),
 ('d', 3),
 ('e', 1),
 ('f', 1),
 ('j', 1),
 ('g', 1)]

In [15]:
c.most_common(2)

[('b', 9), ('a', 4)]

In [16]:
list(c)

['a', 'b', 'c', 'd', 'e', 'f', 'j', 'g', 'i']

###### Default Dictionary

In [17]:
from collections import defaultdict

In [18]:
d = {'a':25, 'b':35}

In [19]:
d['b']

35

In [20]:
d['wrong']

KeyError: 'wrong'

In [21]:
# A plain dict raises KeyError for a missing key; defaultdict avoids that by
# calling the given factory to build a value the first time a key is looked up.
d = defaultdict(lambda :0)  # missing keys get the value 0 (and are then stored in the dict)

In [22]:
d['correct']=100

In [23]:
d['correct']

100

In [24]:
d['wrong']

0

In [25]:
d

defaultdict(<function __main__.<lambda>()>, {'correct': 100, 'wrong': 0})

###### Named Tuple

In [26]:
mytuple = (10,20,30)

In [27]:
mytuple[0]   # works, but you have to remember what each position means

10

In [28]:
from collections import namedtuple

In [29]:
Dog = namedtuple('Dog',['breed','name','age'])

In [30]:
d = Dog('lab','nik',32)

In [42]:
d

Dog(breed='lab', name='nik', age=32)

In [33]:
d.age

32

In [34]:
d.breed

'lab'

In [35]:
d.name

'nik'

In [37]:
d.count(32)

1

In [41]:
d[0]

'lab'

### DateTime Module

In [1]:
import datetime

In [4]:
mytime = datetime.time(20,30,20)

In [6]:
print(mytime)

20:30:20


In [7]:
mytime.hour

20

In [8]:
mytime.microsecond

0

In [9]:
type(mytime)

datetime.time

In [12]:
mydate = datetime.date.today()

In [13]:
print(mydate)

2024-01-19


In [14]:
mydate.year

2024

In [15]:
mydate.day

19

In [16]:
mydate.ctime()

'Fri Jan 19 00:00:00 2024'

In [17]:
# NOTE: this rebinds the name `datetime` from the module (imported earlier) to the
# datetime class, so module-style calls such as datetime.time(...) or
# datetime.date.today() stop working if those earlier cells are re-run afterwards.
from datetime import datetime

In [18]:
mydatetime = datetime(2024,1,19,20,25,35)

In [19]:
print(mydatetime)

2024-01-19 20:25:35


In [20]:
mydatetime=mydatetime.replace(year=2023)

In [21]:
mydatetime

datetime.datetime(2023, 1, 19, 20, 25, 35)

In [22]:
from datetime import date

In [24]:
date1 = date(2023,2,24)
date2 = date(2026,2,25)

In [26]:
result = date2 - date1

In [27]:
type(result)

datetime.timedelta

In [29]:
result.days

1097

In [32]:
result.seconds

0

In [34]:
result.total_seconds()

94780800.0

In [35]:
datetime1 = datetime(2023,2,24,20,15,30,65)
datetime2 = datetime(2026,2,25,1,35,6,100)

In [36]:
res = datetime2-datetime1

In [37]:
type(res)

datetime.timedelta

In [38]:
res

datetime.timedelta(days=1096, seconds=19176, microseconds=35)

In [39]:
res.seconds

19176

In [41]:
res.total_seconds()

94713576.000035

In [42]:
res.days

1096

### Math and Random Module

In [43]:
import math

In [46]:
#help(math)    #for module information

In [47]:
val = 4.35

In [48]:
math.floor(val)

4

In [49]:
math.ceil(val)

5

In [50]:
round(val)

4

In [51]:
val

4.35

In [52]:
math.pi

3.141592653589793

In [54]:
from math import pi

In [55]:
pi

3.141592653589793

In [53]:
math.e

2.718281828459045

In [56]:
math.inf      #infinity

inf

In [57]:
math.nan   #not a number

nan

In [58]:
math.log(math.e)

1.0

In [59]:
math.log(100,10)

2.0

In [60]:
math.sin(pi/2)

1.0

In [61]:
math.degrees(pi)

180.0

In [62]:
math.radians(180)

3.141592653589793

In [63]:
import random

In [65]:
l = [random.randint(0,100) for x in range(10)]

In [66]:
l

[19, 3, 32, 86, 0, 53, 10, 75, 24, 39]

In [72]:
# Seeding the generator with a fixed value makes the sequence of "random" numbers reproducible:
# the same seed always produces the same sequence.
random.seed(101)
l = [random.randint(0,100) for x in range(10)]

In [74]:
l

[74, 24, 69, 45, 59, 6, 84, 64, 27, 77]

In [75]:
random.seed(101)
l2 = [random.randint(0,100) for x in range(10)]

In [76]:
l2

[74, 24, 69, 45, 59, 6, 84, 64, 27, 77]

In [77]:
mylist=list(range(20))

In [78]:
mylist

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [79]:
random.choice(mylist)

7

In [82]:
#choices with repetition
random.choices(mylist,k=10)

[18, 9, 15, 6, 8, 5, 3, 8, 10, 3]

In [83]:
#choices without repetition
random.sample(mylist,k=10)

[10, 7, 12, 13, 6, 1, 15, 18, 4, 3]

In [84]:
mylist

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [85]:
random.shuffle(mylist)  #in place shuffle

In [86]:
mylist

[14, 9, 5, 2, 17, 4, 15, 7, 0, 11, 18, 13, 1, 6, 19, 16, 12, 8, 10, 3]

In [87]:
random.uniform(a=10,b=100)

73.41358775172473

In [88]:
random.gauss(20,5)

26.434796975939406

### Python Built-in Debugger

Not Useful

### Regular Expression

In [90]:
text = 'The agents phone number is 408-125-3232. Call Soon!'

In [91]:
'phone' in text

True

In [92]:
import re #regular expression

In [93]:
pattern = 'phone'

In [94]:
re.search(pattern,text)

<re.Match object; span=(11, 16), match='phone'>

In [95]:
pattern = 'Not in Text'

In [96]:
re.search(pattern,text)

In [97]:
pattern = 'phone'

In [98]:
result = re.search(pattern,text)

In [100]:
result.span()

(11, 16)

In [102]:
result.start()

11

In [103]:
result.end()

16

In [104]:
text = 'my phone once, my phone twice'

In [105]:
match = re.search('phone',text)

In [106]:
match

<re.Match object; span=(3, 8), match='phone'>

In [107]:
matches = re.findall('phone',text)

In [108]:
matches

['phone', 'phone']

In [110]:
for match in re.finditer('phone',text):
    print(match)
    print(match.span())
    print(match.group())

<re.Match object; span=(3, 8), match='phone'>
(3, 8)
phone
<re.Match object; span=(18, 23), match='phone'>
(18, 23)
phone


In [111]:
text = 'The agents phone number is 408-125-3232. Call Soon!'

In [114]:
phone_specific = re.search('408-125-3232',text)

In [115]:
phone_specific

<re.Match object; span=(27, 39), match='408-125-3232'>

In [116]:
phone_general = re.search(r'\d\d\d-\d\d\d-\d\d\d\d',text)   #identifiers

In [117]:
phone_general

<re.Match object; span=(27, 39), match='408-125-3232'>

In [118]:
phone_general.group()

'408-125-3232'

In [119]:
# Raw string so that \d reaches the regex engine untouched; in a plain string
# literal '\d' is an invalid escape sequence and triggers a warning on recent Python.
re.search(r'\d{3}-\d{3}-\d{4}',text)   # quantifiers: {n} means "exactly n repetitions"

<re.Match object; span=(27, 39), match='408-125-3232'>

In [120]:
#we can Compile a regular expression pattern for search and can use its part seperately
pattern = re.compile(r'(\d{3})-(\d{3})-(\d{3})')

In [121]:
result = re.search(pattern,text)

In [123]:
result.group()

'408-125-323'

In [124]:
result.group(1)   # group(n) returns only the text captured by the n-th parenthesised group

'408'

In [125]:
re.search(r'cat|dog','The dog is burried under the tree') #the pipe operator represents the or operation

<re.Match object; span=(4, 7), match='dog'>

In [126]:
re.findall(r'at','the cat in the hat sat there')

['at', 'at', 'at']

In [127]:
re.findall(r'.at','the cat in the hat sat there')  # .represents wildcard

['cat', 'hat', 'sat']

In [129]:
re.findall(r'...at','the cat in the hat spsat there')

['e cat', 'e hat', 'spsat']

In [131]:
re.findall(r'^\d','1 is a number')   #this indicates string starting from a digit

['1']

In [132]:
re.findall(r'^\d','the 1 is a number')

[]

In [135]:
re.findall(r'\d$','this is a number 2') #this indicates string ending to a digit

['2']

In [136]:
phrase = 'This string contains 3 numbers 34 inside 5 this string'

In [137]:
pattern = r'[^\d]' # inside [...] a leading ^ negates the set: this matches any ONE character
                   # that is not a digit, so findall returns the non-digit characters one by one

In [138]:
re.findall(pattern,phrase)

['T',
 'h',
 'i',
 's',
 ' ',
 's',
 't',
 'r',
 'i',
 'n',
 'g',
 ' ',
 'c',
 'o',
 'n',
 't',
 'a',
 'i',
 'n',
 's',
 ' ',
 ' ',
 'n',
 'u',
 'm',
 'b',
 'e',
 'r',
 's',
 ' ',
 ' ',
 'i',
 'n',
 's',
 'i',
 'd',
 'e',
 ' ',
 ' ',
 't',
 'h',
 'i',
 's',
 ' ',
 's',
 't',
 'r',
 'i',
 'n',
 'g']

In [139]:
pattern = r'[^\d]+'   # + means "one or more": match whole runs of consecutive non-digit characters,
                      # so findall returns the chunks of text lying between the digits

In [141]:
re.findall(pattern,phrase)

['This string contains ', ' numbers ', ' inside ', ' this string']

In [142]:
phrase = 'This is a string. but , it has punctuations? how to remove it!'

In [145]:
pattern = r'[^.,?! ]+'

In [146]:
lst = re.findall(pattern,phrase)

In [147]:
lst

['This',
 'is',
 'a',
 'string',
 'but',
 'it',
 'has',
 'punctuations',
 'how',
 'to',
 'remove',
 'it']

In [148]:
" ".join(lst)

'This is a string but it has punctuations how to remove it'

In [149]:
text = 'only find the hyphen-word in this sentence. they will be sooo long-ish in the text'

In [153]:
pattern = r'[\w]+-[\w]+'

In [154]:
re.findall(pattern,text)

['hyphen-word', 'long-ish']

In [155]:
pattern = r'\w+-\w+'

In [156]:
re.findall(pattern,text)

['hyphen-word', 'long-ish']

In [159]:
text1 = 'This is a catfish'
text2 = 'This is a catterpiller'
text3 = 'This is a catnap'

In [160]:
pattern = r'cat(fish|terpiller|nap)'  #one pattern can be useful for many other searches

In [161]:
re.search(pattern,text1)

<re.Match object; span=(10, 17), match='catfish'>

In [162]:
re.search(pattern,text2)

<re.Match object; span=(10, 22), match='catterpiller'>

### Timing Python Code

In [163]:
def func_one(n):
    """Return ['0', '1', ..., str(n-1)], built with a list comprehension."""
    return [str(value) for value in range(0, n)]

In [164]:
func_one(10)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [165]:
def func_two(n):
    """Return ['0', '1', ..., str(n-1)], built by mapping str over range(n)."""
    as_strings = map(str, range(n))
    return list(as_strings)

In [166]:
func_two(10)

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [167]:
#To determine which code is faster
#Method 1-Note the time
import time

In [175]:
# perf_counter() is the clock meant for measuring short durations: it is monotonic
# and uses the highest-resolution timer available, whereas time.time() follows the
# wall clock, which can be coarse and may jump if the system clock is adjusted.
start_time = time.perf_counter()
res = func_one(1000000)
end_time = time.perf_counter()
elapsed_time = end_time-start_time   # seconds, as a float
print(elapsed_time)

0.23341798782348633


In [176]:
# perf_counter() is the clock meant for measuring short durations: it is monotonic
# and uses the highest-resolution timer available, whereas time.time() follows the
# wall clock, which can be coarse and may jump if the system clock is adjusted.
start_time = time.perf_counter()
res = func_two(1000000)
end_time = time.perf_counter()
elapsed_time = end_time-start_time   # seconds, as a float
print(elapsed_time)

0.3374214172363281


In [179]:
#Method 2- the timeit module
import timeit   # timeit.timeit() is called below with a statement to time and a setup snippet, both given as strings

In [180]:
stmt = '''
func_one(100)
'''

In [181]:
setup = '''
def func_one(n):
    return [str(num) for num in range(n)]
'''

In [182]:
timeit.timeit(stmt,setup,number=100000)

2.225154200103134

In [185]:
stmt = '''
func_two(100)
'''
setup = '''
def func_two(n):
    return list(map(str,range(n)))
'''

In [186]:
timeit.timeit(stmt,setup,number=100000)

2.59678060002625

In [187]:
#Method 3- the %%timeit method it works only on notebook

In [188]:
%%timeit
func_one(100)

18.4 µs ± 3.75 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [189]:
%%timeit
func_two(100)

25.4 µs ± 4.16 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


### Zipping and Unzipping Files in Python

In [190]:
# The with-block closes the file automatically, even if write() raises.
with open('fileone.txt','w+') as f:
    f.write("How can you be number one")

In [191]:
# The with-block closes the file automatically, even if write() raises.
with open('filetwo.txt','w+') as f:
    f.write("How can you be number two hushhh!!")

In [192]:
import zipfile

In [193]:
comp_file = zipfile.ZipFile('comp_file.zip','w')

In [194]:
comp_file.write('fileone.txt',compress_type=zipfile.ZIP_DEFLATED)

In [195]:
comp_file.write('filetwo.txt',compress_type=zipfile.ZIP_DEFLATED)

In [196]:
comp_file.close()

In [197]:
zip_file  = zipfile.ZipFile('comp_file.zip','r')

In [198]:
zip_file.extractall('extracted content')   # unpack every member into this folder (created if missing)
zip_file.close()   # release the archive handle; otherwise comp_file.zip stays open for the rest of the session

In [199]:
import shutil

In [200]:
pwd

'C:\\Users\\Nikhil\\Desktop\\Python\\Personal Notes'

In [201]:
# Relative to the notebook's working directory: this is the folder the earlier
# extractall() call created, so no machine- or user-specific absolute path is needed.
dir_to_zip = 'extracted content'

In [202]:
shutil.make_archive('comp_file2','zip',dir_to_zip)

'C:\\Users\\Nikhil\\Desktop\\Python\\Personal Notes\\comp_file2.zip'

In [203]:
shutil.unpack_archive('comp_file2.zip','extracted content2','zip')