In [1]:
import re

In [2]:
# Read the whole FERPA article text into `wiki`. The encoding is pinned so the
# result does not depend on the platform's default locale encoding.
with open('ferpa.txt', 'r', encoding='utf-8') as file:
    wiki = file.read()

In [3]:
# Bare expression as the last line of the cell: the notebook displays its repr.
wiki

'Overview[edit]\nFERPA gives parents access to their child\'s education records, an opportunity to seek to have the records amended, and some control over the disclosure of information from the records. With several exceptions, schools must have a student\'s consent prior to the disclosure of education records after that student is 18 years old. The law applies only to educational agencies and institutions that receive funds under a program administered by the U.S. Department of Education.\n\nOther regulations under this act, effective starting January 3, 2012, allow for greater disclosures of personal and directory student identifying information and regulate student IDs and e-mail addresses.[2] For example, schools may provide external companies with a student\'s personally identifiable information without the student\'s consent.[2]\n\nExamples of situations affected by FERPA include school employees divulging information to anyone other than the student about the student\'s grades o

In [4]:
# Find each run of 1-100 ASCII letters immediately followed by the literal
# text "[edit]". Raw string: \[ and \] are regex escapes, not valid Python
# string escapes, so a plain literal triggers an invalid-escape warning.
re.findall(r'[A-Za-z]{1,100}\[edit\]', wiki)

['Overview[edit]', 'records[edit]', 'records[edit]']

In [5]:
# \w is the shorthand class for "word" characters: it covers A-Za-z and also
# digits and the underscore.
# Raw string so \w, \[ and \] reach the regex engine without relying on
# Python tolerating unknown string escapes.

re.findall(r'[\w]{1,100}\[edit\]', wiki)

['Overview[edit]', 'records[edit]', 'records[edit]']

In [6]:
# use * (zero or more repetitions) instead of {1,100}
# (raw string: the backslashes belong to the regex, not to Python)

re.findall(r'[\w]*\[edit\]', wiki)

['Overview[edit]', 'records[edit]', 'records[edit]']

In [7]:
# adding a literal space to the character class lets multi-word titles match
# (raw string: the backslashes belong to the regex, not to Python)

re.findall(r'[\w ]*\[edit\]', wiki)

['Overview[edit]',
 'Access to public records[edit]',
 'Student medical records[edit]']

In [8]:
# now remove the "[edit]" suffix from every element of the list:
# split each match on "[" and keep the part in front of it.
# Both patterns are raw strings so \w and \[ are passed to the regex engine as-is.

for title in re.findall(r'[\w ]*\[edit\]', wiki):
    print(re.split(r'[\[]', title)[0])

Overview
Access to public records
Student medical records


In [9]:
# collect the cleaned titles in a list: for every "<title>[edit]" match keep
# only the text before the first "[" (raw strings avoid invalid-escape warnings)

titles = [re.split(r'[\[]', title)[0] for title in re.findall(r'[\w ]*\[edit\]', wiki)]
titles

['Overview', 'Access to public records', 'Student medical records']

### Groups

In [10]:
# With two capture groups, findall returns one (title, "[edit]") tuple per match.
# Raw string keeps \w, \[ and \] as regex escapes.
re.findall(r'([\w ]*)(\[edit\])', wiki)

[('Overview', '[edit]'),
 ('Access to public records', '[edit]'),
 ('Student medical records', '[edit]')]

In [11]:
# finditer yields Match objects lazily instead of building a list up front.
# Raw string keeps \w, \[ and \] as regex escapes.
value = re.finditer(r'([\w ]*)(\[edit\])', wiki)
value

<callable_iterator at 0x228b6111570>

In [12]:
# Pull the next Match object from the iterator; this consumes it.
next(value)

<re.Match object; span=(0, 14), match='Overview[edit]'>

In [13]:
# A further call advances the same iterator to the following match.
next(value)

<re.Match object; span=(2715, 2745), match='Access to public records[edit]'>

In [14]:
# Build a fresh iterator and walk all of its Match objects.
# Raw string keeps \w, \[ and \] as regex escapes.
value = re.finditer(r'([\w ]*)(\[edit\])', wiki)

for item in value:
    print(item)

<re.Match object; span=(0, 14), match='Overview[edit]'>
<re.Match object; span=(2715, 2745), match='Access to public records[edit]'>
<re.Match object; span=(3692, 3721), match='Student medical records[edit]'>


In [15]:
# groups() returns every captured group of a match as a tuple.
# Raw string keeps \w, \[ and \] as regex escapes.
for item in re.finditer(r"([\w ]*)(\[edit\])", wiki):
    print(item.groups())

('Overview', '[edit]')
('Access to public records', '[edit]')
('Student medical records', '[edit]')


In [16]:
# group(1) is the first capture group, i.e. the title without "[edit]".
# Raw string keeps \w, \[ and \] as regex escapes.
for item in re.finditer(r"([\w ]*)(\[edit\])", wiki):
    print(item.group(1))

Overview
Access to public records
Student medical records


In [17]:
# Name the groups with (?P<name>...) so each match can be read as a dictionary.
# Raw string keeps \w, \[ and \] as regex escapes.
for item in re.finditer(r"(?P<title>[\w ]*)(?P<edit_link>\[edit\])", wiki):
    print(item.groupdict())


{'title': 'Overview', 'edit_link': '[edit]'}
{'title': 'Access to public records', 'edit_link': '[edit]'}
{'title': 'Student medical records', 'edit_link': '[edit]'}


In [18]:
# Look up a single named group through the dictionary form of the match.
# Raw string keeps \w, \[ and \] as regex escapes.
for item in re.finditer(r'(?P<title>[\w ]*)(?P<edit_link>\[edit\])', wiki):
    print(item.groupdict()['title'])

Overview
Access to public records
Student medical records


In [19]:
# We don't actually want "[edit]" in the result; we only need to check that it
# follows the title. A lookahead (?=...) asserts that without consuming the text,
# so the match covers the title alone.
# Raw string keeps \w, \[ and \] as regex escapes.

for item in re.finditer(r'(?P<title>[\w ]+)(?=\[edit\])', wiki):
    print(item)

<re.Match object; span=(0, 8), match='Overview'>
<re.Match object; span=(2715, 2739), match='Access to public records'>
<re.Match object; span=(3692, 3715), match='Student medical records'>


# verbose mode

In [20]:
# Read the second article as UTF-8 text. Note that this rebinds `wiki`, so
# from here on the name refers to buddhist.txt instead of ferpa.txt.
with open("buddhist.txt", "r", encoding="utf-8") as file:
    wiki = file.read()

# Echo the loaded text so the cell displays it.
wiki

'Buddhist universities and colleges in the United States\nFrom Wikipedia, the free encyclopedia\nJump to navigationJump to search\n\nThis article needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed.\nFind sources: "Buddhist universities and colleges in the United States" – news · newspapers · books · scholar · JSTOR (December 2009) (Learn how and when to remove this template message)\nThere are several Buddhist universities in the United States. Some of these have existed for decades and are accredited. Others are relatively new and are either in the process of being accredited or else have no formal accreditation. The list includes:\n\nDhammakaya Open University – located in Azusa, California, part of the Thai Wat Phra Dhammakaya[1]\nDharmakirti College – located in Tucson, Arizona Now called Awam Tibetan Buddhist Institute (http://awaminstitute.org/)\nDharma Realm Buddh

In [21]:
# Verbose-mode pattern: unescaped whitespace and "# ..." comments inside the
# pattern are ignored, so every literal space has to be written as "\ ".
# Raw string so "\ " and "\w" are passed to the regex engine untouched.
# The title group is a greedy ".*" that stops right at the dash, which is why
# each captured title ends with a trailing space.
pattern = r"""
(?P<title>.*)       # the university title
(–\ located\ in\ )  # indicator of the location
(?P<city>\w*)       # city name
(,\ )               # separator for the state
(?P<state>\w*)      # state name"""

for item in re.finditer(pattern, wiki, re.VERBOSE):
    print(item.groupdict())

{'title': 'Dhammakaya Open University ', 'city': 'Azusa', 'state': 'California'}
{'title': 'Dharmakirti College ', 'city': 'Tucson', 'state': 'Arizona'}
{'title': 'Dharma Realm Buddhist University ', 'city': 'Ukiah', 'state': 'California'}
{'title': 'Ewam Buddhist Institute ', 'city': 'Arlee', 'state': 'Montana'}
{'title': 'Institute of Buddhist Studies ', 'city': 'Berkeley', 'state': 'California'}
{'title': 'Maitripa College ', 'city': 'Portland', 'state': 'Oregon'}
{'title': 'University of the West ', 'city': 'Rosemead', 'state': 'California'}
{'title': 'Won Institute of Graduate Studies ', 'city': 'Glenside', 'state': 'Pennsylvania'}


In [25]:
import re

# Greedy {1,2}: each match takes two consecutive 'A's when available,
# otherwise a single one.
s = 'ACAABAACAAAB'
result = [hit.group() for hit in re.finditer('A{1,2}', s)]
result

['A', 'AA', 'AA', 'AA', 'A']

In [36]:
# Sample text containing four lettered list markers, "(a)" through "(d)".
text=r'''Everyone has the following fundamental freedoms:
    (a) freedom of conscience and religion;
    (b) freedom of thought, belief, opinion and expression, including freedom of the press and other media of communication;
    (c) freedom of peaceful assembly; and
    (d) freedom of association.'''

import re

# Literal "(", any single character, literal ")".
# Raw string: \( and \) are regex escapes, not valid Python string escapes.
pattern = r'\(.\)'
matches = re.findall(pattern, text)
print(len(matches))

4


In [31]:
import numpy as np

old = np.array([[1, 1, 1], [1, 1, 1]])
# Plain assignment binds a second name to the same array object; no copy is made.
new = old
# Writing through `new` therefore also changes what `old` shows.
new[0, :2] = 0

print(old)

[[0 0 1]
 [1 1 1]]


In [41]:
# Several ways of constructing arrays, with different shapes and dtypes.
a1 = np.random.rand(4)            # 1-D, four random floats
a2 = np.random.rand(4, 1)         # 2-D column, four rows
a3 = np.array([[1, 2, 3, 4]])     # 2-D: one row inside nested brackets
a4 = np.arange(1, 4)              # integers 1..3, stop value excluded
a5 = np.linspace(1, 4, num=4)     # four evenly spaced floats, endpoints included

for arr in (a1, a2, a3, a4, a5):
    print(arr)
print(a3.ndim)

[0.23626264 0.62544827 0.59720988 0.77149725]
[[0.32320512]
 [0.92721043]
 [0.61136989]
 [0.03823945]]
[[1 2 3 4]]
[1 2 3]
[1. 2. 3. 4.]
2
