### Expressions:
***
`\d`                         Any numeric digit from `0` to `9`.

`\D`                         Matches any character which is not a decimal digit.
                           This is the opposite of `\d`.

`\w`                         Any letter, numeric digit, or the underscore
                           character.  (Think of this as matching
                           "word" characters.)

`\W`                         Any character that is not a letter,
                           numeric digit, or the underscore character.

`\s`                         Any space, tab, or newline character.
                           (Think of this as matching white-space
                           characters.)
                           
`\S`                         Any character that is not a space, tab,
                           or newline.
***


In [3]:
import re
import pandas as pd

In [4]:
# Sample input for the \d demo: letters interleaved with digits.
text = 'A78L41K'

In [8]:
# Search for the first two consecutive digits in `text`.
# The pattern is a raw string so "\d" reaches the regex engine untouched
# instead of relying on Python tolerating an invalid string escape.
num = re.search(r"\d\d", text)
num

<re.Match object; span=(1, 3), match='78'>

In [9]:
# group() without an argument returns group 0, i.e. the whole match.
num.group()

'78'

In [10]:
# New sample: it starts with a digit, so the first non-digit sits at index 1.
text = '8PM19MIN'

In [12]:
# \D matches the first character that is not a decimal digit.
# Raw string: keeps "\D" from being parsed as an (invalid) string escape.
nondigi = re.search(r"\D", text)
nondigi

<re.Match object; span=(1, 2), match='P'>

In [14]:
# Find the first non-digit character and show just the matched text.
# Raw string: keeps "\D" from being parsed as an (invalid) string escape.
nondigi = re.search(r"\D", text)
nondigi.group()

'P'

In [35]:
# Sample sentence with a phone number whose parts are separated by spaces.
text = 'My phone number is 555 666 7777'

In [36]:
# Match three digits, a space, three digits, a space, then four digits.
# Raw string: the "\d" sequences are regex escapes, not Python escapes.
telno = re.search(r"\d\d\d \d\d\d \d\d\d\d", text)
telno.group()

'555 666 7777'

In [38]:
# Same idea with dashes as separators: ddd-ddd-dddd.
# Raw string: the "\d" sequences are regex escapes, not Python escapes.
text = "My phone number is 415-555-1212"
telno = re.search(r"\d\d\d-\d\d\d-\d\d\d\d", text)
telno.group()

'415-555-1212'

In [48]:
# Search `text` for a dash-separated ddd-ddd-dddd number.
# Raw string: the "\d" sequences are regex escapes, not Python escapes.
telno = re.search(r"\d\d\d-\d\d\d-\d\d\d\d", text)
telno.group()

'415-555-1212'

In [49]:
# Build the phone pattern by repeating raw "\d" pieces; the concatenation
# evaluates to the regex \d\d\d-\d\d\d-\d\d\d\d.
telno = re.search(r"\d" * 3 + "-" + r"\d" * 3 + "-" + r"\d" * 4, text)

In [50]:
# Print the whole match (group 0) as plain text.
print(telno.group(0))

415-555-1212


In [51]:
# Persist the sample sentence to disk.
# An explicit encoding keeps the file independent of the platform default.
with open("text.txt", "w", encoding="utf-8") as file:
    file.write(text)

In [57]:
# Read the saved sentence back and split the phone number into two capture
# groups: the first three digits, and the remaining "ddd-dddd" part.
# The encoding is explicit so the read does not depend on the platform default.
with open("text.txt", "r", encoding="utf-8") as file:
    txt = file.read()
print(txt)
# Raw string: the "\d" sequences are regex escapes, not Python escapes.
output = re.search(r"(\d\d\d)-(\d\d\d-\d\d\d\d)", txt)

print(output.group(1))
print(output.group(2))

My phone number is 415-555-1212
415
555-1212


In [58]:
# Numbers of increasing length (1, 2, 3 and 6 digits) for the quantifier demos.
value = 'O 1, t 10, o 100. 100000'

In [59]:
# {1} means "exactly one": every digit is returned as its own match.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
output = re.findall(r"\d{1}", value)
print(output)

['1', '1', '0', '1', '0', '0', '1', '0', '0', '0', '0', '0']


In [60]:
# {2} means "exactly two": digits are consumed in non-overlapping pairs,
# so a lone digit or a trailing odd digit is not returned.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
output = re.findall(r"\d{2}", value)
print(output)

['10', '10', '10', '00', '00']


In [62]:
# {1,6} means "between one and six"; the quantifier is greedy, so each run
# of up to six digits comes back as a single match.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
output = re.findall(r"\d{1,6}", value)
print(output)

['1', '10', '100', '100000']


In [66]:
phone = "2004-959-559"  # This is a phone number
# Remove every non-digit character by substituting it with the empty string.
# Raw string: keeps "\D" from being parsed as an (invalid) string escape.
output = re.sub(r"\D", "", phone)
output

'2004959559'

In [67]:
# Replace every non-digit character in `phone` with a dot.
# Raw string: keeps "\D" from being parsed as an (invalid) string escape.
output = re.sub(r"\D", ".", phone)
output

'2004.959.559'

In [69]:
# re.sub works like a replace: every non-digit character becomes "-".
# Raw string: keeps "\D" from being parsed as an (invalid) string escape.
output = re.sub(r"\D", "-", phone)
output

'2004-959-559'

In [70]:
# The opposite substitution: every digit becomes "+", other characters stay.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
output = re.sub(r"\d", "+", phone)
output

'++++-+++-+++'

### Special Characters
___
``"[]"``	  A set of characters	``"[a-m]"``


``"\"``	      Signals a special sequence (can also be used to escape special characters)

``"."``	      Any character (except newline character)


``"^"``	      Starts with	``"^hello"``

``"$"``	      Ends with	``"world$"``

``"*"``	      Zero or more occurrences

`"+"`	      One or more occurrences

`"{}"`	  Exactly the specified number of occurrences

`"|"`	      Either or	`"falls|stays"`

`"()"`	  Capture and group
___

In [71]:
# Sample sentence containing a one-digit and a three-digit number.
txt = '1 person against 100 people'

In [72]:
# "+" means one or more: each run of consecutive digits is one match.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
output = re.findall(r"\d+", txt)
output

['1', '100']

In [74]:
# "^" anchors the pattern to the start of the string: the result is a
# non-empty list if the text starts with "h", otherwise an empty list.
txt = "hello world"
output = re.compile("^h").findall(txt)
output

['h']

In [76]:
# The anchor also works with a whole word: does the text start with "hello"?
txt = "hello world"
output = re.findall(pattern="^hello", string=txt)
output

['hello']

In [77]:
# Negative case: the text does not start with "fello", so nothing is found.
txt = "hello world"
output = re.compile("^fello").findall(txt)
output

[]

In [80]:
# "$" anchors the pattern to the end of the string: does the text end in "d"?
txt = "hello world"
output = re.findall(pattern="d$", string=txt)
output

['d']

In [81]:
# End anchor with a whole word: does the text end with "world"?
txt = "hello world"
output = re.compile("world$").findall(txt)
output

['world']

In [82]:
# Small series for the extract demos; the last entry contains no digit.
s = pd.Series(data=["a3", "b4", "c5", "d"])

In [83]:
# Extract the first digit of each entry; entries without a digit give NaN.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
s.str.extract(r"(\d)")

Unnamed: 0,0
0,3.0
1,4.0
2,5.0
3,


In [84]:
# Extract the first word character (letter, digit or underscore) of each entry.
# Raw string: keeps "\w" from being parsed as an (invalid) string escape.
s.str.extract(r"(\w)")

Unnamed: 0,0
0,a
1,b
2,c
3,d


In [86]:
# Each entry is one letter, one digit, then two more letters.
s = pd.Series(
    data=[
        "a3aa",
        "b4aa",
        "c5aa",
    ]
)
s

0    a3aa
1    b4aa
2    c5aa
dtype: object

In [87]:
# Three capture groups give three columns; the digit between the first and
# second group is matched but not captured.
# Raw string: keeps "\w" / "\d" from being parsed as (invalid) string escapes.
s.str.extract(r"(\w)\d(\w)(\w)")

Unnamed: 0,0,1,2
0,a,a,a
1,b,a,a
2,c,a,a


In [88]:
# Raw values mixing numbers, units and a "(comb)" suffix.
# This needs cleaning, which will be done with regex.
s = pd.Series(
    data=[
        "40 l/100 km (comb)",
        "38 l/100 km (comb)",
        "6.4 l/100 km (comb)",
        "8.3 kg/100 km (comb)",
        "5.1 kg/100 km (comb)",
        "5.4 l/100 km (comb)",
        "6.7 l/100 km (comb)",
        "6.2 l/100 km (comb)",
        "7.3 l/100 km (comb)",
        "6.3 l/100 km (comb)",
        "5.7 l/100 km (comb)",
        "6.1 l/100 km (comb)",
        "6.8 l/100 km (comb)",
        "7.5 l/100 km (comb)",
        "7.4 l/100 km (comb)",
        "3.6 kg/100 km (comb)",
        "0 l/100 km (comb)",
        "7.8 l/100 km (comb)",
    ]
)

In [89]:
# Display the raw series before any extraction is applied.
s

0       40 l/100 km (comb)
1       38 l/100 km (comb)
2      6.4 l/100 km (comb)
3     8.3 kg/100 km (comb)
4     5.1 kg/100 km (comb)
5      5.4 l/100 km (comb)
6      6.7 l/100 km (comb)
7      6.2 l/100 km (comb)
8      7.3 l/100 km (comb)
9      6.3 l/100 km (comb)
10     5.7 l/100 km (comb)
11     6.1 l/100 km (comb)
12     6.8 l/100 km (comb)
13     7.5 l/100 km (comb)
14     7.4 l/100 km (comb)
15    3.6 kg/100 km (comb)
16       0 l/100 km (comb)
17     7.8 l/100 km (comb)
dtype: object

In [90]:
# Naive first attempt: grab the first run of two consecutive digits.
# NOTE: for entries such as "6.4 l/100 km" the first two-digit run is the
# "10" inside "100", not the leading number.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
s.str.extract(r"(\d\d)")

Unnamed: 0,0
0,40
1,38
2,10
3,10
4,10
5,10
6,10
7,10
8,10
9,10


In [94]:
# Extract the first number of each entry. Alternatives are tried left to
# right: a decimal number first (the dot is escaped so it only matches a
# literal "."), then a plain integer. \d+ allows any number of digits.
result = s.str.extract(r"(\d+\.\d+|\d+)")
result

Unnamed: 0,0
0,40.0
1,38.0
2,6.4
3,8.3
4,5.1
5,5.4
6,6.7
7,6.2
8,7.3
9,6.3


In [96]:
# Two columns: the first number of the entry (decimal or integer, with the
# decimal point escaped), and a three-digit run later in the string.
# The greedy ".+" in between consumes as much as possible, so the second
# group is the last three-digit run that still lets the pattern match.
result = s.str.extract(r"(\d+\.\d+|\d+).+(\d\d\d)")
result

Unnamed: 0,0,1
0,40.0,100
1,38.0,100
2,6.4,100
3,8.3,100
4,5.1,100
5,5.4,100
6,6.7,100
7,6.2,100
8,7.3,100
9,6.3,100


In [99]:
# Anchored variant: a number at the very start of the string (integer with
# an optional, non-captured ".digits" part), a space, the unit up to "/",
# and then the digits that follow the slash.
# The decimal point is escaped so it cannot match an arbitrary character.
result = s.str.extract(r"^(\d+(?:\.\d+)?) \w*/(\d*)")
result

Unnamed: 0,0,1
0,40.0,100
1,38.0,100
2,6.4,100
3,8.3,100
4,5.1,100
5,5.4,100
6,6.7,100
7,6.2,100
8,7.3,100
9,6.3,100


In [103]:
# Simplest variant: take the first run of non-whitespace characters,
# i.e. everything before the first space.
# Raw string: keeps "\S" from being parsed as an (invalid) string escape.
a = s.str.extract(r"(\S+)")
a

Unnamed: 0,0
0,40.0
1,38.0
2,6.4
3,8.3
4,5.1
5,5.4
6,6.7
7,6.2
8,7.3
9,6.3


In [104]:
# Each entry starts with an "MM/YYYY"-shaped token, followed by two
# newlines and a measurement string.
s = pd.Series(
    data=[
        "06/2020\n\n4.9 l/100 km (comb)",
        "11/2020\n\n166 g CO2/km (comb)",
        "10/2019\n\n5.3 l/100 km (comb)",
        "05/2022\n\n6.3 l/100 km (comb)",
        "07/2019\n\n128 g CO2/km (comb)",
        "06/2022\n\n112 g CO2/km (comb)",
        "01/2022\n\n5.8 l/100 km (comb)",
        "11/2020\n\n106 g CO2/km (comb)",
        "04/2019\n\n105 g CO2/km (comb)",
        "08/2020\n\n133 g CO2/km (comb)",
        "04/2022\n\n133 g CO2/km (comb)",
    ]
)

In [106]:
# Two digit runs separated by exactly one arbitrary character; the "."
# wildcard is what matches the "/" between the two numbers here.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
result = s.str.extract(r"(\d+).(\d+)")
result

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020


In [108]:
# One capture group around "non-space run / non-space run": the whole
# slash-separated token comes back as a single column.
# Raw string: keeps "\S" from being parsed as an (invalid) string escape.
t = s.str.extract(r"(\S+/\S+)")
t

Unnamed: 0,0
0,06/2020
1,11/2020
2,10/2019
3,05/2022
4,07/2019
5,06/2022
6,01/2022
7,11/2020
8,04/2019
9,08/2020


In [109]:
# Same token, but with one capture group on each side of the slash, so the
# two parts land in separate columns.
# Raw string: keeps "\S" from being parsed as an (invalid) string escape.
t = s.str.extract(r"(\S+)/(\S+)")
t

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020


In [110]:
# Fixed widths: exactly two digits, one arbitrary character (the "."
# wildcard, which matches the "/" here), then exactly four digits.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
result = s.str.extract(r"(\d{2}).(\d{4})")
result

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020


In [111]:
# Strictest variant: exactly two digits, a literal "/", exactly four digits.
# Raw string: keeps "\d" from being parsed as an (invalid) string escape.
result = s.str.extract(r"(\d{2})/(\d{4})")
result

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020
