# Regular Expressions

In [2]:
import re

## Find the email addresses in the given text list.

In [1]:
# Sample inputs: five e-mail addresses mixed with two non-address strings.
text = [
    'hello',
    'arthur.clark@bogazici.edu.tr',
    'hg.wells@yahoo.com',
    'isaas_asimov@yahoo.com',
    'frank-herbert@yahoo.com',
    'aclrk@aol.net',
    'this45',
]

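`[a-zA-Z0-9_.+-]+` → Matches the local part of the email (before @), allowing letters, digits, dots, underscores, plus, and hyphens. The pattern below uses the simpler class `[a-zA-Z0-9]+`; because `re.search` accepts a match anywhere in the string, addresses whose local part contains `.`, `_` or `-` are still found.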

In [3]:
# Raw string so that "\." reaches the regex engine as an escaped dot instead of
# being treated as an (invalid) Python string escape.
# Matches: an alphanumeric run, "@", an alphabetic domain label, then ".com" or ".net".
re_pattern = r"[a-zA-Z0-9]+@[a-zA-Z]+\.(com|net)"

In [4]:
# Print every entry in which the pattern is found anywhere in the string.
# re.search scans the whole string, so a partial hit such as "wells@yahoo.com"
# inside "hg.wells@yahoo.com" is enough for the entry to be printed.
for entry in text:
    if re.search(re_pattern, entry):
        print(entry)

hg.wells@yahoo.com
isaas_asimov@yahoo.com
frank-herbert@yahoo.com
aclrk@aol.net


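* `\.` → Escapes the dot to match the literal `.` in domain names.
* `[a-zA-Z0-9-.]+` → Matches the top-level domain (e.g., .com, .edu.tr, etc.). The pattern below requires only a single `[a-zA-Z0-9]` character after the dot, which is enough for `re.search` to accept any of these suffixes.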

In [5]:
# Raw string keeps "\." as a regex escape rather than a Python string escape.
# After the dot only ONE alphanumeric character is required, so any domain
# suffix (".com", ".net", ".edu.tr", ...) satisfies the pattern under re.search.
re_pattern2 = r"[a-zA-Z0-9]+@[a-zA-Z]+\.[a-zA-Z0-9]"

In [6]:
# Print every entry that contains a match for the more permissive pattern.
# Iterating over the list directly avoids the index bookkeeping of range(len(...)).
for entry in text:
    if re.search(re_pattern2, entry):
        print(entry)

arthur.clark@bogazici.edu.tr
hg.wells@yahoo.com
isaas_asimov@yahoo.com
frank-herbert@yahoo.com
aclrk@aol.net


## Merge the given separated digits

In [7]:
# Digit groups separated by hyphens.
number_lists = [
    '123-456-789',
    '345-567-980',
]

In [8]:
# Compile the hyphen pattern once, then delete every hyphen from each entry
# and print the joined digits.
hyphen = re.compile(r'-')
for number in number_lists:
    merged = hyphen.sub('', number)
    print(merged)

123456789
345567980


## Some Useful Patterns


In [10]:
# Words used to try out the anchored and character-class patterns below.
word_list = [
    'Siyah',
    'Sagma',
    'Sofa',
    'Ses',
    'Bed',
    'Santa Claus',
]

In [11]:
# "^" anchors the match at the start of the string and "$" at its end, so a
# word must begin with an uppercase "S" and finish with a lowercase "a".
pattern = r'^S.+a$'


* ^ → Matches the start of the string.
* S → Matches the uppercase letter 'S' at the beginning.
* .+ → Matches one or more of any character (except newlines).
* a → Matches the lowercase letter 'a'.
* $ → Matches the end of the string.

In [12]:
# Show only the words accepted by the anchored pattern; skip the rest.
for word in word_list:
    if re.search(pattern, word) is None:
        continue
    print(word)

Sagma
Sofa


In [12]:
# r"\w" matches ONE word character (letter, digit or underscore), so re.search
# succeeds for any string that contains at least one such character.
# Raw string avoids the invalid "\w" Python string escape.
pattern2 = r'\w'

In [13]:
# Print each word that contains at least one match for pattern2.
for word in word_list:
    found = re.search(pattern2, word)
    if found is not None:
        print(word)

Siyah
Sagma
Sofa
Ses
Bed
Santa Claus


In [14]:
# Names used to exercise the capture-group patterns below; only one entry has
# a middle initial, and two entries are single tokens.
name_list = [
    'Isac Asimov',
    'Arthur C. Clark',
    'Frank Herbert',
    'm!sha',
    'Zendeya',
]

In [15]:
# Three whitespace-separated parts: first name, middle initial, last name.
# Group 2 is word characters followed by a literal period ("C."); the dot is
# escaped so that it no longer matches an arbitrary character. The raw string
# passes the backslashes through to the regex engine unchanged.
name_pattern = r'(\w+)\s+(\w+\.)\s+(\w+)'

In [16]:
# For every name that matches, report the match object, the full matched text
# and the first captured group; names without a match are skipped.
for name in name_list:
    match = re.search(name_pattern, name)
    if match is None:
        continue
    print('MAtch Object:', match)
    print('Full Neme:', match.group())
    print('First Name:', match.group(1))


MAtch Object: <re.Match object; span=(0, 15), match='Arthur C. Clark'>
Full Neme: Arthur C. Clark
First Name: Arthur


In [17]:
# Whole-string match ("^...$") for "First Last" or "First X. Last":
#   group 1 - first name, group 2 - optional middle initial such as "C.",
#   group 3 - last name.
# Raw string so "\w", "\s" and "\." are regex escapes, not Python string escapes.
name_pattern2 = r'^(\w+)(?:\s+([A-Z]\.)?)?\s+(\w+)$'

In [18]:
# Print the match object for each name accepted by the anchored name pattern.
for name in name_list:
    match = re.search(name_pattern2, name)
    if match is not None:
        print(match)

<re.Match object; span=(0, 11), match='Isac Asimov'>
<re.Match object; span=(0, 15), match='Arthur C. Clark'>
<re.Match object; span=(0, 13), match='Frank Herbert'>


# String Manipulations

In [19]:
name_list = ['Isac Asimov', 'Arthur C. Clark', 'Frank Herbert']

# Build (first, middle, last) tuples by splitting each name on whitespace.
parsed_names = []
for full_name in name_list:
    tokens = full_name.split()
    # Only a three-token name is treated as having a middle initial.
    middle = tokens[1] if len(tokens) == 3 else None
    parsed_names.append((tokens[0], middle, tokens[-1]))

# Print results; a missing middle initial is shown as the text 'None'.
for first, middle, last in parsed_names:
    shown_middle = middle or 'None'
    print(f"First Name: {first}, Middle Initial: {shown_middle}, Last Name: {last}")


First Name: Isac, Middle Initial: None, Last Name: Asimov
First Name: Arthur, Middle Initial: C., Last Name: Clark
First Name: Frank, Middle Initial: None, Last Name: Herbert


# Use Pandas

In [20]:
!pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [21]:
import pandas as pd

# Create DataFrame with Science Fiction Writers, one full name per row.
writers = pd.DataFrame({'Full Name': [
    'Isaac Asimov',
    'Arthur C. Clarke',
    'Frank Herbert',
    'Philip K. Dick',
    'Ursula K. Le Guin',
    'H.G. Wells'
]})

# Extract the first whitespace-separated token of each name with the
# vectorized string accessor instead of a per-row lambda.
writers['First Name'] = writers['Full Name'].str.split().str[0]

# Display DataFrame
print(writers)



           Full Name First Name
0       Isaac Asimov      Isaac
1   Arthur C. Clarke     Arthur
2      Frank Herbert      Frank
3     Philip K. Dick     Philip
4  Ursula K. Le Guin     Ursula
5         H.G. Wells       H.G.
