# Regular Expressions

In [None]:
import re

## Find the email addresses in the given text list.

In [None]:
# Sample inputs: a mix of e-mail addresses and plain strings that are not
# addresses, used to exercise the patterns in the cells that follow.
text = [
    'hello',
    'arthur.clark@bogazici.edu.tr',
    'hg.wells@yahoo.com',
    'isaas_asimov@yahoo.com',
    'frank-herbert@yahoo.com',
    'aclrk@aol.net',
    'this45',
]

[a-zA-Z0-9_.+-]+ → Matches the local part of the email (before @), allowing letters, digits, dots, underscores, plus, and hyphens.

In [None]:
# E-mail pattern restricted to single-label domains ending in .com or .net.
#   [a-zA-Z0-9_.+-]+  local part (letters, digits, '_', '.', '+', '-')
#   @[a-zA-Z]+        '@' followed by an alphabetic domain label
#   \.(com|net)       a literal dot and one of the two accepted endings
# Written as a raw string so the backslash reaches the regex engine unchanged
# ("\." in a normal string literal is an invalid escape sequence).
re_pattern = r"[a-zA-Z0-9_.+-]+@[a-zA-Z]+\.(com|net)"

In [None]:
# Print every entry of `text` in which `re_pattern` is found.
# re.search looks for the pattern anywhere in the string, so the entry is
# printed as soon as any part of it matches.
for candidate in text:
    if re.search(re_pattern, candidate):
        print(candidate)

* `\.` → Escapes the dot so that it matches a literal `.` in domain names.
* `[a-zA-Z0-9-.]+` → Matches the top-level domain (e.g., .com, .edu.tr, etc.).

In [None]:
# General e-mail pattern that also accepts multi-part endings such as .edu.tr.
#   [a-zA-Z0-9_.+-]+  local part (letters, digits, '_', '.', '+', '-')
#   @[a-zA-Z]+        '@' followed by an alphabetic domain label
#   \.                a literal dot
#   [a-zA-Z0-9.-]+    the rest of the domain (one or more characters, so the
#                     whole ending is consumed rather than just its first char)
# Written as a raw string so the backslash reaches the regex engine unchanged.
re_pattern2 = r"[a-zA-Z0-9_.+-]+@[a-zA-Z]+\.[a-zA-Z0-9.-]+"

In [None]:
# Print every entry of `text` in which `re_pattern2` is found.
# Iterating over the list directly avoids the index bookkeeping of
# range(len(...)).
for candidate in text:
    if re.search(re_pattern2, candidate):
        print(candidate)

## Merge the given separated digits

In [None]:
# Digit groups separated by hyphens; the next cell joins each into one number string.
number_lists = [
    '123-456-789',
    '345-567-980',
]

In [None]:
# Remove every hyphen from each entry with re.sub and print the joined digits.
for dashed in number_lists:
    print(re.sub(r'-', '', dashed))

## Some Useful Patterns


In [None]:
# Sample words used to try out the anchored and character-class patterns below.
word_list = [
    'Siyah',
    'Sagma',
    'Sofa',
    'Ses',
    'Bed',
    'Santa Claus',
]

In [None]:
# '^' anchors the match at the start of the string and '$' at the end, so the
# string must begin with an uppercase 'S', end with a lowercase 'a', and have
# at least one character in between.
pattern = r"^S.+a$"


* ^ → Matches the start of the string.
* S → Matches the uppercase letter 'S' at the beginning.
* .+ → Matches one or more of any character (except newlines).
* a → Matches the lowercase letter 'a'.
* $ → Matches the end of the string.

In [None]:
# Show only the words in which `pattern` is found.
for candidate in word_list:
    if re.search(pattern, candidate) is not None:
        print(candidate)

In [None]:
# \w matches a single word character (letter, digit, or underscore), so
# re.search succeeds on any string that contains at least one such character.
# Raw string: "\w" in a normal string literal is an invalid escape sequence.
pattern2 = r'\w'

In [None]:
# Print each word in which `pattern2` is found; skip the rest.
for entry in word_list:
    if not re.search(pattern2, entry):
        continue
    print(entry)

In [None]:
# Sample names: two-part names, one name with a middle initial, a string with
# punctuation inside it, and a single-word name.
name_list = [
    'Isac Asimov',
    'Arthur C. Clark',
    'Frank Herbert',
    'm!sha',
    'Zendeya',
]

In [None]:
# Three whitespace-separated name parts: first, middle, last.
#   (\w+)      group 1: first name
#   (\w+\.?)   group 2: middle name or initial, optionally followed by a
#              literal period (the dot is escaped so it cannot match an
#              arbitrary character such as '!')
#   (\w+)      group 3: last name
# Raw string so that \w, \s and \. reach the regex engine unchanged.
name_pattern = r'(\w+)\s+(\w+\.?)\s+(\w+)'

In [None]:
# For each name in which `name_pattern` is found, show the match object, the
# full matched text (group 0), and the first captured group (the first name).
for name in name_list:
    match = re.search(name_pattern, name)
    if match:
        print('Match Object:', match)
        print('Full Name:', match.group())
        print('First Name:', match.group(1))


In [None]:
# Whole-string match for "First Last" or "First M. Last".
#   ^(\w+)                 group 1: first name, anchored at the start
#   (?:\s+([A-Z]\.))?      optional middle initial (group 2): whitespace, one
#                          uppercase letter, and a literal period; group 2 is
#                          None when the initial is absent
#   \s+(\w+)$              whitespace, then group 3: last name, anchored at
#                          the end
# Raw string so that \w, \s and \. reach the regex engine unchanged.
name_pattern2 = r'^(\w+)(?:\s+([A-Z]\.))?\s+(\w+)$'

In [None]:
# Print the match object for every name in which `name_pattern2` is found.
for candidate in name_list:
    found = re.search(name_pattern2, candidate)
    if found is not None:
        print(found)

# String Manipulations

In [None]:
name_list = ['Isac Asimov', 'Arthur C. Clark', 'Frank Herbert']

# Break each full name into (first, middle initial, last) using plain string
# splitting on whitespace instead of a regular expression.
parsed_names = []
for full_name in name_list:
    tokens = full_name.split()
    # Only a name with exactly three tokens is treated as having a middle
    # initial; otherwise the middle slot is None.
    initial = tokens[1] if len(tokens) == 3 else None
    parsed_names.append((tokens[0], initial, tokens[-1]))

# Report one line per parsed name.
for first, middle, last in parsed_names:
    print(f"First Name: {first}, Middle Initial: {middle if middle else 'None'}, Last Name: {last}")


# Use Pandas

In [None]:
!pip install pandas

In [None]:
import pandas as pd

# Full names of the science-fiction writers, one per DataFrame row.
full_names = [
    'Isaac Asimov',
    'Arthur C. Clarke',
    'Frank Herbert',
    'Philip K. Dick',
    'Ursula K. Le Guin',
    'H.G. Wells',
]
writers = pd.DataFrame({'Full Name': full_names})

# Derive the first name with a lambda: the first whitespace-separated token
# of each full name.
writers['First Name'] = writers['Full Name'].apply(
    lambda full_name: full_name.split()[0]
)

# Show the resulting table.
print(writers)

