# Regular Expressions

In [315]:
import re

## Find the email addresses in the given text list.

In [316]:
# Sample strings to filter: a mix of e-mail addresses and plain words.
text = [
    'hello',
    'arthur.clark@bogazici.edu.tr',
    'hg.wells@yahoo.com',
    'isaas_asimov@yahoo.com',
    'frank-herbert@yahoo.com',
    'aclrk@aol.net',
    'this45',
]

[a-zA-Z0-9_.+-]+ → Matches the local part of the email (before @), allowing letters, digits, dots, underscores, plus, and hyphens.

In [317]:
# E-mail addresses whose domain is a single alphabetic label ending in .com or .net.
#   [a-zA-Z0-9_.+-]+  local part: letters, digits, underscore, dot, plus, hyphen
#   @[a-zA-Z]+        the "@" followed by one alphabetic domain label
#   \.(com|net)       a literal dot followed by "com" or "net"
# Written as a raw string so the backslash in `\.` is passed to the regex engine
# unchanged instead of being treated as a (invalid) Python string escape.
re_pattern = r"[a-zA-Z0-9_.+-]+@[a-zA-Z]+\.(com|net)"

In [318]:
# Print every entry of `text` in which `re_pattern` is found anywhere in the string.
for candidate in text:
    if re.search(re_pattern, candidate) is None:
        continue
    print(candidate)

hg.wells@yahoo.com
isaas_asimov@yahoo.com
frank-herbert@yahoo.com
aclrk@aol.net


* `\.` → Escapes the dot so that it matches a literal `.` in domain names.
* `[a-zA-Z0-9-.]+` → Matches the top-level domain part after that dot (e.g., `com`, `edu.tr`, etc.).

In [319]:
# E-mail addresses with any domain ending, including multi-part ones such as .edu.tr.
#   [a-zA-Z0-9_.+-]+  local part: letters, digits, underscore, dot, plus, hyphen
#   @[a-zA-Z0-9-]+    the "@" followed by the first domain label
#   \.                a literal dot
#   [a-zA-Z0-9-.]+    the remainder of the domain; the "+" quantifier lets it cover
#                     the whole ending rather than a single character
# Raw string: keeps `\.` intact for the regex engine.
re_pattern2 = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"

In [320]:
# Print every entry of `text` in which `re_pattern2` is found anywhere in the string.
for candidate in text:
    if re.search(re_pattern2, candidate) is None:
        continue
    print(candidate)

arthur.clark@bogazici.edu.tr
hg.wells@yahoo.com
isaas_asimov@yahoo.com
frank-herbert@yahoo.com
aclrk@aol.net


## Merge the given separated digits

In [129]:
# Digit groups joined by hyphens; the next cell strips the hyphens.
number_lists = [
    '123-456-789',
    '345-567-980',
]

In [131]:
# Remove every hyphen from each entry and print the remaining digits.
for dashed in number_lists:
    print(re.sub(r'-', '', dashed))

123456789
345567980


## Some Useful Patterns


In [219]:
# Sample words used to try out the patterns below.
word_list = [
    'Siyah',
    'Sagma',
    'Sofa',
    'Ses',
    'Bed',
    'Santa Claus',
]

In [222]:
# The caret and dollar anchor the match to the start and end of the string:
# an uppercase 'S' first, a lowercase 'a' last, and one or more characters between.
pattern = "^S.+a$"


* ^ → Matches the start of the string.
* S → Matches the uppercase letter 'S' at the beginning.
* .+ → Matches one or more of any character (except newlines).
* a → Matches the lowercase letter 'a'.
* $ → Matches the end of the string.

In [223]:
# Print the words in which `pattern` is found.
for entry in word_list:
    if re.search(pattern, entry) is None:
        continue
    print(entry)

Sagma
Sofa


In [226]:
# `\w` matches a single word character (a letter, digit, or underscore), so
# re.search succeeds on any string that contains at least one such character.
# Raw string: `\w` is a regex escape, not a valid Python string escape.
pattern2 = r'\w'

In [228]:
# Print the words in which `pattern2` is found.
for entry in word_list:
    if re.search(pattern2, entry) is None:
        continue
    print(entry)

Siyah
Sagma
Sofa
Ses
Bed
Santa Claus


In [293]:
# Sample names: with and without a middle initial, plus entries that are not
# in "first last" form.
name_list = [
    'Isac Asimov',
    'Arthur C. Clark',
    'Frank Herbert',
    'm!sha',
    'Zendeya',
]

In [294]:
# Three whitespace-separated parts: first name, middle part ending in a period, last name.
#   group 1: (\w+)    first name
#   group 2: (\w+\.)  middle part followed by a literal "." (the dot is escaped;
#                     unescaped it would accept any character in that position)
#   group 3: (\w+)    last name
# Raw string: keeps `\w`, `\s` and `\.` intact for the regex engine.
name_pattern = r'(\w+)\s+(\w+\.)\s+(\w+)'

In [311]:
# For each name in which `name_pattern` is found, print the match object,
# the full matched text, and the first captured group.
for full_name in name_list:
    found = re.search(name_pattern, full_name)
    if found is None:
        continue
    print('MAtch Object:', found)
    print('Full Neme:', found.group(0))
    print('First Name:', found.group(1))


MAtch Object: <re.Match object; span=(0, 15), match='Arthur C. Clark'>
Full Neme: Arthur C. Clark
First Name: Arthur


In [304]:
# Whole-string match for "First Last" with an optional middle initial.
#   ^(\w+)               group 1: first name at the start of the string
#   (?:\s+([A-Z]\.))?    optional: whitespace, then group 2 = one uppercase letter + "."
#   \s+(\w+)$            whitespace, then group 3: last name at the end of the string
# Group 2 is None when no middle initial is present.
# Raw string: keeps `\w`, `\s` and `\.` intact for the regex engine.
name_pattern2 = r'^(\w+)(?:\s+([A-Z]\.))?\s+(\w+)$'

In [305]:
# Print the match object for every name that `name_pattern2` matches.
for full_name in name_list:
    hit = re.search(name_pattern2, full_name)
    if hit is not None:
        print(hit)

<re.Match object; span=(0, 11), match='Isac Asimov'>
<re.Match object; span=(0, 15), match='Arthur C. Clark'>
<re.Match object; span=(0, 13), match='Frank Herbert'>


# String Manipulations

In [310]:
name_list = ['Isac Asimov', 'Arthur C. Clark', 'Frank Herbert']

# Build (first, middle, last) tuples by splitting each name on whitespace.
# The first token is the first name and the final token is the last name;
# a middle initial is recorded only when the name has exactly three tokens.
parsed_names = []
for tokens in map(str.split, name_list):
    if len(tokens) == 3:
        initial = tokens[1]
    else:
        initial = None
    parsed_names.append((tokens[0], initial, tokens[-1]))

# Report one line per parsed name.
for first, middle, last in parsed_names:
    print(f"First Name: {first}, Middle Initial: {middle if middle else 'None'}, Last Name: {last}")


First Name: Isac, Middle Initial: None, Last Name: Asimov
First Name: Arthur, Middle Initial: C., Last Name: Clark
First Name: Frank, Middle Initial: None, Last Name: Herbert


# Use Pandas

In [312]:
!pip install pandas

Collecting pandas
  Obtaining dependency information for pandas from https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl.metadata
  Downloading pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting numpy>=1.23.2 (from pandas)
  Obtaining dependency information for numpy>=1.23.2 from https://files.pythonhosted.org/packages/e2/a7/b14f0a73eb0fe77cb9bd5b44534c183b23d4229c099e339c522724b02678/numpy-2.2.3-cp311-cp311-macosx_14_0_x86_64.whl.metadata
  Downloading numpy-2.2.3-cp311-cp311-macosx_14_0_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m898.0 kB/s[0m eta [36m0:00:00[0m0:01[0m
Collecting pytz>=2020.1 (from pandas)
  Obtaining dependency informa

In [314]:
import pandas as pd

# Create DataFrame with Science Fiction Writers
writers = pd.DataFrame({'Full Name': [
    'Isaac Asimov',
    'Arthur C. Clarke',
    'Frank Herbert',
    'Philip K. Dick',
    'Ursula K. Le Guin',
    'H.G. Wells'
]})

# Extract the first whitespace-separated token of each full name using the
# vectorised .str accessor instead of a per-row Python lambda.
writers['First Name'] = writers['Full Name'].str.split().str[0]

# Display DataFrame
print(writers)



           Full Name First Name
0       Isaac Asimov      Isaac
1   Arthur C. Clarke     Arthur
2      Frank Herbert      Frank
3     Philip K. Dick     Philip
4  Ursula K. Le Guin     Ursula
5         H.G. Wells       H.G.
