# String Manipulation

## Basics: replace, deletion, concatenation

In [3]:
# str.replace() returns a new string; the original is never modified
# (lists are mutable, strings are immutable).
s = "asdfasdf"

print(s.replace("a", "q"))
print(s)  # unchanged, because strings are immutable

# Replacing with the empty string acts as deletion.
print(s.replace("a", ""))  # removes every "a"
print(s.replace("a", "", 1))  # count=1: removes only the first "a"

# Slicing around index k drops the character at (0-based) index k.
k = 2
print(s[:k] + s[k + 1:])

qsdfqsdf
asdfasdf
sdfsdf
sdfasdf
asfasdf


### Various ways of concatenation

In [1]:
s1 = "Hello"
s2 = "world"

# 1) the + operator -- repeated concatenation in a loop can cost O(n^2) overall
print(s1 + " " + s2)

# 2) str.join() on a sequence of pieces
print(" ".join((s1, s2)))

# 3) str.format() fills the {} placeholders in order
print("{} {}".format(s1, s2))

# 4) f-string: the expressions are embedded directly in the literal
print(f"{s1} {s2}")

Hello world
Hello world
Hello world
Hello world


### When should each of these four methods be used?

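Why is the `+` operator a poor fit for repeated concatenation? Strings are immutable, so every `+=` in a loop may copy the entire string built so far; over n pieces that can add up to O(n^2) work.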

In [3]:
# Concatenating in a loop with +=: strings are immutable, so each step may
# build a brand-new string by copying everything accumulated so far. That is
# what makes repeated + concatenation potentially quadratic.
S = ["abc", "def", "ghi", "jkl", "mno"]

new_str = ""

# The loop variable is `piece` (not `s`) so that it does not overwrite the
# string `s` defined in the first cell of this notebook.
for piece in S:
    new_str += piece

print(new_str)


abcdefghijklmno


`.join()` is the right tool for concatenating many strings held in a collection (e.g. a list) with the same delimiter between each pair, such as a space between words.

In [4]:
S = ["abc", "def", "ghi", "jkl", "mno"]

# Join the same pieces with four delimiters in turn:
# nothing, a space, a tab, and a newline.
for delimiter in ("", " ", "\t", "\n"):
    print(delimiter.join(S))

abcdefghijklmno
abc def ghi jkl mno
abc	def	ghi	jkl	mno
abc
def
ghi
jkl
mno


`format()` is useful when the template ("basis") string has to be defined first and filled in later. An f-string is useful when the string is built on the fly, right where the values are available.

In [7]:
# str.format(): the template ("basis") is defined once, up front, and filled in later.
basis = "{}.{}"

filename = "asdf"
extension = "mp3"

full_name = basis.format(filename, extension)

# f-string: the template is written inline, right where the values are available.
# Both approaches build the same string here.
full_name = f"{filename}.{extension}"

# Sample data iterated over by the f-string filename example that follows.
# It must be live code (not a comment) so the notebook runs on a fresh kernel.
names = ["Hans", "Elsa", "Sven", "Anna"]

# Trailing expression so the cell actually displays the result ('asdf.mp3').
full_name

'asdf.mp3'

In [13]:
# Build "<index>_<name>.<kind>" labels with an f-string;
# only "Sven" is tagged "animal", everyone else is "human".
filenames = []
for i, x in enumerate(names):
    ext = "animal" if x == "Sven" else "human"
    filenames.append(f"{i}_{x}.{ext}")

print(filenames)

['0_Hans.human', '1_Elsa.human', '2_Sven.animal', '3_Anna.human']


## Special Strings: OS path

In [14]:
import os

### Another Reference:
https://medium.com/@ageitgey/python-3-quick-tip-the-easy-way-to-deal-with-file-paths-on-windows-mac-and-linux-11a072b58d5f#:~:text=Python%203.4%20introduced%20a%20new,forward%20slashes%20with%20pathlib%20functions.

In [38]:
# Each result is printed explicitly: a notebook cell only auto-displays its
# last expression, so bare calls earlier in the cell would show nothing.

# expand "~" to the current user's home directory
print(os.path.expanduser('~/Desktop'))

# current working directory (like `pwd`)
print(os.getcwd())

# relative -> absolute path (resolved against the current working directory)
oop_abs = os.path.abspath('../oop')
print(oop_abs)

# absolute -> relative path (expressed relative to the current working directory);
# round-trips the absolute path above instead of relying on a machine-specific path
print(os.path.relpath(oop_abs))

# basename = last path component, dirname = everything before it
print(os.path.basename('some/path/to/file.py'))  # file.py
print(os.path.dirname('some/path/to/file.py'))   # some/path/to

# os.path.join adds a path separator only where one is missing
# (note 'directory/' already ends with one), whereas str.join always
# inserts its delimiter between every pair of items
print(os.path.join('some/path/to', 'directory/', 'filename'))

# exists: any kind of path; isfile: only regular files (False for a directory)
print(os.path.exists('../oop'))
print(os.path.isfile('../oop'))

# NOTE: this writes to disk. It creates all missing intermediate directories;
# exist_ok=True means re-running the cell does not raise if they already exist.
os.makedirs('../oop/some/new/dir', exist_ok=True)

## Extra: Unicode peculiarities

In [6]:
import unicodedata

In [10]:
# NFKD is the "compatibility decomposition" normalization form.
# NOTE(review): for precomposed Hangul this should split each syllable into
# separate jamo code points, so the result can render identically to the input
# while differing in length and equality -- confirm by comparing len() of both.
unicodedata.normalize("NFKD", "안녕하세요")

'안녕하세요'

In [11]:
# The u prefix marks a unicode literal. It mattered in Python 2; in Python 3
# every str is already unicode, so the prefix is accepted but changes nothing.
# As a bare expression that is not the last line of the cell, it displays nothing.
u"asdf"  # important in python 2.

print("asdf \t asdf")  # \t is interpreted as a tab character
print(r"asdf \t asdf")  # r means raw: the backslash is kept literally

asdf 	 asdf
asdf \t asdf


## Further: regex

In [None]:
import re  # regex module. Later with text related stuff