# 공백 제거

In [1]:
import pandas as pd

# Sample strings padded with leading and trailing spaces.
padded_texts = ['  hello world  ', '  python programming  ', '  data science  ']
df = pd.DataFrame({'text': padded_texts})

# Series.str.strip() with no arguments removes whitespace from both ends of
# each value; the original column is kept alongside the cleaned one.
df = df.assign(stripped_text=df['text'].str.strip())
print(df)

                     text       stripped_text
0           hello world           hello world
1    python programming    python programming
2          data science          data science


# 문자열 분리

In [3]:
import pandas as pd

# Each value has the form '<date>_<category>'.
df = pd.DataFrame({'data': ['2023-10-27_A', '2023-11-15_B', '2023-12-01_C']})

# expand=True returns the pieces as DataFrame columns instead of a list per row.
# n=1 limits the split to the first underscore so at most two columns come back
# even when the category part itself contains '_'; without the limit an extra
# underscore would yield a third column and the two-column assignment would fail.
df[['date', 'category']] = df['data'].str.split('_', n=1, expand=True)
print(df)

           data        date category
0  2023-10-27_A  2023-10-27        A
1  2023-11-15_B  2023-11-15        B
2  2023-12-01_C  2023-12-01        C


# 문자열 대체

In [4]:
import pandas as pd

df = pd.DataFrame({'product': ['apple_100', 'banana_200', 'orange_150']})

# Literal replacement: turn every underscore into a space. regex=False is spelled
# out because the default of `regex` changed between pandas 1.x and 2.0.
df['product_name'] = df['product'].str.replace('_', ' ', regex=False)
print(df)

# Regex replacement: drop each underscore followed by one or more digits.
df['product_name_only'] = df['product'].str.replace(r'_\d+', '', regex=True)
print(df)

      product product_name
0   apple_100    apple 100
1  banana_200   banana 200
2  orange_150   orange 150
      product product_name product_name_only
0   apple_100    apple 100             apple
1  banana_200   banana 200            banana
2  orange_150   orange 150            orange


# 문자열 연결

In [7]:
import pandas as pd

df = pd.DataFrame({
    'first_name': ['James', 'Mary', 'Peter', 'Jane'],
    'last_name': ['Bond', 'Smith', None, 'Doe'],
})

# Prefix every first name with the greeting, then join the last name onto it.
# str.cat receives a single Series here; sep goes between the two parts and
# na_rep is substituted for missing values (the None last name in row 2).
greeting_prefix = 'Hello, ' + df['first_name']
df['greeting_cat'] = greeting_prefix.str.cat(df['last_name'], sep=' ', na_rep='[이름없음]')

print(df)

  first_name last_name         greeting_cat
0      James      Bond    Hello, James Bond
1       Mary     Smith    Hello, Mary Smith
2      Peter      None  Hello, Peter [이름없음]
3       Jane       Doe      Hello, Jane Doe
