## 열 생성(추가)

In [1]:
import pandas as pd

In [41]:
# Sample records: one dict per friend, all sharing the same three keys.
friend_dict_list = [
    dict(name='John', age=15, job='student'),
    dict(name='Jenny', age=25, job='developer'),
    dict(name='Nate', age=30, job='teacher'),
]
# Passing `columns` fixes the column order of the resulting frame explicitly.
df = pd.DataFrame(data=friend_dict_list, columns=['name', 'age', 'job'])

In [42]:
df.head()

Unnamed: 0,name,age,job
0,John,15,student
1,Jenny,25,developer
2,Nate,30,teacher


In [43]:
# Add a 'salary' column; the scalar 0 is broadcast to every row.
df['salary'] = 0

In [44]:
df.head()

Unnamed: 0,name,age,job,salary
0,John,15,student,0
1,Jenny,25,developer,0
2,Nate,30,teacher,0


## 열 수정

In [6]:
import numpy as np

In [45]:
# Overwrite 'salary': store 'yes' where job is not 'student', and 'no' where it is.
df['salary'] = np.where(df['job'].ne('student'), 'yes', 'no')

In [46]:
df.head()

Unnamed: 0,name,age,job,salary
0,John,15,student,no
1,Jenny,25,developer,yes
2,Nate,30,teacher,yes


### 새로운 열에 연산결과 추가하기

In [47]:
# Exam scores per friend, expanded from (name, midterm, final) tuples into dict records.
friend_dict_list = [
    {'name': name, 'midterm': midterm, 'final': final}
    for name, midterm, final in (
        ('John', 95, 85),
        ('Jenny', 85, 80),
        ('Nate', 30, 10),
    )
]
# Passing `columns` fixes the column order of the resulting frame explicitly.
df = pd.DataFrame(data=friend_dict_list, columns=['name', 'midterm', 'final'])

In [48]:
df.head()

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10


In [49]:
# Add a 'total' column holding the element-wise sum of the midterm and final scores.
df['total'] = df['midterm'].add(df['final'])

In [50]:
df.head()

Unnamed: 0,name,midterm,final,total
0,John,95,85,180
1,Jenny,85,80,165
2,Nate,30,10,40


In [51]:
# Add an 'average' column: the mean of the two exams, i.e. total divided by 2 (float result).
df['average'] = df['total'].div(2)

In [52]:
df.head()

Unnamed: 0,name,midterm,final,total,average
0,John,95,85,180,90.0
1,Jenny,85,80,165,82.5
2,Nate,30,10,40,20.0


### 리스트로 추가하기

In [53]:
# Build one letter grade per row from the average score:
# 90 and above -> 'A', 80 and above -> 'B', everything else -> 'F'.
grades = [
    'A' if score >= 90 else 'B' if score >= 80 else 'F'
    for score in df['average']
]

# A plain Python list with one entry per row can be assigned directly as a new column.
df['grade'] = grades

In [54]:
df.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,A
1,Jenny,85,80,165,82.5,B
2,Nate,30,10,40,20.0,F


### apply 메소드로 추가하기

In [None]:
# Print the built-in documentation for the apply method of the 'grade' Series.
# The signature below is the one shown when this notebook was written; it may differ in other pandas versions.
# apply(func: 'AggFuncType', convert_dtype: 'bool' = True, args: 'tuple[Any, ...]' = (), **kwargs)
help(df.grade.apply)

In [56]:
def pass_or_fail(row):
    """Map a single grade value to a pass/fail label.

    Despite its name, ``row`` is one cell value (a letter grade), not a whole
    DataFrame row. Returns "Fail" only for the grade 'F'; any other value
    yields "Pass".
    """
    return "Fail" if row == 'F' else "Pass"

In [57]:
# Pass each value of df.grade to pass_or_fail and write the results back into df.grade
# (the letter grades are overwritten with "Pass"/"Fail").
df.grade = df.grade.apply(pass_or_fail)

In [58]:
df.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,Pass
1,Jenny,85,80,165,82.5,Pass
2,Nate,30,10,40,20.0,Fail


### apply 메소드로 추가하기
- `df.컬럼명.apply`로 접근할 수 없는 경우(예: 컬럼명에 `-`가 포함되어 속성 이름으로 쓸 수 없는 경우)
- `df['컬럼명'].apply`로 사용 가능

In [59]:
# One record per date string; the column name itself contains hyphens.
date_list = [
    {'yyyy-mm-dd': date_text}
    for date_text in ('2012-01-01', '2022-08-17')
]
df = pd.DataFrame(data=date_list, columns=['yyyy-mm-dd'])

In [60]:
df.head()

Unnamed: 0,yyyy-mm-dd
0,2012-01-01
1,2022-08-17


In [61]:
def extract_year(row):
    """Return the text before the first '-' of a date string.

    ``row`` is a single cell value such as '2012-01-01'; the result stays a
    string (e.g. '2012'). A value without any '-' is returned unchanged.
    """
    year, *_ = row.split('-')
    return year

In [62]:
# Bracket access is required here: 'yyyy-mm-dd' contains hyphens, so it cannot be used as an attribute name.
# Each date string is passed to extract_year and the results become the new 'year' column.
df['year'] = df['yyyy-mm-dd'].apply(extract_year)

In [63]:
df.head()

Unnamed: 0,yyyy-mm-dd,year
0,2012-01-01,2012
1,2022-08-17,2022


## 행 생성(추가)

In [32]:
# Exam-score records used as the base frame for the row-append examples below.
friend_dict_list = [
    dict(name='John', midterm=95, final=85),
    dict(name='Jenny', midterm=85, final=80),
    dict(name='Nate', midterm=30, final=10),
]
# Passing `columns` fixes the column order of the resulting frame explicitly.
df = pd.DataFrame(data=friend_dict_list, columns=['name', 'midterm', 'final'])

In [33]:
# A one-row frame with the same columns as df, used as the row to append.
df2 = pd.DataFrame({
    'name': ['Ben'],
    'midterm': [50],
    'final': [50],
})

In [34]:
df2.head()

Unnamed: 0,name,midterm,final
0,Ben,50,50


In [35]:
# DataFrame.append is deprecated: older pandas versions emit
#   "FutureWarning: The frame.append method is deprecated and will be removed
#    from pandas in a future version. Use pandas.concat instead."
# and pandas 2.0 removed the method entirely, so calling it there raises
# AttributeError. Keep demonstrating the legacy call where it still exists and
# fall back to pandas.concat (the recommended replacement) otherwise.
# Either way a new frame is returned; df itself is not modified.
(
    df.append(df2, ignore_index=True)
    if hasattr(pd.DataFrame, 'append')
    else pd.concat([df, df2], ignore_index=True)
)

  df.append(df2, ignore_index=True)


Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10
3,Ben,50,50


In [None]:
# Print the built-in documentation for pandas.concat.
# The signature below is the one shown when this notebook was written; it may differ in other pandas versions.
# concat(objs: 'Iterable[NDFrame] | Mapping[Hashable, NDFrame]', axis: 'Axis' = 0, join: 'str' = 'outer', ignore_index: 'bool' = False, keys=None, levels=None, names=None, verify_integrity: 'bool' = False, sort: 'bool' = False, copy: 'bool' = True)
help(pd.concat)

In [38]:
# Recommended approach: concatenate the two frames row-wise.
# ignore_index=True renumbers the rows of the result from 0 instead of keeping each frame's own index.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10
3,Ben,50,50
