In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

### 데이터프레임 내 값변경/추가

In [2]:
# 샘플 데이터프레임 생성_1
df1 = sns.load_dataset('anagrams')
df1 = df1[df1.index %2 ==0].reset_index(drop=True)  # 인덱스가 짝수인 행만 사용함
df1

Unnamed: 0,subidr,attnr,num1,num2,num3
0,1,divided,2,4.0,7
1,3,divided,3,5.0,6
2,5,divided,4,5.0,8
3,7,divided,5,4.5,6
4,9,divided,2,3.0,7
5,11,focused,6,5.0,6
6,13,focused,6,5.0,9
7,15,focused,8,8.0,7
8,17,focused,7,7.0,6
9,19,focused,5,6.0,6


In [3]:
# str.replace() : 문자열 내 매칭되는 글자 치환
df1_1 = df1[['attnr']].copy()
df1_1['_revised'] = df1.attnr.str.replace('ed','e')
df1_1

Unnamed: 0,attnr,_revised
0,divided,divide
1,divided,divide
2,divided,divide
3,divided,divide
4,divided,divide
5,focused,focuse
6,focused,focuse
7,focused,focuse
8,focused,focuse
9,focused,focuse


In [4]:
# replace() : 전체 문자열 치환 (매칭 안되는 값은 그대로 반환함)
df1_2 = df1[['num1']].copy()
df1_2['_revised'] = df1.num1.replace({2 : 'small', 4 : 'medium',  6 : 'big'})
df1_2

Unnamed: 0,num1,_revised
0,2,small
1,3,3
2,4,medium
3,5,5
4,2,small
5,6,big
6,6,big
7,8,8
8,7,7
9,5,5


In [5]:
# 함수 및 apply 활용
def condition(x):  
    if x<5:
        return '5미만'
    elif x<7:
        return '5~7'
    else:
        return '7이상'
df1_3 = df1[['num2']].copy()
df1_3['_revised'] = df1.num2.apply(condition)
df1_3

Unnamed: 0,num2,_revised
0,4.0,5미만
1,5.0,5~7
2,5.0,5~7
3,4.5,5미만
4,3.0,5미만
5,5.0,5~7
6,5.0,5~7
7,8.0,7이상
8,7.0,7이상
9,6.0,5~7


In [6]:
# lambda 활용
df1_4 = df1[['num3']].copy()
df1_4['_revised'] = df1.num3.apply(lambda x: x**2)
df1_4

Unnamed: 0,num3,_revised
0,7,49
1,6,36
2,8,64
3,6,36
4,7,49
5,6,36
6,9,81
7,7,49
8,6,36
9,6,36


In [7]:
# transform() : 주어진 함수에 따라 값 수정
df1_5 = df1[['num1']].copy()
df1_5[['_revised_log', '_revised_sqrt']] = df1.num1.transform([lambda x: np.log(x), np.sqrt])
df1_5

Unnamed: 0,num1,_revised_log,_revised_sqrt
0,2,0.693147,1.414214
1,3,1.098612,1.732051
2,4,1.386294,2.0
3,5,1.609438,2.236068
4,2,0.693147,1.414214
5,6,1.791759,2.44949
6,6,1.791759,2.44949
7,8,2.079442,2.828427
8,7,1.94591,2.645751
9,5,1.609438,2.236068


In [8]:
# clip(lower, upper) : 최대/최소값을 기반으로 트리밍(최소값 미만은 lower로, 최대값 이상은 upper로 제한)
df1_6 = df1[['num2']].copy()
df1_6['_revised'] = df1.num2.clip(lower = 4.5, upper = 6.5)
df1_6

Unnamed: 0,num2,_revised
0,4.0,4.5
1,5.0,5.0
2,5.0,5.0
3,4.5,4.5
4,3.0,4.5
5,5.0,5.0
6,5.0,5.0
7,8.0,6.5
8,7.0,6.5
9,6.0,6.0


In [9]:
# eval(expression) : 문자열로 받아서 실행, 대부분의 구문을 분석해서 실행할 수 있음
df1_7 = df1.eval('num2 = num2 * 50')  
df1_7

Unnamed: 0,subidr,attnr,num1,num2,num3
0,1,divided,2,200.0,7
1,3,divided,3,250.0,6
2,5,divided,4,250.0,8
3,7,divided,5,225.0,6
4,9,divided,2,150.0,7
5,11,focused,6,250.0,6
6,13,focused,6,250.0,9
7,15,focused,8,400.0,7
8,17,focused,7,350.0,6
9,19,focused,5,300.0,6


In [10]:
# np.where(조건, True일 경우 반환, False일 경우 반환) 
df1_8 = df1[['num2']].copy()
df1_8['_revised'] = np.where(df1.num2 < 5, '5미만', np.where(df1.num2<7, '5~7', '7이상'))
df1_8

Unnamed: 0,num2,_revised
0,4.0,5미만
1,5.0,5~7
2,5.0,5~7
3,4.5,5미만
4,3.0,5미만
5,5.0,5~7
6,5.0,5~7
7,8.0,7이상
8,7.0,7이상
9,6.0,5~7


### 컬럼 추가, 변경, 정리, 삭제

In [11]:
# 샘플 데이터프레임 생성_2
dict2 = {'Name' : ['San Miguel','Sauvignon Blanc', 'Jinro', 'Mak Geolli'], 'Korean' : [60, 80, 70, 75], 'Math' : [90,80,80,50], 'Society' : [70, 90, 95, 90]}
df2 = pd.DataFrame(dict2)
df2

Unnamed: 0,Name,Korean,Math,Society
0,San Miguel,60,90,70
1,Sauvignon Blanc,80,80,90
2,Jinro,70,80,95
3,Mak Geolli,75,50,90


In [12]:
# 컬럼 추가 : 맨 오른쪽
df2['Science'] = [85, 75, 80, 65]
df2

Unnamed: 0,Name,Korean,Math,Society,Science
0,San Miguel,60,90,70,85
1,Sauvignon Blanc,80,80,90,75
2,Jinro,70,80,95,80
3,Mak Geolli,75,50,90,65


In [13]:
# insert(위치, 컬럼명, 값 또는 seq) : n번째 위치에 컬럼 추가
df2.insert(3, 'English', [60, 70, 80, 90])
df2

Unnamed: 0,Name,Korean,Math,English,Society,Science
0,San Miguel,60,90,60,70,85
1,Sauvignon Blanc,80,80,70,90,75
2,Jinro,70,80,80,95,80
3,Mak Geolli,75,50,90,90,65


In [14]:
# split() : 컬럼 쪼개기
df2[['First_name', 'Last_name']] = df2.Name.str.split(' ', expand=True)
df2

Unnamed: 0,Name,Korean,Math,English,Society,Science,First_name,Last_name
0,San Miguel,60,90,60,70,85,San,Miguel
1,Sauvignon Blanc,80,80,70,90,75,Sauvignon,Blanc
2,Jinro,70,80,80,95,80,Jinro,
3,Mak Geolli,75,50,90,90,65,Mak,Geolli


In [15]:
# join() : 컬럼 합치기
df2['Name2'] = df2[['Last_name', 'First_name']].apply(lambda x: " ".join(x.values.astype(str)), axis=1)
df2

Unnamed: 0,Name,Korean,Math,English,Society,Science,First_name,Last_name,Name2
0,San Miguel,60,90,60,70,85,San,Miguel,Miguel San
1,Sauvignon Blanc,80,80,70,90,75,Sauvignon,Blanc,Blanc Sauvignon
2,Jinro,70,80,80,95,80,Jinro,,None Jinro
3,Mak Geolli,75,50,90,90,65,Mak,Geolli,Geolli Mak


In [16]:
# 원하는 컬럼만 남기거나 순서 변경하기. columns()로 출력 후 복사해서 사용하면 용이
df2 = df2[['Name', 'Korean', 'Math', 'Society', 'English', 'First_name', 'Last_name']]
df2

Unnamed: 0,Name,Korean,Math,Society,English,First_name,Last_name
0,San Miguel,60,90,70,60,San,Miguel
1,Sauvignon Blanc,80,80,90,70,Sauvignon,Blanc
2,Jinro,70,80,95,80,Jinro,
3,Mak Geolli,75,50,90,90,Mak,Geolli


In [17]:
# 컬럼 앞뒤 맞바꾸기
col1 = df2.columns[-2:].to_list()
col2 = df2.columns[:-2].to_list()
df2 = df2[col1 + col2]
df2

Unnamed: 0,First_name,Last_name,Name,Korean,Math,Society,English
0,San,Miguel,San Miguel,60,90,70,60
1,Sauvignon,Blanc,Sauvignon Blanc,80,80,90,70
2,Jinro,,Jinro,70,80,95,80
3,Mak,Geolli,Mak Geolli,75,50,90,90


In [18]:
# 이렇게도 가능하지만,,,너무 복잡한듯
col = ['Society', 'Math']
pd.concat([df2[col], df2[[x for x in df2.columns if x not in col]]], axis=1)

Unnamed: 0,Society,Math,First_name,Last_name,Name,Korean,English
0,70,90,San,Miguel,San Miguel,60,60
1,90,80,Sauvignon,Blanc,Sauvignon Blanc,80,70
2,95,80,Jinro,,Jinro,70,80
3,90,50,Mak,Geolli,Mak Geolli,75,90


In [19]:
# 컬럼 순서를 거꾸로 
df2 = df2[reversed(df2.columns)]
df2

Unnamed: 0,English,Society,Math,Korean,Name,Last_name,First_name
0,60,70,90,60,San Miguel,Miguel,San
1,70,90,80,80,Sauvignon Blanc,Blanc,Sauvignon
2,80,95,80,70,Jinro,,Jinro
3,90,90,50,75,Mak Geolli,Geolli,Mak


In [20]:
# List Comprehension : 서로 맞바꾸기
df2[['First_name' if c == 'Last_name' else 'Last_name' if c=='First_name' else c for c in df2.columns]]

Unnamed: 0,English,Society,Math,Korean,Name,First_name,Last_name
0,60,70,90,60,San Miguel,San,Miguel
1,70,90,80,80,Sauvignon Blanc,Sauvignon,Blanc
2,80,95,80,70,Jinro,Jinro,
3,90,90,50,75,Mak Geolli,Mak,Geolli


In [21]:
# 특정 컬럼 앞으로 보내기
df2 = df2[['Society'] + [i for i in df2.columns.to_list() if i != 'Society']]
df2

Unnamed: 0,Society,English,Math,Korean,Name,Last_name,First_name
0,70,60,90,60,San Miguel,Miguel,San
1,90,70,80,80,Sauvignon Blanc,Blanc,Sauvignon
2,95,80,80,70,Jinro,,Jinro
3,90,90,50,75,Mak Geolli,Geolli,Mak


In [22]:
# 컬럼 삭제하기
df2.drop(['First_name'], axis=1, inplace=True)
del df2['Last_name']
df2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2.drop(['First_name'], axis=1, inplace=True)


Unnamed: 0,Society,English,Math,Korean,Name
0,70,60,90,60,San Miguel
1,90,70,80,80,Sauvignon Blanc
2,95,80,80,70,Jinro
3,90,90,50,75,Mak Geolli


In [23]:
# rename(columns = {}) : 컬럼명 변경하기
df2 = df2.rename(columns = {'Korean' : '국어', 'Math' : '수학'})
df2

Unnamed: 0,Society,English,수학,국어,Name
0,70,60,90,60,San Miguel
1,90,70,80,80,Sauvignon Blanc
2,95,80,80,70,Jinro
3,90,90,50,75,Mak Geolli


In [24]:
# axis로 지정 가능
df2.rename({'Society' : '사회', 'Engilsh' : '영어'}, axis=1, inplace=True)  # 당연히 정확히 일치해야 하며, 불일치시 스킵
df2

Unnamed: 0,사회,English,수학,국어,Name
0,70,60,90,60,San Miguel
1,90,70,80,80,Sauvignon Blanc
2,95,80,80,70,Jinro
3,90,90,50,75,Mak Geolli


In [25]:
# 컬럼명 일괄 변경, 갯수 맞아야 함
df2.columns = ['Soc', 'Eng', 'Mat', 'Kor', 'Name']
df2

Unnamed: 0,Soc,Eng,Mat,Kor,Name
0,70,60,90,60,San Miguel
1,90,70,80,80,Sauvignon Blanc
2,95,80,80,70,Jinro
3,90,90,50,75,Mak Geolli


In [26]:
# 컬럼명 소문자로 일괄 변경
df2.columns = list(c.lower() for c in df2.columns)
df2

Unnamed: 0,soc,eng,mat,kor,name
0,70,60,90,60,San Miguel
1,90,70,80,80,Sauvignon Blanc
2,95,80,80,70,Jinro
3,90,90,50,75,Mak Geolli


In [27]:
df2_2 = df2[['name','kor','kor','mat','eng','soc']]
df2_2

Unnamed: 0,name,kor,kor.1,mat,eng,soc
0,San Miguel,60,60,90,60,70
1,Sauvignon Blanc,80,80,80,70,90
2,Jinro,70,70,80,80,95
3,Mak Geolli,75,75,50,90,90


In [28]:
# 컬럼에 중복 있을 경우, 단순히 컬럼리스트 재지정한다고 삭제되지 않음
df2_2[['name','kor','mat','eng','soc']]

Unnamed: 0,name,kor,kor.1,mat,eng,soc
0,San Miguel,60,60,90,60,70
1,Sauvignon Blanc,80,80,80,70,90
2,Jinro,70,70,80,80,95
3,Mak Geolli,75,75,50,90,90


In [29]:
# 이 경우, 중복 컬럼 삭제를 통해 해결 가능
df2_2.T.drop_duplicates().T

Unnamed: 0,name,kor,mat,eng,soc
0,San Miguel,60,90,60,70
1,Sauvignon Blanc,80,80,70,90
2,Jinro,70,80,80,95
3,Mak Geolli,75,50,90,90


### 행 추가, 삭제

In [30]:
df2_3 = df2[['name'] + [x for x in df2.columns if 'name' not in x]].copy()  # 특정 컬럼(들)만 앞으로 보내기
df2_3

Unnamed: 0,name,soc,eng,mat,kor
0,San Miguel,70,60,90,60
1,Sauvignon Blanc,90,70,80,80
2,Jinro,95,80,80,70
3,Mak Geolli,90,90,50,75


In [31]:
# 마지막에 행 추가
df2_3.loc[df2_3.shape[0]] = ['Chung Ha', 100, 90, 90, 80]
df2_3

Unnamed: 0,name,soc,eng,mat,kor
0,San Miguel,70,60,90,60
1,Sauvignon Blanc,90,70,80,80
2,Jinro,95,80,80,70
3,Mak Geolli,90,90,50,75
4,Chung Ha,100,90,90,80


In [32]:
# 특정 조건으로 행 삭제
df2_3 = df2_3[df2_3.name != 'Jinro']
df2_3

Unnamed: 0,name,soc,eng,mat,kor
0,San Miguel,70,60,90,60
1,Sauvignon Blanc,90,70,80,80
3,Mak Geolli,90,90,50,75
4,Chung Ha,100,90,90,80


In [33]:
# 인덱스로 행 삭제
df2_3 = df2_3.drop(3)
df2_3

Unnamed: 0,name,soc,eng,mat,kor
0,San Miguel,70,60,90,60
1,Sauvignon Blanc,90,70,80,80
4,Chung Ha,100,90,90,80


### 변수 타입 변경

In [34]:
# 샘플 데이터프레임 생성_3
dict3 = {'ipo' : ['제이오', '오아시스', '나노팀'], 
         'date' : ['20230208', '20230216', '20230221'],
         'deposit' : ['52,000', '-', '26,700'],
         'rate' : [142.47, 0, 1637.43]}
df3 = pd.DataFrame(dict3)
print(df3.dtypes)
df3

ipo         object
date        object
deposit     object
rate       float64
dtype: object


Unnamed: 0,ipo,date,deposit,rate
0,제이오,20230208,52000,142.47
1,오아시스,20230216,-,0.0
2,나노팀,20230221,26700,1637.43


In [35]:
# astype() : 컬럼 타입 변경
df3.rate.astype(int)  # int | int8~64 | float | float16~64 | complex | str | category | bool

0     142
1       0
2    1637
Name: rate, dtype: int32

In [36]:
# 복수컬럼 타입 변경
df3.astype({'date' : 'int32', 'deposit' : str})

Unnamed: 0,ipo,date,deposit,rate
0,제이오,20230208,52000,142.47
1,오아시스,20230216,-,0.0
2,나노팀,20230221,26700,1637.43


In [37]:
# to_numeric(컬럼명, errors) : 숫자형으로 변경, dtype() 과 달리 시리즈만 가능
pd.to_numeric(
    df3.deposit.str.replace(',', ''),  # 변경할 컬럼
    errors = 'coerce',  # ignore : 변경 불가하면 원본 반환 | coerce : 변경 불가하면 NaN으로 반환 | *raise : 변경 불가하면 에러
    downcast = 'float'  # 메모리 절약, integer | signed | unsigned | float
)

0    52000.0
1        NaN
2    26700.0
Name: deposit, dtype: float32

### 결측값 확인, 삭제, 보간

In [38]:
# 샘플 데이터프레임 생성_4
col = ['spring', 'summer', 'fall', 'winter']
row = ['apple', 'peach', 'melon', 'strawberry', 'chestnut']
data = [[6, pd.NA, 8, 7],
                [4, 9, 5, np.nan],
                [5, 5, 5, 5],
                [None, 3, 6, 10],
                [None, None, None, None]]
df4 = pd.DataFrame(data, row, col)
df4

Unnamed: 0,spring,summer,fall,winter
apple,6.0,,8.0,7.0
peach,4.0,9.0,5.0,
melon,5.0,5.0,5.0,5.0
strawberry,,3.0,6.0,10.0
chestnut,,,,


In [39]:
# isna() : 결측값 여부 확인
df4.isna()

Unnamed: 0,spring,summer,fall,winter
apple,False,True,False,False
peach,False,False,False,True
melon,False,False,False,False
strawberry,True,False,False,False
chestnut,True,True,True,True


In [40]:
# dropna() : 결측값 제거
df4.dropna(subset = ['summer', 'fall'],  # 결측값 여부 체크할 컬럼, 생략시 전체
          how = 'any',  # any: 결측값 하나라도 있는 경우, all: 모두 결측값인 경우, 기본값 any
          axis = 0,  # 0: 행 기준, 1: 열 기준, 기본값 0
          )

Unnamed: 0,spring,summer,fall,winter
peach,4.0,9,5.0,
melon,5.0,5,5.0,5.0
strawberry,,3,6.0,10.0


In [41]:
# fillna() : 결측값 보간
df4.fillna(method = 'ffill',  # 결측값 변경 방식, *ffill: na 앞의 값으로 채움, bfill: na 뒤의 값으로 채움
          axis = 0,  # *0: 행 기준 | 1: 열 기준
          limit = 1,  # 결측값 변경할 횟수제한, 미지정시 무제한
          downcast = 'infer'  # float64를 int64로 변경
          )

  df4.fillna(method = 'ffill',  # 결측값 변경 방식, *ffill: na 앞의 값으로 채움, bfill: na 뒤의 값으로 채움
  df4.fillna(method = 'ffill',  # 결측값 변경 방식, *ffill: na 앞의 값으로 채움, bfill: na 뒤의 값으로 채움


Unnamed: 0,spring,summer,fall,winter
apple,6.0,,8,7
peach,4.0,9.0,5,7
melon,5.0,5.0,5,5
strawberry,5.0,3.0,6,10
chestnut,,3.0,6,10


In [42]:
# 특정 값으로 결측값 채우기
df4.fillna(0)

Unnamed: 0,spring,summer,fall,winter
apple,6.0,0,8.0,7.0
peach,4.0,9,5.0,0.0
melon,5.0,5,5.0,5.0
strawberry,0.0,3,6.0,10.0
chestnut,0.0,0,0.0,0.0


In [43]:
# 컬럼별로 다르게 결측값 채우기
df4.fillna({'spring':'A', 'summer':'B', 'fall':'C', 'winter':'D'})

Unnamed: 0,spring,summer,fall,winter
apple,6.0,B,8.0,7.0
peach,4.0,9,5.0,D
melon,5.0,5,5.0,5.0
strawberry,A,3,6.0,10.0
chestnut,A,B,C,D


### 중복 확인, 삭제

In [44]:
# 샘플 데이터프레임 생성_5
col = ['col1', 'col2', 'col3']
data = [['A', '가', 1],
                ['A', '가', 1],
                ['A', '나', 2],
                ['B', '나', 3],
                ['B', '나', 4]]
df5 = pd.DataFrame(data, columns = col)
df5

Unnamed: 0,col1,col2,col3
0,A,가,1
1,A,가,1
2,A,나,2
3,B,나,3
4,B,나,4


In [45]:
# duplicated() : 중복행 확인
df5.duplicated(
    subset = ['col1', 'col2'],  # 지정한 열 기준으로 중복여부 확인, 미지정시 행 전체
    keep = 'first'  # 확인 방향, *first : 위에서부터 확인 후 중복인 행 나오면 True | last : 아래에서부터 확인
)

0    False
1     True
2    False
3    False
4     True
dtype: bool

In [46]:
# drop_duplicates() : 중복행 제거
df5.drop_duplicates(
    subset = ['col1', 'col2'],  # 지정한 열 기준으로 중복여부 확인, 미지정시 행 전체
    keep = 'first',  # 중복제거 시 남길 행, *first : 첫값 남김 | last : 마지막값 남김
    inplace = False,  # 원본 변경 여부, True | *False
    ignore_index = False  # 기존 인덱스 무시하고 새로 채번, True | *False
)

Unnamed: 0,col1,col2,col3
0,A,가,1
2,A,나,2
3,B,나,3


In [47]:
# 샘플 데이터프레임 생성_6
df6 = pd.DataFrame(np.random.randint(1,101,(6,2)), index=pd.date_range('20230620', periods=6), columns= ['col1','col2'])
df6

Unnamed: 0,col1,col2
2023-06-20,32,16
2023-06-21,32,25
2023-06-22,46,59
2023-06-23,78,60
2023-06-24,96,81
2023-06-25,66,2


In [48]:
# diff() : 연속된 두행 간 차이를 구함
df6['col1_diff'] = df6.col1.diff()
df6

Unnamed: 0,col1,col2,col1_diff
2023-06-20,32,16,
2023-06-21,32,25,0.0
2023-06-22,46,59,14.0
2023-06-23,78,60,32.0
2023-06-24,96,81,18.0
2023-06-25,66,2,-30.0


In [49]:
# shift() : 데이터프레임의 인덱스를 이동. 시계열 데이터에 유용
df6.shift(1, fill_value=1)

Unnamed: 0,col1,col2,col1_diff
2023-06-20,1,1,1.0
2023-06-21,32,16,
2023-06-22,32,25,0.0
2023-06-23,46,59,14.0
2023-06-24,78,60,32.0
2023-06-25,96,81,18.0


In [50]:
# diff() 와 결과 동일
df6['col1_gap'] = df6.col1 - df6.col1.shift(1)
df6

Unnamed: 0,col1,col2,col1_diff,col1_gap
2023-06-20,32,16,,
2023-06-21,32,25,0.0,0.0
2023-06-22,46,59,14.0,14.0
2023-06-23,78,60,32.0,32.0
2023-06-24,96,81,18.0,18.0
2023-06-25,66,2,-30.0,-30.0


In [51]:
# pct_change() : # 연속된 두행 간 비율을 구함
df6['col1_ratio'] = df6.col1.pct_change()
df6

Unnamed: 0,col1,col2,col1_diff,col1_gap,col1_ratio
2023-06-20,32,16,,,
2023-06-21,32,25,0.0,0.0,0.0
2023-06-22,46,59,14.0,14.0,0.4375
2023-06-23,78,60,32.0,32.0,0.695652
2023-06-24,96,81,18.0,18.0,0.230769
2023-06-25,66,2,-30.0,-30.0,-0.3125


In [52]:
# pct_change() 와 결과 동일
df6['col1_rt'] = df6.col1 / df6.col1.shift(1) - 1
df6

Unnamed: 0,col1,col2,col1_diff,col1_gap,col1_ratio,col1_rt
2023-06-20,32,16,,,,
2023-06-21,32,25,0.0,0.0,0.0,0.0
2023-06-22,46,59,14.0,14.0,0.4375,0.4375
2023-06-23,78,60,32.0,32.0,0.695652,0.695652
2023-06-24,96,81,18.0,18.0,0.230769,0.230769
2023-06-25,66,2,-30.0,-30.0,-0.3125,-0.3125


In [53]:
# rolling() : 일정 크기의 창(window)를 이용하여 추가 메서드를 통해 계산. parameter 많으니 필요시 검색
df6.col2.rolling(window=3).sum()

2023-06-20      NaN
2023-06-21      NaN
2023-06-22    100.0
2023-06-23    144.0
2023-06-24    200.0
2023-06-25    143.0
Freq: D, Name: col2, dtype: float64

In [54]:
# expanding() : 해당 연산을 누적으로 진행
df6.col2.expanding().sum()

2023-06-20     16.0
2023-06-21     41.0
2023-06-22    100.0
2023-06-23    160.0
2023-06-24    241.0
2023-06-25    243.0
Freq: D, Name: col2, dtype: float64

### 집계 행/열 추가

In [55]:
# 데이터프레임 재활용
df7 = pd.DataFrame(dict2)
df7

Unnamed: 0,Name,Korean,Math,Society
0,San Miguel,60,90,70
1,Sauvignon Blanc,80,80,90
2,Jinro,70,80,95
3,Mak Geolli,75,50,90


In [57]:
# 컬럼 합계 추가
df7['Sum'] = df7.sum(axis=1, numeric_only=True)
df7

Unnamed: 0,Name,Korean,Math,Society,Sum
0,San Miguel,60,90,70,220
1,Sauvignon Blanc,80,80,90,250
2,Jinro,70,80,95,245
3,Mak Geolli,75,50,90,215


In [58]:
# 행 평균 추가
df7.set_index('Name', inplace=True)  # 숫자만 남도록 문자부분 인덱스 지정, 필요시 인덱스 다시 해제
df7.loc['Avg'] = df7.mean(axis=0)
df7

Unnamed: 0_level_0,Korean,Math,Society,Sum
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
San Miguel,60.0,90.0,70.0,220.0
Sauvignon Blanc,80.0,80.0,90.0,250.0
Jinro,70.0,80.0,95.0,245.0
Mak Geolli,75.0,50.0,90.0,215.0
Avg,71.25,75.0,86.25,232.5
