# DataFrame 예제 코드 - 추가, 수정, 삭제

In [104]:
import pandas as pd

In [105]:
df = pd.DataFrame({
    'name':     ['Celine','James','Jason','Hans'],
    'nation':   ['France','UK','USA','Germany'],
    'english':  [90,80,80,60],
    'math':     [100,40,40,50],
    'science':  [100,30,60,90]
})
df.index = ['a','b','c','d']
df

Unnamed: 0,name,nation,english,math,science
a,Celine,France,90,100,100
b,James,UK,80,40,30
c,Jason,USA,80,40,60
d,Hans,Germany,60,50,90


## 추가

### 컬럼 추가

In [106]:
df['korean'] = 80
df

Unnamed: 0,name,nation,english,math,science,korean
a,Celine,France,90,100,100,80
b,James,UK,80,40,30,80
c,Jason,USA,80,40,60,80
d,Hans,Germany,60,50,90,80


In [107]:
df['physics'] = [100,80,60,70]
df

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,100
b,James,UK,80,40,30,80,80
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70


### 행(row) 추가

In [5]:
df.loc['e'] = 5
df

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,100
b,James,UK,80,40,30,80,80
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,5,5,5,5,5,5,5


In [108]:
df.loc['e'] = ['Vesper','USA',100,80,100,90,90]
df

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,100
b,James,UK,80,40,30,80,80
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,USA,100,80,100,90,90


## 자르기

In [109]:
# 행(row) 자르기
df_j = df.truncate(before='b',after='c', axis=0)
df_j

Unnamed: 0,name,nation,english,math,science,korean,physics
b,James,UK,80,40,30,80,80
c,Jason,USA,80,40,60,80,60


## Cell 값 수정

In [110]:
print(df.loc['b','nation'])
df.loc['b', 'nation'] = 'USA'
df

UK


Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,100
b,James,USA,80,40,30,80,80
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,USA,100,80,100,90,90


In [111]:
df['nation']['b'] = 'Germany'
df

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['nation']['b'] = 'Germany'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['nation']['b'] = 'Germany'


Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,100
b,James,Germany,80,40,30,80,80
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,USA,100,80,100,90,90


In [112]:
df.loc[['a','b'], 'physics'] = [90,90]
df

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,90
b,James,Germany,80,40,30,80,90
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,USA,100,80,100,90,90


### replace() 메소드

In [114]:
df_replaced = df.replace(to_replace='USA', value='America')
df_replaced

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,90
b,James,Germany,80,40,30,80,90
c,Jason,America,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,America,100,80,100,90,90


## row/column 위치 변경(Pivoting)

In [10]:
# 전치 행렬 
df2 = df.transpose()
df2

Unnamed: 0,a,b,c,d,e
name,Celine,James,Jason,Hans,Vesper
nation,France,Germany,USA,Germany,USA
english,90,80,80,60,100
math,100,40,40,50,80
science,100,30,60,90,100
korean,80,80,80,80,90
physics,90,90,60,70,90


## 삭제

### 행 삭제

In [11]:
# drop() 메소드의 axis 파리미터로 0을 주면 row 삭제
df2 = df.drop('d', axis=0)
df2

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,90
b,James,Germany,80,40,30,80,90
c,Jason,USA,80,40,60,80,60
e,Vesper,USA,100,80,100,90,90


In [12]:
# 첫 번째 파라미터로 list를 전달하면 여러 목록을 삭제
df2.drop(['c','e'], axis=0, inplace=True)
df2

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,90
b,James,Germany,80,40,30,80,90


In [13]:
# index 속성으로 슬라이싱 사용
df2.drop(df2.index[0:1], inplace=True)
df2

Unnamed: 0,name,nation,english,math,science,korean,physics
b,James,Germany,80,40,30,80,90


#### 열(column) 삭제

In [14]:
# drop() 메소드의 axis 파리미터로 1을 주면 column 삭제
df2 = df.drop('physics', axis=1)
df2

Unnamed: 0,name,nation,english,math,science,korean
a,Celine,France,90,100,100,80
b,James,Germany,80,40,30,80
c,Jason,USA,80,40,60,80
d,Hans,Germany,60,50,90,80
e,Vesper,USA,100,80,100,90


In [15]:
# 첫 번째 파라미터로 list를 전달하면 여러 목록을 삭제
df2.drop(['science','korean'], axis=1, inplace=True)
df2

Unnamed: 0,name,nation,english,math
a,Celine,France,90,100
b,James,Germany,80,40
c,Jason,USA,80,40
d,Hans,Germany,60,50
e,Vesper,USA,100,80


In [16]:
# columns 속성을 사용하여 슬라이싱
df2.drop(df2.columns[2:3], axis=1, inplace=True)
df2

Unnamed: 0,name,nation,math
a,Celine,France,100
b,James,Germany,40
c,Jason,USA,40
d,Hans,Germany,50
e,Vesper,USA,80


### del 문 사용

In [17]:
# del문을 사용하여 컬럼 삭제
del df2['math']
df2

Unnamed: 0,name,nation
a,Celine,France
b,James,Germany
c,Jason,USA
d,Hans,Germany
e,Vesper,USA


### 열 꺼내기 - pop() 메소드

In [None]:
nation = df2.pop('nation')
nation

a     France
b    Germany
c        USA
d    Germany
e        USA
Name: nation, dtype: object

## 복사

### DataFrame 복사 - copy() 메소드

In [None]:
# shallow copy
df_copy = df
df_copy

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,90
b,James,Germany,80,40,30,80,90
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,USA,100,80,100,90,90


In [39]:
# 원본 값이 변경되면 복사본의 값도 변경됨
df['nation']['b'] = 'Korea'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['nation']['b'] = 'Korea'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['nation']['b'] = 'Korea'


In [40]:
df_copy

Unnamed: 0,name,nation,english,math,science,korean,physics
a,Celine,France,90,100,100,80,90
b,James,Korea,80,40,30,80,90
c,Jason,USA,80,40,60,80,60
d,Hans,Germany,60,50,90,80,70
e,Vesper,USA,100,80,100,90,90


In [41]:
# 복사
df_copy = df.copy(deep=True)

In [42]:
df['nation']['b'] = 'Germany'
df_copy['nation']['b']

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df['nation']['b'] = 'Germany'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['nation']['b'] = 'Germany'


'Korea'

## 중복행 제거

In [81]:
df.loc[:][['name','nation','english','math','science']]
df_2 = df.copy()
df_duf = pd.concat([df, df_2])
df_duf


Unnamed: 0,name,nation,english,math,science
a,Celine,France,90,100,100
b,James,Germany,80,40,30
c,Jason,USA,80,40,60
d,Hans,Germany,60,50,90
a,Celine,France,90,100,100
b,James,Germany,80,40,30
c,Jason,USA,80,40,60
d,Hans,Germany,60,50,90


In [83]:
df_duf.drop_duplicates(inplace=True)
df_duf

Unnamed: 0,name,nation,english,math,science
a,Celine,France,90,100,100
b,James,Germany,80,40,30
c,Jason,USA,80,40,60
d,Hans,Germany,60,50,90


## 피봇 변환

In [92]:
df.pivot(index='nation', columns='name', values='english')

name,Celine,Hans,James,Jason
nation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
France,90.0,,,
Germany,,60.0,80.0,
USA,,,,80.0


In [100]:
df.pivot_table(index='name', values=['english','math'],aggfunc='sum')

Unnamed: 0_level_0,english,math
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Celine,90,100
Hans,60,50
James,80,40
Jason,80,40
