데이터 프레임의 마스킹

In [20]:
import pandas as pd
import numpy as np

# Build a 2x2 frame of random integers from 1 through 9 (the upper bound
# 10 passed to randint is exclusive).
random_values = np.random.randint(1, 10, size=(2, 2))
df = pd.DataFrame(random_values, index=[0, 1], columns=['A', 'B'])
print(df)

# Boolean mask: True for each row whose column A value is 5 or less.
print(df['A'] <= 5)

# Keep only the rows where A is at most 5 and B is at least 7.
print(df.query('A <= 5 and B >= 7'))

   A  B
0  5  7
1  5  1
0    True
1    True
Name: A, dtype: bool
   A  B
0  5  7


데이터 프레임의 개별 연산 ①

In [26]:
import pandas as pd

# Two identical rows so the effect of each +1 step is easy to follow.
rows = [[1, 2, 3, 4], [1, 2, 3, 4]]
df = pd.DataFrame(rows, index=[0, 1], columns=['A', 'B', 'C', 'D'])
print(df)

# With the default axis, apply hands the lambda one column (a Series) at a
# time; adding 1 to that Series increments every value in it.
df2 = df.apply(lambda column: column + 1)
print(df2)


def add_one(x):
    """Return x incremented by one."""
    return x + 1


# A named function can be passed to apply in place of a lambda; every value
# in the frame ends up incremented once more.
df3 = df2.apply(add_one)
print(df3)

   A  B  C  D
0  1  2  3  4
1  1  2  3  4
   A  B  C  D
0  2  3  4  5
1  2  3  4  5
   A  B  C  D
0  3  4  5  6
1  3  4  5  6


데이터 프레임의 개별 연산 ②

In [32]:
import pandas as pd

word_rows = [
    ['Apple', 'Apple', 'Carrot', 'Banana'],
    ['Durian', 'Banana', 'Apple', 'Carrot'],
]
df = pd.DataFrame(word_rows, index=[0, 1], columns=['A', 'B', 'C', 'D'])

print("Original DataFrame:")
print(df)

# Swap every cell equal to 'Apple' for 'Airport'; the result is stored in a
# new frame, df2.
replacements = {'Apple': 'Airport'}
df2 = df.replace(replacements)

# The leading \n prints a blank line before the heading.
print("\nDataFrame after replacement:")
print(df2)

Original DataFrame:
        A       B       C       D
0   Apple   Apple  Carrot  Banana
1  Durian  Banana   Apple  Carrot

DataFrame after replacement:
         A        B        C       D
0  Airport  Airport   Carrot  Banana
1   Durian   Banana  Airport  Carrot


데이터 프레임의 그룹화 ①

In [41]:
import pandas as pd

df = pd.DataFrame([['Apple', 7, 'Fruit'],
                   ['Banana', 3, 'Fruit'],
                   ['Beef', 5, 'Meal'],
                   ['Kimchi', 4, 'Meal']],
                  columns=['Name', 'Frequency', 'Type'])

print(df)

# Select the Frequency column before summing so only that column is
# aggregated per Type. Summing the whole grouped frame first would also
# aggregate the string Name column just to throw the result away.
frequency_by_type = df.groupby('Type')['Frequency'].sum()
print(frequency_by_type)

     Name  Frequency   Type
0   Apple          7  Fruit
1  Banana          3  Fruit
2    Beef          5   Meal
3  Kimchi          4   Meal
Type
Fruit    10
Meal      9
Name: Frequency, dtype: int64


데이터 프레임의 그룹화 ②

In [66]:
import numpy as np
import pandas as pd

food_rows = [
    ['Apple', 7, 5, 'Fruit'],
    ['Banana', 3, 6, 'Fruit'],
    ['Beef', 5, 2, 'Meal'],
    ['Kimchi', 4, 8, 'Meal'],
]
df = pd.DataFrame(food_rows, columns=["Name", "Frequency", "Importance", "Type"])

print(df)

# Per Type, report the minimum, maximum and mean of both numeric columns.
numeric_by_type = df.groupby('Type')[['Frequency', 'Importance']]
print(numeric_by_type.agg(['min', 'max', 'mean']))

     Name  Frequency  Importance   Type
0   Apple          7           5  Fruit
1  Banana          3           6  Fruit
2    Beef          5           2   Meal
3  Kimchi          4           8   Meal
      Frequency          Importance         
            min max mean        min max mean
Type                                        
Fruit         3   7  5.0          5   6  5.5
Meal          4   5  4.5          2   8  5.0


데이터 프레임의 그룹화 ③

In [77]:
import pandas as pd

food_rows = [
    ['Apple', 7, 5, 'Fruit'],
    ['Banana', 3, 6, 'Fruit'],
    ['Beef', 5, 2, 'Meal'],
    ['Kimchi', 4, 8, 'Meal'],
]
df = pd.DataFrame(food_rows, columns=["Name", "Frequency", "Importance", "Type"])

print(df)


def my_filter(data):
    """Return whether the mean of data's Frequency column is at least 5."""
    mean_frequency = data['Frequency'].mean()
    return mean_frequency >= 5


# filter evaluates my_filter for each Type group and keeps the rows of the
# groups for which it returns True.
print(df.groupby('Type').filter(my_filter))

     Name  Frequency  Importance   Type
0   Apple          7           5  Fruit
1  Banana          3           6  Fruit
2    Beef          5           2   Meal
3  Kimchi          4           8   Meal
     Name  Frequency  Importance   Type
0   Apple          7           5  Fruit
1  Banana          3           6  Fruit


데이터 프레임의 그룹화 ④

In [81]:
import pandas as pd

food_rows = [
    ['Apple', 7, 5, 'Fruit'],
    ['Banana', 3, 6, 'Fruit'],
    ['Beef', 5, 2, 'Meal'],
    ['Kimchi', 4, 8, 'Meal'],
]
df = pd.DataFrame(food_rows, columns=["Name", "Frequency", "Importance", "Type"])

print(df)

# Pull out just the rows whose Type is 'Fruit'.
grouped_by_type = df.groupby('Type')
print(grouped_by_type.get_group('Fruit'))

     Name  Frequency  Importance   Type
0   Apple          7           5  Fruit
1  Banana          3           6  Fruit
2    Beef          5           2   Meal
3  Kimchi          4           8   Meal
     Name  Frequency  Importance   Type
0   Apple          7           5  Fruit
1  Banana          3           6  Fruit


데이터 프레임의 그룹화 ⑤

In [104]:
import pandas as pd

df = pd.DataFrame([
    ['Apple', 7, 5, 'Fruit'],
    ['Banana', 3, 6, 'Fruit'],
    ['Beef', 5, 2, 'Meal'],
    ['Kimchi', 4, 8, 'Meal']],
    columns=["Name", "Frequency", "Importance", "Type"])

print(df)

# Mean Frequency of each Type group, indexed by the Type label.
frequency_mean = df.groupby('Type')['Frequency'].mean()

print('frequency_mean')
print(frequency_mean)

# Look up each row's group mean through its Type label and subtract it from
# the row's Frequency in one vectorized step, rather than calling a Python
# lambda once per row with apply(axis=1).
df['gap'] = df['Frequency'] - df['Type'].map(frequency_mean)

print(df)

     Name  Frequency  Importance   Type
0   Apple          7           5  Fruit
1  Banana          3           6  Fruit
2    Beef          5           2   Meal
3  Kimchi          4           8   Meal
frequency_mean
Type
Fruit    5.0
Meal     4.5
Name: Frequency, dtype: float64
     Name  Frequency  Importance   Type  gap
0   Apple          7           5  Fruit  2.0
1  Banana          3           6  Fruit -2.0
2    Beef          5           2   Meal  0.5
3  Kimchi          4           8   Meal -0.5


데이터 프레임의 다중화

In [128]:
import numpy as np
import pandas as pd
from IPython.display import display

# Two parallel label lists give the rows a two-level index: the outer level
# is '1차' / '2차' and the inner level is '공격' / '수비'.
row_labels = [['1차', '1차', '2차', '2차'],
              ['공격', '수비', '공격', '수비']]
column_labels = ['1회', '2회', '3회', '4회']

# 4x4 random integers from 1 through 9 (randint's upper bound is exclusive).
scores = np.random.randint(1, 10, size=(4, 4))
df = pd.DataFrame(scores, index=row_labels, columns=column_labels)

display(df)

# Narrow to the first two columns, then take every row under the outer
# label '2차'; the result is indexed by the inner level only.
first_two_columns = df[['1회', '2회']]
display(first_two_columns.loc['2차'])

Unnamed: 0,Unnamed: 1,1회,2회,3회,4회
1차,공격,9,8,2,9
1차,수비,8,9,6,4
2차,공격,6,6,1,8
2차,수비,2,2,2,8


Unnamed: 0,1회,2회
공격,6,6
수비,2,2


피벗 테이블의 기초

In [131]:
import numpy as np
import pandas as pd
from IPython.display import display

food_rows = [
    ['Apple', 7, 5, 'Fruit'],
    ['Banana', 3, 6, 'Fruit'],
    ['Coconut', 2, 6, 'Fruit'],
    ['Rice', 8, 2, 'Meal'],
    ['Beef', 5, 2, 'Meal'],
    ['Kimchi', 4, 8, 'Meal'],
]
df = pd.DataFrame(food_rows, columns=["Name", "Frequency", "Importance", "Type"])

display(df)

# Rows are the distinct Importance values and columns the distinct Type
# values; each cell aggregates the matching Frequency values (pivot_table
# averages them by default), and combinations with no rows come out as NaN.
df_pivot = pd.pivot_table(df, index='Importance', columns='Type', values='Frequency')

display(df_pivot)

Unnamed: 0,Name,Frequency,Importance,Type
0,Apple,7,5,Fruit
1,Banana,3,6,Fruit
2,Coconut,2,6,Fruit
3,Rice,8,2,Meal
4,Beef,5,2,Meal
5,Kimchi,4,8,Meal


Type,Fruit,Meal
Importance,Unnamed: 1_level_1,Unnamed: 2_level_1
2,,6.5
5,7.0,
6,2.5,
8,,4.0
