## 판다스?

데이터프레임(엑셀과 비슷)과 시리즈라는 자료형  
데이터 분석을 위한 다양한 기능을 제공하는 라이브러리  
R의 데이터프레임에 영향  
내부적으로 numpy 를 사용하므로 함께 import  

http://pandas.pydata.org

기능요약  
https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf


# Series

In [5]:
import pandas as pd
import numpy as np

In [6]:
x = [1, 2, 3, 4, 5]
pd.Series(x)

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [7]:
x = np.array([1, 2, 3, 4, 5])
pd.Series(x)

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [8]:
# Build the Series straight from a list; it gets the default index 0..4.
x = pd.Series([1, 2, 3, 4, 5])

print(x[0])       # single element with index label 0
print(x[1: 3])    # slice: elements at positions 1 and 2
print(x[::-1])    # reversed order (the index labels travel with the values)
print(x[x > 3])   # boolean mask: keep only the values greater than 3
print(x + 1)      # scalar arithmetic is applied to every element
print(x * 10)
print(x + x)      # element-wise addition of two Series

1
1    2
2    3
dtype: int64
4    5
3    4
2    3
1    2
0    1
dtype: int64
3    4
4    5
dtype: int64
0    2
1    3
2    4
3    5
4    6
dtype: int64
0    10
1    20
2    30
3    40
4    50
dtype: int64
0     2
1     4
2     6
3     8
4    10
dtype: int64


# Series Index

In [9]:
import pandas as pd
import numpy as np

x = [1, 2, 3, 4, 5]
x = pd.Series(x)
print(x.index)
print(x.values)

RangeIndex(start=0, stop=5, step=1)
[1 2 3 4 5]


In [10]:
x = [1, 2, 3, 4, 5]
x = pd.Series(x, index=['a', 'b', 'c', 'd', 'e'])  # attach explicit string labels
print(x)

print(x['a'])  # access by explicit label
# Positional access goes through .iloc: a plain x[0] on a non-integer index
# relies on a positional fallback that pandas has deprecated.
print(x.iloc[0])
print(x[['a', 'e']])  # fancy indexing: fetch several labels at once
print(x.a)  # attribute-style access to the label 'a'

a    1
b    2
c    3
d    4
e    5
dtype: int64
1
1
a    1
e    5
dtype: int64
1


In [11]:
# Integer labels 1..5, deliberately different from the positions 0..4.
x = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 3, 4, 5])

# x[0] raises an error here: the index has no label 0
print(x.iloc[0])  # position-based access only
print(x.loc[1])   # label-based access only

1
1


## 딕셔너리 to Series

In [12]:
x = {"수학":90, "영어":80, "과학":95, "미술":80}
x = pd.Series(x)
x

수학    90
영어    80
과학    95
미술    80
dtype: int64

In [13]:
print(x['수학'])

90


In [14]:
print(x['영어':])

영어    80
과학    95
미술    80
dtype: int64


In [15]:
x = {"수학":90, "영어":80, "과학":95, "미술":80}
x = pd.Series(x, index=["수학", "영어", "과학"])
x

수학    90
영어    80
과학    95
dtype: int64

## Multi Index

In [16]:
student_1 = {"수학": 90, "영어": 80, "과학": 95, "미술": 80}
student_2 = {"수학": 70, "영어": 90, "과학": 100, "미술": 70}

# Outer index level: each student's name repeated once per subject.
index_1 = ['홍길동'] * len(student_1) + ['이몽룡'] * len(student_2)

# Inner index level: the subject names (dict keys) of both students, in order.
index_2 = [*student_1, *student_2]

# Scores, in the same order as the two index levels above.
value_all = [*student_1.values(), *student_2.values()]

# A list of two label lists as the index produces a two-level index.
students = pd.Series(value_all, index=[index_1, index_2])
students

홍길동  수학     90
     영어     80
     과학     95
     미술     80
이몽룡  수학     70
     영어     90
     과학    100
     미술     70
dtype: int64

In [17]:
students['홍길동']

수학    90
영어    80
과학    95
미술    80
dtype: int64

In [18]:
students['이몽룡']

수학     70
영어     90
과학    100
미술     70
dtype: int64

## 결측값 처리방법 (NaN, None)

In [19]:
# The None entries show up as NaN in the resulting float64 Series.
x = pd.Series([1, None, 2, None, 3, 4, None])
print(x)

# aggregates over the Series (the missing entries are skipped)
print(x.sum())
print(x.max())
print(x.min())

# number of missing / non-missing entries
print(x.isnull().sum())
print(x.notnull().sum())

# drop the missing entries
print(x.dropna())

# replace the missing entries with another value (the result is not assigned back to x)
x.fillna(0)

0    1.0
1    NaN
2    2.0
3    NaN
4    3.0
5    4.0
6    NaN
dtype: float64
10.0
4.0
1.0
3
4
0    1.0
2    2.0
4    3.0
5    4.0
dtype: float64


0    1.0
1    0.0
2    2.0
3    0.0
4    3.0
5    4.0
6    0.0
dtype: float64

## concat

In [20]:
x = pd.Series([1, 2, 3])
y = pd.Series([4, 5, 6])
z = pd.Series([7, 8, 9])

pd.concat([x, y, z])

0    1
1    2
2    3
0    4
1    5
2    6
0    7
1    8
2    9
dtype: int64

In [21]:
pd.concat([x, y, z], verify_integrity=True) #같은 값의 인덱스가 있을경우 오류

ValueError: Indexes have overlapping values: Int64Index([0, 1, 2], dtype='int64')

In [22]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True)

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
8    9
dtype: int64

In [23]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True, axis=1)

Unnamed: 0,0,1,2
0,1,4,7
1,2,5,8
2,3,6,9


In [24]:
x = pd.Series([1, 2, 3, 4])
y = pd.Series([4, 5, 6])
z = pd.Series([7, 8, 9])

In [25]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True, axis=1, join='inner')

Unnamed: 0,0,1,2
0,1,4,7
1,2,5,8
2,3,6,9


In [26]:
pd.concat([x, y, z], verify_integrity=False, ignore_index=True, axis=1, join='outer')

Unnamed: 0,0,1,2
0,1,4.0,7.0
1,2,5.0,8.0
2,3,6.0,9.0
3,4,,


## 연산과 집계함수

In [27]:
x = pd.Series([1, 2, 3, 4, 5])
y = pd.Series([6, 7, 8, 9, 0])

# addition (with a scalar, then element-wise with another Series)
print(x.add(10))
print(x.add(y))

# subtraction
print(x.sub(y))

# multiplication
print(x.mul(y))

# division
print(x.floordiv(2))  # floor division
print(x.div(2))  # true division
print(x.mod(2))  # remainder

# power
print(x.pow(2))

# basic statistics
print(x.count())
print(x.min())
print(x.max())
print(x.mean())
print(x.median())  # median
print(x.sum())
print(x.std())  # standard deviation
print(x.var())  # variance
# Mean absolute deviation. Series.mad() was deprecated in pandas 1.5 and removed
# in 2.0, so the same quantity is computed explicitly.
print((x - x.mean()).abs().mean())
print(x.describe())  # all the basic statistics at once

print(x.head(2))  # peek at the first rows
print(x.tail(2))  # peek at the last rows

0    11
1    12
2    13
3    14
4    15
dtype: int64
0     7
1     9
2    11
3    13
4     5
dtype: int64
0   -5
1   -5
2   -5
3   -5
4    5
dtype: int64
0     6
1    14
2    24
3    36
4     0
dtype: int64
0    0
1    1
2    1
3    2
4    2
dtype: int64
0    0.5
1    1.0
2    1.5
3    2.0
4    2.5
dtype: float64
0    1
1    0
2    1
3    0
4    1
dtype: int64
0     1
1     4
2     9
3    16
4    25
dtype: int64
5
1
5
3.0
3.0
15
1.5811388300841898
2.5
1.2
count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64
0    1
1    2
dtype: int64
3    4
4    5
dtype: int64


## 실습1

In [33]:
# Candidate values: one missing value plus the integers 1..5.
x = [np.nan, 1, 2, 3, 4, 5]
# Draw 20 values by picking a random position (0..5) of x each time.
y = pd.Series([x[np.random.randint(0, 6)] for _ in range(20)])
print('y:', y.isna().sum())  # how many missing values were drawn
y = y.fillna(y.mean())  # fill the gaps with the mean of the drawn values
print(y)

y: 5
0     1.000000
1     1.000000
2     2.666667
3     5.000000
4     4.000000
5     2.666667
6     2.000000
7     4.000000
8     5.000000
9     2.666667
10    4.000000
11    2.666667
12    3.000000
13    2.666667
14    1.000000
15    1.000000
16    2.000000
17    1.000000
18    5.000000
19    1.000000
dtype: float64


# DataFrame

In [34]:
# Yearly sales figures; every dict key becomes a column of the frame.
sales_data = pd.DataFrame({
    '연도': [2015, 2016, 2017, 2018, 2019, 2020],
    '판매량': [103, 70, 130, 160, 190, 230],
    '매출': [500000, 300000, 400000, 550000, 700000, 680000],
    '순이익': [370000, 190000, 300000, 480000, 600000, 590000],
})
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [35]:
sales_data['판매량']

0    103
1     70
2    130
3    160
4    190
5    230
Name: 판매량, dtype: int64

In [36]:
sales_data.iloc[1]

연도       2016
판매량        70
매출     300000
순이익    190000
Name: 1, dtype: int64

In [37]:
sales_data = {
    '연도': [2015, 2016, 2017, 2018, 2019, 2020],
    '판매량': [103, 70, 130, 160, 190, 230],
    '매출': [500000, 300000, 400000, 550000, 700000, 680000],
    '순이익': [370000, 190000, 300000, 480000, 600000, 590000],
}

# Keep only the three measure columns and label the rows with the years.
pd.DataFrame(
    sales_data,
    index=sales_data['연도'],
    columns=['판매량', '매출', '순이익'],
)

Unnamed: 0,판매량,매출,순이익
2015,103,500000,370000
2016,70,300000,190000
2017,130,400000,300000
2018,160,550000,480000
2019,190,700000,600000
2020,230,680000,590000


## 파일생성

In [38]:
%%writefile sales_data.csv
연도,판매량,매출,순이익
2015,103,500000,370000
2016,70,300000,190000
2017,130,400000,300000
2018,60,550000,480000
2019,190,700000,600000
2020,230,680000,590000

Overwriting sales_data.csv


## CSV 파일읽기

In [39]:
sales_data = pd.read_csv('sales_data.csv', index_col='연도', header=0, sep=',')
sales_data

Unnamed: 0_level_0,판매량,매출,순이익
연도,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,103,500000,370000
2016,70,300000,190000
2017,130,400000,300000
2018,60,550000,480000
2019,190,700000,600000
2020,230,680000,590000


## CSV 파일쓰기

In [40]:
sales_data.to_csv('sales_data_save.csv', encoding='utf-8-sig')

## 데이터 추가

In [41]:
# Start again from the raw yearly figures; every dict key becomes a column.
sales_data = pd.DataFrame({
    '연도': [2015, 2016, 2017, 2018, 2019, 2020],
    '판매량': [103, 70, 130, 160, 190, 230],
    '매출': [500000, 300000, 400000, 550000, 700000, 680000],
    '순이익': [370000, 190000, 300000, 480000, 600000, 590000],
})
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [42]:
sales_data['순이익율'] = (sales_data['순이익']/sales_data['매출']) * 100
sales_data

Unnamed: 0,연도,판매량,매출,순이익,순이익율
0,2015,103,500000,370000,74.0
1,2016,70,300000,190000,63.333333
2,2017,130,400000,300000,75.0
3,2018,160,550000,480000,87.272727
4,2019,190,700000,600000,85.714286
5,2020,230,680000,590000,86.764706


In [43]:
def check(n):
    """Classify a value against the threshold 80.

    Returns '높음' when n is strictly greater than 80, otherwise '낮음'.
    """
    return '높음' if n > 80 else '낮음'

sales_data['순이익율_비교'] = sales_data['순이익율'].apply(check)
#sales_data['순이익율_비교'] = sales_data['순이익율'].apply(lambda n: '높음' if n > 80 else '낮음')
sales_data

Unnamed: 0,연도,판매량,매출,순이익,순이익율,순이익율_비교
0,2015,103,500000,370000,74.0,낮음
1,2016,70,300000,190000,63.333333,낮음
2,2017,130,400000,300000,75.0,낮음
3,2018,160,550000,480000,87.272727,높음
4,2019,190,700000,600000,85.714286,높음
5,2020,230,680000,590000,86.764706,높음


## 데이터 추가 2

In [44]:
# Rebuild the frame from the raw yearly figures; every dict key becomes a column.
sales_data = pd.DataFrame({
    '연도': [2015, 2016, 2017, 2018, 2019, 2020],
    '판매량': [103, 70, 130, 160, 190, 230],
    '매출': [500000, 300000, 400000, 550000, 700000, 680000],
    '순이익': [370000, 190000, 300000, 480000, 600000, 590000],
})
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [45]:
sales_data[sales_data['매출'] > 300000]

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [46]:
sales_data['테스트1'] = np.where(sales_data['판매량'] > 200, 0, sales_data['판매량'])
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1
0,2015,103,500000,370000,103
1,2016,70,300000,190000,70
2,2017,130,400000,300000,130
3,2018,160,550000,480000,160
4,2019,190,700000,600000,190
5,2020,230,680000,590000,0


In [47]:
# Add 50 to 판매량 for the rows where it is below 100. Rows that are not selected
# receive no value (NaN) when the result is aligned back on the row index.
sales_data['테스트2'] = sales_data.loc[sales_data['판매량'] < 100, '판매량'] + 50
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2
0,2015,103,500000,370000,103,
1,2016,70,300000,190000,70,120.0
2,2017,130,400000,300000,130,
3,2018,160,550000,480000,160,
4,2019,190,700000,600000,190,
5,2020,230,680000,590000,0,


In [48]:
# Add a new row under label 6; the list is matched to the columns by position.
# NOTE(review): the column order is 연도, 판매량, 매출, 순이익, 테스트1, 테스트2, so
# this stores 720000 as 판매량 and 360 as 순이익. The values look misordered —
# confirm whether [2021, 360, 720000, 650000, 0, 0] was intended.
sales_data.loc[6] = [2021, 720000,650000, 360, 0, 0]
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2
0,2015,103,500000,370000,103,
1,2016,70,300000,190000,70,120.0
2,2017,130,400000,300000,130,
3,2018,160,550000,480000,160,
4,2019,190,700000,600000,190,
5,2020,230,680000,590000,0,
6,2021,720000,650000,360,0,0.0


In [49]:
sales_data.loc[7] = sales_data.loc[5] + 100
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2
0,2015.0,103.0,500000.0,370000.0,103.0,
1,2016.0,70.0,300000.0,190000.0,70.0,120.0
2,2017.0,130.0,400000.0,300000.0,130.0,
3,2018.0,160.0,550000.0,480000.0,160.0,
4,2019.0,190.0,700000.0,600000.0,190.0,
5,2020.0,230.0,680000.0,590000.0,0.0,
6,2021.0,720000.0,650000.0,360.0,0.0,0.0
7,2120.0,330.0,680100.0,590100.0,100.0,


## 데이터 삭제

In [50]:
# Fresh copy of the yearly figures to practise deletions on.
sales_data = pd.DataFrame({
    '연도': [2015, 2016, 2017, 2018, 2019, 2020],
    '판매량': [103, 70, 130, 160, 190, 230],
    '매출': [500000, 300000, 400000, 550000, 700000, 680000],
    '순이익': [370000, 190000, 300000, 480000, 600000, 590000],
})

# Three throwaway columns; assigning a scalar fills every row with that value.
sales_data['테스트1'] = 'test1'
sales_data['테스트2'] = 'test2'
sales_data['테스트3'] = 'test3'

sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트1,테스트2,테스트3
0,2015,103,500000,370000,test1,test2,test3
1,2016,70,300000,190000,test1,test2,test3
2,2017,130,400000,300000,test1,test2,test3
3,2018,160,550000,480000,test1,test2,test3
4,2019,190,700000,600000,test1,test2,test3
5,2020,230,680000,590000,test1,test2,test3


In [51]:
del sales_data['테스트1']
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트2,테스트3
0,2015,103,500000,370000,test2,test3
1,2016,70,300000,190000,test2,test3
2,2017,130,400000,300000,test2,test3
3,2018,160,550000,480000,test2,test3
4,2019,190,700000,600000,test2,test3
5,2020,230,680000,590000,test2,test3


In [52]:
sales_data.drop(['테스트2'], axis='columns', inplace=True)
sales_data

Unnamed: 0,연도,판매량,매출,순이익,테스트3
0,2015,103,500000,370000,test3
1,2016,70,300000,190000,test3
2,2017,130,400000,300000,test3
3,2018,160,550000,480000,test3
4,2019,190,700000,600000,test3
5,2020,230,680000,590000,test3


In [53]:
sales_data.drop(['테스트3'], axis='columns', inplace=True)
sales_data

Unnamed: 0,연도,판매량,매출,순이익
0,2015,103,500000,370000
1,2016,70,300000,190000
2,2017,130,400000,300000
3,2018,160,550000,480000
4,2019,190,700000,600000
5,2020,230,680000,590000


In [54]:
sales_data.drop(sales_data.columns[[0, 2]], axis='columns', inplace=True)
sales_data

Unnamed: 0,판매량,순이익
0,103,370000
1,70,190000
2,130,300000
3,160,480000
4,190,600000
5,230,590000


In [55]:
sales_data.drop(0, inplace=True)
sales_data

Unnamed: 0,판매량,순이익
1,70,190000
2,130,300000
3,160,480000
4,190,600000
5,230,590000


In [56]:
sales_data.drop([3, 4, 5], inplace=True)
sales_data

Unnamed: 0,판매량,순이익
1,70,190000
2,130,300000


## Dataframe MultiIndex

In [57]:
# 4x4 table of random integers in [1, 100) with two-level row and column labels.
df = pd.DataFrame(
    np.random.randint(1, 100, size=(4, 4)),
    index=[['A', 'A', 'B', 'B'], ['a', 'b', 'a', 'b']],
    columns=[['가가', '가가', '나나', '나나'], ['가', '나', '가', '나']],
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,가가,가가,나나,나나
Unnamed: 0_level_1,Unnamed: 1_level_1,가,나,가,나
A,a,54,31,40,19
A,b,5,1,58,88
B,a,12,75,79,20
B,b,24,18,70,3


In [58]:
df['가가']

Unnamed: 0,Unnamed: 1,가,나
A,a,54,31
A,b,5,1
B,a,12,75
B,b,24,18


In [59]:
df.loc['A']

Unnamed: 0_level_0,가가,가가,나나,나나
Unnamed: 0_level_1,가,나,가,나
a,54,31,40,19
b,5,1,58,88


In [60]:
df.index

MultiIndex([('A', 'a'),
            ('A', 'b'),
            ('B', 'a'),
            ('B', 'b')],
           )

In [61]:
df.columns

MultiIndex([('가가', '가'),
            ('가가', '나'),
            ('나나', '가'),
            ('나나', '나')],
           )

## GroupBy

In [62]:
# Random sales/stock figures for eight (warehouse, product) rows.
df = pd.DataFrame(
    np.random.randint(1, 100, size=(8, 2)),
    index=[
        ['A창고', 'A창고', 'A창고', 'A창고', 'B창고', 'B창고', 'B창고', 'B창고'],
        ['사과', '배', '바나나', '사과', '사과', '배', '바나나', '배'],
    ],
    columns=['판매', '재고'],
)
# Name the two index levels so groupby can refer to them by name.
df.index.names = ['창고명', '상품명']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,판매,재고
창고명,상품명,Unnamed: 2_level_1,Unnamed: 3_level_1
A창고,사과,46,9
A창고,배,66,15
A창고,바나나,44,49
A창고,사과,72,38
B창고,사과,60,92
B창고,배,38,60
B창고,바나나,60,20
B창고,배,61,55


In [63]:
df.groupby('창고명').sum()

Unnamed: 0_level_0,판매,재고
창고명,Unnamed: 1_level_1,Unnamed: 2_level_1
A창고,228,111
B창고,219,227


In [64]:
df.groupby('창고명').max()

Unnamed: 0_level_0,판매,재고
창고명,Unnamed: 1_level_1,Unnamed: 2_level_1
A창고,72,49
B창고,61,92


In [65]:
df.groupby('창고명').min()

Unnamed: 0_level_0,판매,재고
창고명,Unnamed: 1_level_1,Unnamed: 2_level_1
A창고,44,9
B창고,38,20


In [66]:
df.groupby('상품명').sum()

Unnamed: 0_level_0,판매,재고
상품명,Unnamed: 1_level_1,Unnamed: 2_level_1
바나나,104,69
배,165,130
사과,178,139


In [67]:
df.groupby(['창고명','상품명']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,판매,재고
창고명,상품명,Unnamed: 2_level_1,Unnamed: 3_level_1
A창고,바나나,44,49
A창고,배,66,15
A창고,사과,118,47
B창고,바나나,60,20
B창고,배,99,115
B창고,사과,60,92


## sort_values

In [68]:
# Random sales/stock figures for eight (warehouse, product) rows, used for sorting.
df = pd.DataFrame(
    np.random.randint(1, 100, size=(8, 2)),
    index=[
        ['A창고', 'A창고', 'A창고', 'A창고', 'B창고', 'B창고', 'B창고', 'B창고'],
        ['사과', '배', '바나나', '사과', '사과', '배', '바나나', '배'],
    ],
    columns=['판매', '재고'],
)
df

Unnamed: 0,Unnamed: 1,판매,재고
A창고,사과,80,63
A창고,배,55,9
A창고,바나나,66,88
A창고,사과,81,65
B창고,사과,50,73
B창고,배,73,98
B창고,바나나,80,26
B창고,배,14,25


In [69]:
df.sort_values(by="판매", ascending=True)

Unnamed: 0,Unnamed: 1,판매,재고
B창고,배,14,25
B창고,사과,50,73
A창고,배,55,9
A창고,바나나,66,88
B창고,배,73,98
A창고,사과,80,63
B창고,바나나,80,26
A창고,사과,81,65


In [70]:
df.sort_values(by=["판매", "재고"], ascending=[True, False])

Unnamed: 0,Unnamed: 1,판매,재고
B창고,배,14,25
B창고,사과,50,73
A창고,배,55,9
A창고,바나나,66,88
B창고,배,73,98
A창고,사과,80,63
B창고,바나나,80,26
A창고,사과,81,65


## 실습2

In [105]:
import numpy as np

# Random scores in [1, 100] for 15 rows (3 grades x 5 classes) and 3 subjects.
data1 = np.random.randint(1, 101, size=(15, 3))

# Outer index level: each grade label repeated five times (once per class).
index1 = [f'{grade}학년' for grade in (1, 2, 3) for _ in range(5)]
# Inner index level: the class labels 1반..5반, repeated for every grade.
index2 = [f'{room}반' for room in range(1, 6)] * 3

df1 = pd.DataFrame(data1, index=[index1, index2], columns=['국어', '영어', '과학'])
df1['총점'] = df1[['국어', '영어', '과학']].sum(axis=1)
df1['평균'] = round(df1['총점'] / 3, 2)
display(df1)

# Drop the class level and name the remaining grade level, so the rows can be
# grouped by grade.
df1 = df1.reset_index(level=1, drop=True).rename_axis('학년')
df1.groupby('학년').sum()

Unnamed: 0,Unnamed: 1,국어,영어,과학,총점,평균
1학년,1반,2,88,46,136,45.33
1학년,2반,93,73,37,203,67.67
1학년,3반,19,23,91,133,44.33
1학년,4반,14,83,17,114,38.0
1학년,5반,72,86,34,192,64.0
2학년,1반,66,24,44,134,44.67
2학년,2반,62,2,32,96,32.0
2학년,3반,49,57,31,137,45.67
2학년,4반,47,35,94,176,58.67
2학년,5반,7,43,14,64,21.33


Unnamed: 0_level_0,국어,영어,과학,총점,평균
학년,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1학년,200,353,225,778,259.33
2학년,231,161,215,607,202.34
3학년,205,140,207,552,184.0


## 실습3

In [147]:
df1 = pd.read_csv('train.csv')
# Fill the gaps: the mean age for Age, the placeholder 'N' for Cabin and Embarked.
df1['Age'] = df1['Age'].fillna(df1.Age.mean())
df1['Cabin'] = df1['Cabin'].fillna('N')
df1['Embarked'] = df1['Embarked'].fillna('N')
display(df1)

# NOTE(review): count() below returns the number of non-missing Survived rows per
# group, i.e. a passenger count, not a survivor count — confirm that is what
# p1-p3 ask for; sum() or mean() would give survivor counts or rates.

# p1
print(df1.groupby('Sex')['Survived'].count())

# p2
print(df1.groupby('Pclass')['Survived'].count())

# p3
# Exclusive upper age bound of each band, checked from youngest to oldest.
_AGE_BANDS = [
    (10, '유아'),
    (20, '10대'),
    (30, '20대'),
    (40, '30대'),
    (50, '40대'),
    (60, '50대'),
    (70, '60대'),
]


def _age_band(age):
    """Return the label of the first band whose upper bound exceeds age.

    An age that falls below none of the bounds (70 and above) is labelled '노인'.
    """
    for upper, label in _AGE_BANDS:
        if age < upper:
            return label
    return '노인'


# Map the Age column directly instead of looping over range(len(df1)) with
# df1.loc[i, 'Age'], which only works while the row labels are exactly 0..n-1.
df1['Age_Range'] = df1['Age'].apply(_age_band)
print(df1.groupby('Age_Range')['Survived'].count())

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.000000,1,0,A/5 21171,7.2500,N,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.000000,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.000000,0,0,STON/O2. 3101282,7.9250,N,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.000000,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.000000,0,0,373450,8.0500,N,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.000000,0,0,211536,13.0000,N,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.000000,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,29.699118,1,2,W./C. 6607,23.4500,N,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.000000,0,0,111369,30.0000,C148,C


Sex
female    314
male      577
Name: Survived, dtype: int64
Pclass
1    216
2    184
3    491
Name: Survived, dtype: int64
Age_Range
10대    102
20대    397
30대    167
40대     89
50대     48
60대     19
노인       7
유아      62
Name: Survived, dtype: int64


In [119]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
