In [1]:
import pandas as pd
import numpy as np

In [39]:
# Patient records, one row per person: age, sex, height, weight.
arr = (
    [25, '남', 185, 89],
    [19, '여', 163, 53],
    [27, '남', 177, 63],
)
# Row labels, listed in the same order as the rows of `arr`.
index = ['이겨레', '조약돌', '강산애']

df = pd.DataFrame(data=arr, index=index, columns=['나이', '성별', '키', '체중'])

df

Unnamed: 0,나이,성별,키,체중
0,25,남,185,89
1,19,여,163,53
2,27,남,177,63


In [40]:
# Same records, now with the name as the first field of every row.
arr = (
    ['이겨레', 25, '남', 185, 89],
    ['조약돌', 19, '여', 163, 53],
    ['강산애', 27, '남', 177, 63],
)

# `index` is reused from the previous cell, so each name appears twice:
# once as the row label and once as the value of the '성명' column.
column_names = ['성명', '나이', '성별', '키', '체중']
df = pd.DataFrame(arr, index=index, columns=column_names)

df

Unnamed: 0,성명,나이,성별,키,체중
이겨레,이겨레,25,남,185,89
조약돌,조약돌,19,여,163,53
강산애,강산애,27,남,177,63


### 행과 열 위치 변경

In [6]:
# Swap rows and columns: the row labels become the column headers.
df.transpose()

Unnamed: 0,이겨레,조약돌,강산애
나이,25,19,27
성별,남,여,남
키,185,163,177
체중,89,53,63


### 행 삭제

In [7]:
# Independent (deep) copy, so the row-deletion demo does not touch df.
df01 = df.copy(deep=True)

In [10]:
# Remove the rows labelled '이겨레' and '조약돌'.
# The result is derived from `df` instead of mutating df01 in place, so this
# cell can be re-run safely; an in-place drop raises KeyError on the second
# run because the labels are already gone.
df01 = df.drop(index=['이겨레', '조약돌'])

In [11]:
# Show df01 after the row drop; only the rows that were not dropped remain.
df01

Unnamed: 0,나이,성별,키,체중
강산애,27,남,177,63


### 열 삭제

In [13]:
# Column deletion: build a new frame without '나이' and '성별'; df itself is
# left unchanged.
df02 = df.drop(columns=['나이', '성별'])

df02

Unnamed: 0,키,체중
이겨레,185,89
조약돌,163,53
강산애,177,63


### Excel, CSV, JSON으로 저장하기

In [14]:
# Save df as an Excel workbook in the current working directory.
df.to_excel(excel_writer='df.patient.xlsx')

In [16]:
# Save df as CSV, encoding the text as CP949.
df.to_csv(path_or_buf='df.patient.csv', encoding='cp949')

In [20]:
# Save df as a JSON file.
df.to_json(path_or_buf='df.patient.json')

In [24]:
# Write df01 and df02 into one workbook, each on its own sheet.
# The writer is used as a context manager so the workbook is saved and the
# file handle closed when the block exits; a writer that is never closed
# leaves the sheets unwritten on disk.
with pd.ExcelWriter('excelWriter.xlsx') as exWriter:
    df01.to_excel(exWriter, sheet_name="01")
    df02.to_excel(exWriter, sheet_name="02")

In [25]:
# Column-oriented biometric records: each key is a column, each list holds
# one value per person.
bio_data = {
    '성명': ['이겨레', '조약돌'],
    '나이': [25, 19],
    '성별': ['남', '여'],
    '키': [185, 163],
    '체중': [89, 53],
}

# Column-oriented blood-pressure records keyed by patient code.
pulse_data = {
    '환자코드': ['A00301', 'D00809'],
    '수축기': [123, 118],
    '이완기': [88, 79],
}

In [26]:
# Build both frames and promote the identifying column to the index.
df_01 = pd.DataFrame(bio_data).set_index('성명')
df_02 = pd.DataFrame(pulse_data).set_index('환자코드')

# Write both frames into one workbook, one sheet each. The context manager
# saves and closes the file on exit; ExcelWriter.save() was deprecated in
# pandas 1.5 and removed in pandas 2.0, so calling it fails on current
# releases.
with pd.ExcelWriter('df_blood.xlsx') as d_save:
    df_01.to_excel(d_save, sheet_name='생체 데이터')
    df_02.to_excel(d_save, sheet_name='혈압 데이터')

display(df_01)
display(df_02)

Unnamed: 0_level_0,나이,성별,키,체중
성명,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
이겨레,25,남,185,89
조약돌,19,여,163,53


Unnamed: 0_level_0,수축기,이완기
환자코드,Unnamed: 1_level_1,Unnamed: 2_level_1
A00301,123,88
D00809,118,79


### 파일 읽어오기

In [27]:
data = './file/health_data.xlsx'

# Read the same sheet twice to compare header handling:
# header=0 (the default) uses the first row as column names ...
df_01 = pd.read_excel(data, header=0)
# ... while header=None keeps the first row as data and numbers the columns.
df_02 = pd.read_excel(data, header=None)

In [28]:
# Frame read with the default header handling: the first sheet row supplies
# the column names.
df_01

Unnamed: 0,코드번호,성별,키,체중,BMI,수축기,이완기
0,33001,남,175,65,21.22,143,110
1,34037,여,165,52,19.1,119,78
2,35123,여,159,67,26.5,130,83


In [29]:
# Frame read with header=None: columns are numbered and the original header
# row shows up as data row 0.
df_02

Unnamed: 0,0,1,2,3,4,5,6
0,코드번호,성별,키,체중,BMI,수축기,이완기
1,33001,남,175,65,21.22,143,110
2,34037,여,165,52,19.1,119,78
3,35123,여,159,67,26.5,130,83


In [31]:
# CSV file decoded as CP949.
data = './file/health_data_01.csv'
df_03 = pd.read_csv(filepath_or_buffer=data, encoding='cp949')

In [32]:
# Display the frame loaded from the CP949-encoded CSV.
df_03

Unnamed: 0,코드번호,성별,키,체중,BMI,수축기,이완기
0,33001,남,175,65,21.22,143,110
1,34037,여,165,52,19.1,119,78
2,35123,여,159,67,26.5,130,83


In [34]:
# CSV file decoded as UTF-8.
data = './file/health_data_02.csv'
df_04 = pd.read_csv(filepath_or_buffer=data, encoding='utf-8')

In [35]:
# Display the frame loaded from the UTF-8-encoded CSV.
df_04

Unnamed: 0,코드번호,성별,키,체중,BMI,수축기,이완기
0,33001,남,175,65,21.22,143,110
1,34037,여,165,52,19.1,119,78
2,35123,여,159,67,26.5,130,83


In [36]:
# Load a JSON file into a DataFrame, decoding the file as CP949.
data_json = './file/df_patient.json'
df_05 = pd.read_json(path_or_buf=data_json, encoding='cp949')

In [37]:
# Display the frame loaded from the JSON file.
df_05

Unnamed: 0,나이,성별,키,체중
이겨레,25,남,185,89
조약돌,19,여,163,53


In [41]:
# Display the current contents of df.
df

Unnamed: 0,성명,나이,성별,키,체중
이겨레,이겨레,25,남,185,89
조약돌,조약돌,19,여,163,53
강산애,강산애,27,남,177,63


In [48]:
# Column-oriented input: each key becomes a column of the frame.
dic_data = {
    'A': [10, 20, 30],
    'B': [40, 50, 60],
    'C': [70, 80, 90],
}

# Explicit row labels replace the default 0..n-1 index.
row_labels = ['R0', 'R1', 'R2']
df = pd.DataFrame(data=dic_data, index=row_labels)

df

Unnamed: 0,A,B,C
R0,10,40,70
R1,20,50,80
R2,30,60,90


### reindex()

In [49]:
# Target row labels: the three existing ones plus three that df does not have.
new_index = ['R0', 'R1', 'R2', 'R3', 'R4', 'R5']

# Without fill_value the new rows are filled with NaN ...
df_01 = df.reindex(index=new_index)
# ... with fill_value=0 they are filled with 0 instead.
df_02 = df.reindex(index=new_index, fill_value=0)

In [51]:
# Render both reindexed frames one after the other for comparison.
display(df_01, df_02)

Unnamed: 0,A,B,C
R0,10.0,40.0,70.0
R1,20.0,50.0,80.0
R2,30.0,60.0,90.0
R3,,,
R4,,,
R5,,,


Unnamed: 0,A,B,C
R0,10,40,70
R1,20,50,80
R2,30,60,90
R3,0,0,0
R4,0,0,0
R5,0,0,0


### reset_index()

In [43]:
# Replace the row labels with a fresh integer index; the old labels are kept
# as a regular column named 'index' (drop=False is the default).
df.reset_index(drop=False)

Unnamed: 0,index,A,B,C
0,R0,10,40,70
1,R1,20,50,80
2,R2,30,60,90


In [52]:
# Member table used by all the summary-statistics cells below.
data = './file/member.csv'

df_member = pd.read_csv(filepath_or_buffer=data)

In [53]:
# Display the whole member table.
df_member

Unnamed: 0,코드,이름,성별,연령,혈액형,키,체중,BMI,운동시작일
0,330531,이겨레,남,26,A,181,76,23.2,2021-01-01
1,340037,조약돌,여,25,B,185,89,26.0,2020-12-25
2,194563,강산애,남,27,A,177,63,20.11,
3,503546,원미연,여,48,O,166,55,19.96,2021-03-05
4,330543,오대양,남,31,AB,193,95,25.5,
5,341257,송가인,여,45,O,157,48,19.47,
6,472358,성공찬,여,53,B,159,50,19.78,1997-10-24
7,185823,황산성,남,52,B,178,87,27.46,
8,232305,신영웅,남,19,AB,163,53,19.95,2021-05-08
9,489602,최고봉,남,64,A,168,57,20.2,1988-08-15


In [54]:
# Peek at both ends of the table: the first 2 rows and the last 5 rows
# (5 is the default for tail()).
display(df_member.head(n=2), df_member.tail(n=5))

Unnamed: 0,코드,이름,성별,연령,혈액형,키,체중,BMI,운동시작일
0,330531,이겨레,남,26,A,181,76,23.2,2021-01-01
1,340037,조약돌,여,25,B,185,89,26.0,2020-12-25


Unnamed: 0,코드,이름,성별,연령,혈액형,키,체중,BMI,운동시작일
5,341257,송가인,여,45,O,157,48,19.47,
6,472358,성공찬,여,53,B,159,50,19.78,1997-10-24
7,185823,황산성,남,52,B,178,87,27.46,
8,232305,신영웅,남,19,AB,163,53,19.95,2021-05-08
9,489602,최고봉,남,64,A,168,57,20.2,1988-08-15


In [55]:
# (number of rows, number of columns) of the member table.
df_member.shape

(10, 9)

In [56]:
# Per-column dtype and non-null count, plus index range and memory usage.
df_member.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   코드      10 non-null     int64  
 1   이름      10 non-null     object 
 2   성별      10 non-null     object 
 3   연령      10 non-null     int64  
 4   혈액형     10 non-null     object 
 5   키       10 non-null     int64  
 6   체중      10 non-null     int64  
 7   BMI     10 non-null     float64
 8   운동시작일   6 non-null      object 
dtypes: float64(1), int64(4), object(4)
memory usage: 848.0+ bytes


In [61]:
# dtype of the '성별' column.
df_member['성별'].dtype

dtype('O')

In [63]:
# dtype of the '연령' column.
df_member['연령'].dtype

dtype('int64')

In [64]:
# dtype of the '운동시작일' column.
df_member['운동시작일'].dtype

dtype('O')

In [65]:
# Summary statistics (count, mean, std, min, quartiles, max); with default
# arguments only the numeric columns are summarised.
df_member.describe()

Unnamed: 0,코드,연령,키,체중,BMI
count,10.0,10.0,10.0,10.0,10.0
mean,342056.5,39.0,172.7,67.3,22.163
std,117049.733168,15.202339,11.898179,17.807926,3.086555
min,185823.0,19.0,157.0,48.0,19.47
25%,256861.5,26.25,163.75,53.5,19.9525
50%,335290.0,38.0,172.5,60.0,20.155
75%,439582.75,51.0,180.25,84.25,24.925
max,503546.0,64.0,193.0,95.0,27.46


In [67]:
# include='all' summarises every column: non-numeric columns get
# unique/top/freq, and statistics that do not apply to a column are NaN.
df_member.describe(include='all')

Unnamed: 0,코드,이름,성별,연령,혈액형,키,체중,BMI,운동시작일
count,10.0,10,10,10.0,10,10.0,10.0,10.0,6
unique,,10,2,,4,,,,6
top,,이겨레,남,,A,,,,1997-10-24
freq,,1,6,,3,,,,1
mean,342056.5,,,39.0,,172.7,67.3,22.163,
std,117049.733168,,,15.202339,,11.898179,17.807926,3.086555,
min,185823.0,,,19.0,,157.0,48.0,19.47,
25%,256861.5,,,26.25,,163.75,53.5,19.9525,
50%,335290.0,,,38.0,,172.5,60.0,20.155,
75%,439582.75,,,51.0,,180.25,84.25,24.925,


In [74]:
# Keep the full summary table; the following cells index into it.
df_loc = df_member.describe(include='all')
# First three summary rows, selected by position.
df_loc.iloc[:3]

Unnamed: 0,코드,이름,성별,연령,혈액형,키,체중,BMI,운동시작일
count,10.0,10,10,10.0,10,10.0,10.0,10.0,6
unique,,10,2,,4,,,,6
top,,이겨레,남,,A,,,,1997-10-24


In [77]:
# Single cell by label: row '50%', column '키'.
df_loc.at['50%', '키']

172.5

In [78]:
# Single cell by position: row 0, column 3.
df_loc.iat[0, 3]

10.0

In [80]:
# Single cell by position: row 2, column 8.
df_loc.iat[2, 8]

'1997-10-24'

In [81]:
# Single cell by label: row 'top', column '운동시작일'.
df_loc.at['top', '운동시작일']

'1997-10-24'

### 데이터 개수 count()

In [82]:
# Number of non-missing values in each column.
df_member.count()

코드       10
이름       10
성별       10
연령       10
혈액형      10
키        10
체중       10
BMI      10
운동시작일     6
dtype: int64

In [88]:
# Frequency of each distinct value in the '혈액형' column.
df_member['혈액형'].value_counts()

A     3
B     3
O     2
AB    2
Name: 혈액형, dtype: int64

In [86]:
# Frequency of each distinct value in '운동시작일'; dropna=True leaves the
# missing entries out of the tally.
df_member['운동시작일'].value_counts(dropna=True)

1997-10-24    1
1988-08-15    1
2020-12-25    1
2021-05-08    1
2021-03-05    1
2021-01-01    1
Name: 운동시작일, dtype: int64

### 평균값과 중앙값

In [105]:
# Column means. numeric_only=True restricts the reduction to the numeric
# columns; without it pandas >= 2.0 raises TypeError on the string columns
# (이름, 성별, 혈액형, 운동시작일) instead of silently skipping them.
df_member.mean(numeric_only=True)

코드     342056.500
연령         39.000
키         172.700
체중         67.300
BMI        22.163
dtype: float64

In [111]:
# Mean of the 'BMI' column; the double brackets select a one-column frame,
# so the result is a Series rather than a scalar.
df_member[['BMI']].mean()

BMI    22.163
dtype: float64

In [113]:
# Means of the '키' and '체중' columns only.
df_member[['키', '체중']].mean()

키     172.7
체중     67.3
dtype: float64

In [114]:
# Column medians. numeric_only=True restricts the reduction to the numeric
# columns; without it pandas >= 2.0 raises TypeError on the string columns
# (이름, 성별, 혈액형, 운동시작일) instead of silently skipping them.
df_member.median(numeric_only=True)

코드     335290.000
연령         38.000
키         172.500
체중         60.000
BMI        20.155
dtype: float64

In [117]:
# Median of the '키' column; the double brackets select a one-column frame,
# so the result is a Series rather than a scalar.
df_member[['키']].median()

키    172.5
dtype: float64

In [116]:
# Medians of the '키' and '체중' columns only.
df_member[['키','체중']].median()

키     172.5
체중     60.0
dtype: float64

### 최댓값과 최솟값

In [119]:
# Largest value in the '연령' column (a scalar, since a single column is
# selected as a Series).
df_member['연령'].max()

64

In [120]:
# Column-wise maxima of '키' and '체중'.
df_member[['키', '체중']].max()

키     193
체중     95
dtype: int64

In [121]:
# Smallest value in the '연령' column (a scalar, since a single column is
# selected as a Series).
df_member['연령'].min()

19

In [122]:
# Column-wise minima of '키' and '체중'.
df_member[['키', '체중']].min()

키     157
체중     48
dtype: int64

### 표준편차와 상관계수

In [123]:
# Column standard deviations. numeric_only=True restricts the reduction to
# the numeric columns; without it pandas >= 2.0 raises TypeError on the
# string columns (이름, 성별, 혈액형, 운동시작일) instead of skipping them.
df_member.std(numeric_only=True)

코드     117049.733168
연령         15.202339
키          11.898179
체중         17.807926
BMI         3.086555
dtype: float64

In [125]:
# Standard deviations of the '키' and '체중' columns only.
df_member[['키', '체중']].std()

키     11.898179
체중    17.807926
dtype: float64

In [126]:
# Pairwise correlation matrix of the numeric columns.
# The numeric columns are selected explicitly because pandas >= 2.0 no longer
# drops string columns inside corr() and fails on them; select_dtypes is used
# instead of corr(numeric_only=True) so the cell also runs on pandas releases
# older than 1.5, where corr() has no numeric_only parameter.
df_member.select_dtypes(include='number').corr()

Unnamed: 0,코드,연령,키,체중,BMI
코드,1.0,0.572318,-0.343057,-0.37751,-0.397892
연령,0.572318,1.0,-0.426925,-0.316438,-0.176555
키,-0.343057,-0.426925,1.0,0.942296,0.796228
체중,-0.37751,-0.316438,0.942296,1.0,0.95022
BMI,-0.397892,-0.176555,0.796228,0.95022,1.0


In [128]:
# Correlation matrix restricted to the '키' and '체중' columns.
df_member[['키', '체중']].corr()

Unnamed: 0,키,체중
키,1.0,0.942296
체중,0.942296,1.0
