In [None]:
# Install the Nanum font package, refresh the system font cache, and delete
# matplotlib's cache directory (presumably so its font list is rebuilt on next use).
# The `!` lines are IPython shell escapes; they use apt-get and sudo on the host.
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

import matplotlib.pyplot as plt
import matplotlib.font_manager as fm  # NOTE(review): `fm` is not referenced in the visible cells

# Draw all figure text with NanumBarunGothic (the plots below use Korean labels).
plt.rcParams['font.family'] = 'NanumBarunGothic'
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign from rendering as a broken glyph

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the health-checkup sample and draw weight against height as a scatter plot.
data = pd.read_csv('NHISS-Health-1000.csv')
heightdata, weightdata = data['height'], data['weight']

fig, ax = plt.subplots(figsize=(10, 4))
ax.scatter(heightdata, weightdata)
ax.set_xlabel('키')
ax.set_ylabel('몸무게')
ax.set_title('2020 건강검진 산점도 그래프')
ax.grid()
plt.show()

In [None]:
# Overlay three pairwise scatter plots of the lipid columns on one set of axes.
HDLdata = data['HDL']
LDLdata = data['LDL']
cholesteroldata = data['cholesterol']

fig, ax = plt.subplots(figsize=(10, 6))

# (x values, y values, marker colour, legend entry), drawn in this order.
pairings = [
    (cholesteroldata, LDLdata, 'r', 'Cholesterol*LDL'),
    (HDLdata, cholesteroldata, 'g', 'HDL*Cholesterol'),
    (HDLdata, LDLdata, 'b', 'HDL*LDL'),
]
for xs, ys, colour, legend_name in pairings:
    ax.scatter(xs, ys, color=colour, edgecolor='w', label=legend_name)

# NOTE(review): the axis labels say HDL / LDL, but the red series has cholesterol
# on x and the green series has cholesterol on y -- confirm the intended labelling.
ax.set_xlim(0, 500)
ax.set_ylim(0, 400)
ax.set_xlabel('HDL')
ax.set_ylabel('LDL')
ax.set_title('2020년 건강검진 HDL-LDL 산점도 그래프')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Histogram of height for rows whose gender code is 1.0 (labelled '남성' in the legend).
mandata = data.loc[data['gender'] == 1.0, ['gender', 'height']]

# Explicit bin edges: every 5 units from 140 up to and including 200.
height_edges = np.arange(140, 201, 5)

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(mandata['height'], bins=height_edges, label='남성')
ax.set_xlim(140, 200)
ax.set_ylim(0, 160)
ax.set_xlabel('키')
ax.set_ylabel('빈도수')
ax.set_title('2020년 건강검진 남성 키 히스토그램')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Overlaid, semi-transparent height histograms for gender codes 1.0 and 2.0.
height_cols = ['gender', 'height']
mandata = data.loc[data['gender'] == 1.0, height_cols]
womandata = data.loc[data['gender'] == 2.0, height_cols]

fig, ax = plt.subplots(figsize=(10, 6))
# Each group gets its own 20 automatically computed bins.
for frame, legend_name in ((mandata, 'Man'), (womandata, 'Woman')):
    ax.hist(frame['height'], bins=20, alpha=0.5, label=legend_name)

ax.set_xlim(120, 200)
ax.set_ylim(0, 180)
ax.set_xlabel('키')
ax.set_ylabel('빈도수')
ax.set_title('2020년 건강검진 성별-키 히스토그램')
ax.legend()
ax.grid()
plt.show()

In [None]:
# Rebuild the per-gender subsets, this time keeping the weight column,
# and show how many rows each one holds.
weight_cols = ['gender', 'weight']
is_male = data['gender'] == 1.0
is_female = data['gender'] == 2.0
mandata = data.loc[is_male, weight_cols]
womandata = data.loc[is_female, weight_cols]
print(mandata.shape)
womandata.shape

In [None]:
# Box-and-whisker comparison of female vs. male weight.
# Missing weights are dropped first: a NaN in the input makes the quartiles NaN,
# which leaves that group's box empty.
female = womandata['weight'].dropna().to_numpy()
male = mandata['weight'].dropna().to_numpy()

plt.figure(figsize=(4,6))
plt.boxplot([female, male])
# Name the boxes through the tick labels (boxes sit at positions 1..N by default)
# rather than the `labels=` keyword, which Matplotlib 3.9 deprecated in favour of
# `tick_labels=`; setting xticks works on both older and newer releases.
plt.xticks([1, 2], ['여성', '남성'])
plt.ylim(30,130)
plt.xlabel('성별')
plt.ylabel('몸무게')
plt.title('2020년 건강검진 남성 & 여성 몸무게 상자수염 그래프')
plt.show()

In [None]:
# Pie chart of hard-coded case counts per disease category.
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
# Kept under its own name: the original reused `data`, which replaced the loaded
# DataFrame with a list and broke any re-run of the cells above.
disease_counts = [2812, 967, 306, 69, 710, 29]
categories = ['뇌질환 등', '치매', '파킨슨병', '알츠하이머', '기타', '중풍후유증']
# One decimal place: '%d%%' truncates each share, so the printed values summed
# to 97% and the smallest slice was shown as 0%.
plt.pie(disease_counts, labels=categories, autopct='%.1f%%', startangle=90)
plt.title('서울 지역  65세 이상 남성 주요 질병 비율 ')
plt.show()

## Seaborn 실습

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Select the NanumBarunGothic font family and (re)load the dataset for the seaborn section.
plt.rcParams['font.family'] = 'NanumBarunGothic'
data = pd.read_csv('NHISS-Health-1000.csv')

In [None]:
# Build `data6`: six columns of `data` with the coded gender / drinking / smoking
# values replaced by readable labels, then count rows per (gender, habit) pair.
#
# The labels are applied with Series.map rather than by assigning strings into the
# numeric columns through .loc: that assignment is an incompatible-dtype setitem,
# which recent pandas warns about and is slated to reject (PDEP-6).
# Codes missing from a mapping (including NaN) become NaN.
code_labels = {
    'gender': {1: 'Male', 2: 'Female'},
    'drinking': {0: 'Non-drinking', 1: 'Drinking'},
    'smoking': {1: 'Non-smoking', 2: 'Non-smoking', 3: 'Smoking'},
}

data6 = data.loc[:, ['gender', 'height', 'weight', 'waist', 'drinking', 'smoking']].copy()
for column, labels in code_labels.items():
    data6[column] = data6[column].map(labels)

# Long-form count tables with columns: gender, <habit>, count.
drinking = (
    data6.groupby(['gender', 'drinking'])['drinking']
    .count()
    .to_frame(name='count')
    .reset_index()
)
smoking = (
    data6.groupby(['gender', 'smoking'])['smoking']
    .count()
    .to_frame(name='count')
    .reset_index()
)

In [None]:
# Side-by-side grouped bar charts of the drinking and smoking count tables.
fig, (area1, area2) = plt.subplots(1, 2, figsize=(17, 6))

# Left panel: drinking counts per gender; right panel: smoking counts per gender.
ax1 = sns.barplot(data=drinking, x='gender', y='count', hue='drinking', ax=area1)
ax2 = sns.barplot(data=smoking, x='gender', y='count', hue='smoking', ax=area2)

area1.set_title('Drinking Type')
area2.set_title('Smoking Type')
fig.suptitle('2020년 건강검진 음주 및 흡연 여부 시본 막대 그래프', fontweight='bold')
plt.show()

In [None]:
# Count plot of the drinking labels, split by gender.
fig, ax = plt.subplots(figsize=(5, 3))
ax.set_title('음주 시본 카운트 플롯 그래프', fontweight='bold')
sns.countplot(data=data6, x='drinking', hue='gender', ax=ax)
plt.show()

In [None]:
# Horizontal count plot of the smoking labels, split by gender.
smoking_order = ['Smoking', 'Non-smoking']  # category order along the y axis

fig, ax = plt.subplots(figsize=(6, 3))
ax.set_title('흡연 시본 카운트 플롯 그래프', fontweight='bold')
sns.countplot(data=data6, y='smoking', hue='gender', order=smoking_order, ax=ax)
plt.show()

In [None]:
# Split `data6` by gender label and print the smallest female waist and the
# largest male waist.
male_data = data6.loc[data6.gender=='Male',['gender','weight','waist','drinking','smoking']]
female_data = data6.loc[data6.gender=='Female',['gender','weight','waist','drinking','smoking']]
# Series.min()/max() skip NaN; the builtin min()/max() compare element by element,
# so a missing value makes their result depend on row order.
print(female_data['waist'].min(), male_data['waist'].max())

In [None]:
# Strip plot of weight against waist: male rows are drawn first, female rows second,
# both onto the same axes.
plt.figure(figsize=(10,5))
plt.title('몸무게-허리둘레 시본 스트립 플롯 그래프')
sns.stripplot(data=male_data, x='waist', y='weight')
sns.stripplot(data=female_data, x='waist', y='weight', )
# Ticks are placed at positions 0, 63.5 and 127 and relabelled by hand as 50, 90, 130.
# NOTE(review): this presumably assumes seaborn lays the waist values out as
# categories at integer positions; the two calls may not share the same
# category-to-position mapping -- confirm against the installed seaborn version.
plt.xticks(np.arange(0,128,63.5), labels=[50, 90, 130])
plt.show()

In [None]:
# Same weight-vs-waist strip plot, with colour taken from `gender`:
# male rows use the 'dark' palette, female rows use 'Set1'.
plt.figure(figsize=(10,5))
plt.title('몸무게-허리둘레 스트립 플롯 그래프-팔레트 설정')
sns.stripplot(data=male_data, x='waist', y='weight', hue='gender', palette='dark')
sns.stripplot(data=female_data, x='waist', y='weight', hue='gender', palette='Set1')
# Ticks are placed at positions 0, 63 and 126 and relabelled by hand as 53, 90.5, 128.
# NOTE(review): presumably relies on a categorical x axis with integer positions;
# verify that the labels still correspond to the plotted waist values.
plt.xticks(np.arange(0,127,63), labels=[53, 90.5, 128])
plt.show()

In [None]:
# Swarm plot of weight against waist, restricted to the first 100 rows of each gender subset.
male_data_100 = male_data.head(100)
female_data_100 = female_data.head(100)

plt.figure(figsize=(10,5))
plt.title('허리-몸무게 시본 스웜 플롯 그래프')
# Male rows use the 'dark' palette, female rows 'Set1'; markers are drawn at size 4.
sns.swarmplot(data=male_data_100,x='waist',y='weight',hue='gender',palette='dark',size=4)
sns.swarmplot(data=female_data_100,x='waist',y='weight',hue='gender',palette='Set1',size=4)
# Ticks are placed at positions 0, 22.5 and 45 and relabelled by hand as 53, 75.5, 98.
# NOTE(review): presumably relies on a categorical x axis with integer positions;
# verify that the labels still correspond to the plotted waist values.
plt.xticks(np.arange(0,46,22.5), labels=[53, 75.5, 98])
plt.show()

In [None]:
# Box plot of weight per drinking label, with one box per gender inside each group.
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('음주와 몸무게 시본 박스 플롯 그래프')
sns.boxplot(data=data6, x='drinking', y='weight', hue='gender', ax=ax)
ax.set_ylim(30, 100)  # restrict the visible weight range
plt.show()

In [None]:
# Horizontal box plot of waist per drinking label, split by gender.
plt.figure(figsize=(10,6))
# The title now names the columns that are actually plotted (drinking, waist);
# the previous text said smoking and weight, which this cell never draws.
plt.title('음주와 허리둘레 관계 수평 시본 박스 플롯 그래프')
sns.boxplot(data=data6, x='waist', y='drinking', hue='gender', orient='h')
plt.xlim(50,110)  # restrict the visible waist range
plt.show()

In [None]:
# Violin plot of weight per gender, split by drinking label.
# Only rows with weight below 100 are drawn.
under_100 = data6.loc[data6['weight'] < 100]

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('성별에 따른 음주 여부와 몸무게 바이올린 플롯 그래프')
sns.violinplot(data=under_100, x='gender', y='weight', hue='drinking', ax=ax)
plt.show()

In [None]:
# Horizontal violin plot of waist per gender, split by drinking label.
# Only rows with waist below 120 are drawn.
waist_under_120 = data6.loc[data6['waist'] < 120]

fig, ax = plt.subplots(figsize=(8, 5))
ax.set_title('성별에 따른 음주 여부와 허리둘레 수평 바이올린 플롯 그래프')
sns.violinplot(data=waist_under_120, y='gender', x='waist', hue='drinking', ax=ax)
plt.show()

In [None]:
# Column subsets of the raw (still numerically coded) table, used by the heatmap cells.
cols3 = ['gender', 'weight', 'waist']
cols8 = cols3 + ['drinking', 'smoking', 'cholesterol', 'HDL', 'LDL']
data3 = data[cols3]
data8 = data[cols8]

In [None]:
# Annotated heatmap of the pairwise correlations between the three columns of `data3`.
correlation_data3 = data3.corr()

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('3×3 히트맵 만들기')
sns.heatmap(correlation_data3, annot=True, cmap='YlGnBu', ax=ax)
plt.show()

In [None]:
# Annotated heatmap of the correlations between the eight columns of `data8`,
# showing only the part below the diagonal.
plt.figure(figsize=(13,10))
plt.title('8×8 히트맵 만들기')
correlation_data8= data8.corr()
# Explicit boolean mask (True = hide) covering the diagonal and everything above it.
# Passing np.triu(correlation_data8) itself relied on the correlation values being
# non-zero: an upper-triangle coefficient of exactly 0 would have been left visible.
upp_mat = np.triu(np.ones_like(correlation_data8, dtype=bool))
sns.heatmap(correlation_data8, annot=True, cmap='RdYlGn', mask=upp_mat)
plt.show()

In [None]:
# One waist histogram (with a KDE curve) per drinking label, arranged in columns.
hist_options = dict(bins=10, color='g', kde=True)
fg = sns.FacetGrid(data6, col='drinking', height=5, aspect=1.3)
fg.map(sns.histplot, 'waist', **hist_options)
plt.show()

In [None]:
# Grid of waist histograms (with KDE curves): one row per gender, one column per drinking label.
grid_layout = dict(row='gender', col='drinking', height=4.5, aspect=1.5)
hist_options = dict(bins=10, color='deeppink', kde=True)
fg = sns.FacetGrid(data6, **grid_layout)
fg.map(sns.histplot, 'waist', **hist_options)
plt.show()

In [None]:
# Pairwise grid over `data6`, coloured by gender: histograms on the diagonal,
# scatter plots everywhere else, plus a legend.
color = ['#00994C', '#FF007F']
pp = sns.PairGrid(data6, hue='gender', palette=color, height=3.3, aspect=1.3)
# Each mapping call returns the grid itself, so the three steps can be chained.
pp.map_diag(sns.histplot, bins=10).map_offdiag(sns.scatterplot).add_legend()
plt.show()