### 한국에서 코로나 바이러스가 발생함에 따른 NAVER에서 검색된 단어의 수치

 - csv 파일 형식
 
 - 단어 : the search volume of 'XXX' in Korean in NAVER
 
 - column 항목
 
 > 0. 날짜
 > 
 > 1. cold 라는 단어의 검색 수치
 > 
 > 2. flu 라는 단어의 검색 수치
 > 
 > 3. pneumonia 라는 단어의 검색 수치
 > 
 > 4. coronavirus 라는 단어의 검색 수치

In [5]:
# draw charts
%matplotlib notebook

from matplotlib import pyplot as plt
import numpy as np

In [6]:
# Read the search-trend CSV file into memory.

import csv  # standard-library CSV parser
import pprint

# Location of the CSV file, relative to the notebook's working directory
file_path = '01_South_Korea/trend.csv'

# Open trend.csv. newline='' lets the csv module do its own newline handling,
# as its documentation recommends; the explicit encoding keeps the read
# independent of the platform's default locale.
with open(file_path, newline='', encoding='utf-8') as csv_file:
    reader = csv.reader(csv_file)  # yields each line as a list of strings
    raw = list(reader)  # materialise every row (header included) before the file closes

pprint.pprint(raw[:10])  # preview the first ten rows

[['date', 'cold', 'flu', 'pneumonia', 'coronavirus'],
 ['2019-12-07', '0.12408', '0.24699', '0.22144', '0.01309'],
 ['2019-12-08', '0.11863', '0.2539', '0.19617', '0.00836'],
 ['2019-12-09', '0.16308', '0.4788', '0.3318', '0.01272'],
 ['2019-12-10', '0.13981', '0.40789', '0.30671', '0.01881'],
 ['2019-12-11', '0.13972', '0.39007', '0.30299', '0.01854'],
 ['2019-12-12', '0.13954', '0.38226', '0.30526', '0.0199'],
 ['2019-12-13', '0.14217', '0.44098', '0.28344', '0.01881'],
 ['2019-12-14', '0.11363', '0.35898', '0.21672', '0.01499'],
 ['2019-12-15', '0.12445', '0.35189', '0.20381', '0.01163']]


In [7]:
# Drop the first row (the column-name header) so only data rows remain
raw_ = raw[1:]

In [11]:
# One list per CSV column, filled in row order so that the same index in
# every list refers to the same day.
date_list = []
cold_trend = []
flu_trend = []
pneumonia_trend = []
coronavirus_trend = []

for row in raw_:
    # csv.reader yields an empty list for a blank line (for example a stray
    # newline at the end of the file); skip it instead of failing on row[0].
    if not row:
        continue

    date_list.append(row[0])  # date is kept as the original string
    # The remaining columns arrive as strings and are converted once here.
    cold_trend.append(float(row[1]))
    flu_trend.append(float(row[2]))
    pneumonia_trend.append(float(row[3]))
    coronavirus_trend.append(float(row[4]))

###### draw 'cold' word trend

In [16]:
# Line chart of the daily search volume for 'cold'.
dates = date_list
counts = cold_trend

# Start a fresh figure so this chart is not drawn on top of whichever
# figure an earlier cell left active.
plt.figure()

# One x position per day
x_pos = list(range(len(dates)))

# Tick labels: keep every fifth date and blank out the rest so the rotated
# labels do not overlap.
dates_sorting = [day if idx % 5 == 0 else "" for idx, day in enumerate(dates)]

plt.plot(x_pos, counts, color='black')
plt.xlabel("date")
plt.ylabel("volume of word")
plt.title("search 'cold' in NAVER after infection COVID 19")

plt.xticks(x_pos, dates_sorting, rotation=90)

plt.show()

<IPython.core.display.Javascript object>

###### draw 'flu' word trend

In [20]:
# Line chart of the daily search volume for 'flu'.
dates = date_list
counts = flu_trend

# Start a fresh figure so this chart is not drawn on top of whichever
# figure an earlier cell left active.
plt.figure()

# One x position per day
x_pos = list(range(len(dates)))

# Tick labels: keep every fifth date and blank out the rest so the rotated
# labels do not overlap.
dates_sorting = [day if idx % 5 == 0 else "" for idx, day in enumerate(dates)]

plt.plot(x_pos, counts, color='black')
plt.xlabel("date")
plt.ylabel("volume of word")
plt.title("search 'flu' in NAVER after infection COVID 19")

plt.xticks(x_pos, dates_sorting, rotation=90)

plt.show()

<IPython.core.display.Javascript object>

###### draw 'pneumonia' word trend

In [19]:
# Line chart of the daily search volume for 'pneumonia'.
dates = date_list
counts = pneumonia_trend

# Start a fresh figure so this chart is not drawn on top of whichever
# figure an earlier cell left active.
plt.figure()

# One x position per day
x_pos = list(range(len(dates)))

# Tick labels: keep every fifth date and blank out the rest so the rotated
# labels do not overlap.
dates_sorting = [day if idx % 5 == 0 else "" for idx, day in enumerate(dates)]

plt.plot(x_pos, counts, color='black')
plt.xlabel("date")
plt.ylabel("volume of word")
plt.title("search 'pneumonia' in NAVER after infection COVID 19")

plt.xticks(x_pos, dates_sorting, rotation=90)

plt.show()

<IPython.core.display.Javascript object>

###### draw 'corona virus' word trend

In [21]:
# Line chart of the daily search volume for 'coronavirus'.
dates = date_list
counts = coronavirus_trend

# Start a fresh figure so this chart is not drawn on top of whichever
# figure an earlier cell left active.
plt.figure()

# One x position per day
x_pos = list(range(len(dates)))

# Tick labels: keep every fifth date and blank out the rest so the rotated
# labels do not overlap.
dates_sorting = [day if idx % 5 == 0 else "" for idx, day in enumerate(dates)]

plt.plot(x_pos, counts, color='black')
plt.xlabel("date")
plt.ylabel("volume of word")
plt.title("search 'corona virus' in NAVER after infection COVID 19")

plt.xticks(x_pos, dates_sorting, rotation=90)

plt.show()

<IPython.core.display.Javascript object>

### data를 정규화하여 같이 확인해보기

In [23]:
# Overlay the raw search volume of all four words on a single chart.

# x axis: one position per day, with every fifth date as a visible tick
# label. Both are rebuilt from date_list here so the cell does not depend on
# variables left behind by the single-word chart cells.
x_pos = list(range(len(date_list)))
dates_sorting = [day if idx % 5 == 0 else "" for idx, day in enumerate(date_list)]

# labels for legend
labels_word = ['cold', 'flu', 'pneumonia', 'coronavirus']

# each word's data and line colour, listed in the same order as labels_word
word_trends = [cold_trend, flu_trend, pneumonia_trend, coronavirus_trend]
line_colors = ['red', 'green', 'blue', 'magenta']

# Start a fresh figure so the lines are not added to a previously shown chart.
plt.figure()

# Attach the label to each line directly so the legend cannot drift out of
# step with the plotting order.
for word, trend, line_color in zip(labels_word, word_trends, line_colors):
    plt.plot(x_pos, trend, color=line_color, label=word)

# plot legend (uses the labels attached to the lines above)
plt.legend()

# plot each axis info.
plt.xlabel("date")
plt.ylabel("volume of word")
plt.title("search 'word' in NAVER after infection COVID 19")
plt.xticks(x_pos, dates_sorting, rotation=90)

plt.show()

<IPython.core.display.Javascript object>

In [25]:
# Overlay all four words on one chart after scaling each series by its own
# maximum, so the shapes can be compared regardless of absolute volume.


def _scale_to_peak(values):
    """Return ``values`` divided by their maximum, so the largest becomes 1.0.

    The maximum is computed once rather than once per element. An empty
    input yields an empty list. A maximum of zero raises ZeroDivisionError,
    exactly as the per-element division it replaces did.
    """
    if not values:
        return []
    peak = max(values)
    return [value / peak for value in values]


# x axis: one position per day, with every fifth date as a visible tick
# label. Both are rebuilt from date_list here so the cell does not depend on
# variables left behind by earlier chart cells.
x_pos = list(range(len(date_list)))
dates_sorting = [day if idx % 5 == 0 else "" for idx, day in enumerate(date_list)]

# labels for legend
labels_word = ['cold', 'flu', 'pneumonia', 'coronavirus']

# each word data with normalization
cold_trend_normalized = _scale_to_peak(cold_trend)
flu_trend_normalized = _scale_to_peak(flu_trend)
pneumonia_trend_normalized = _scale_to_peak(pneumonia_trend)
coronavirus_trend_normalized = _scale_to_peak(coronavirus_trend)

# normalized series and line colours, listed in the same order as labels_word
normalized_trends = [
    cold_trend_normalized,
    flu_trend_normalized,
    pneumonia_trend_normalized,
    coronavirus_trend_normalized,
]
line_colors = ['red', 'green', 'blue', 'magenta']

# Start a fresh figure so the lines are not added to a previously shown chart.
plt.figure()

# Attach the label to each line directly so the legend cannot drift out of
# step with the plotting order.
for word, trend, line_color in zip(labels_word, normalized_trends, line_colors):
    plt.plot(x_pos, trend, color=line_color, label=word)

# plot legend (uses the labels attached to the lines above)
plt.legend()

# plot each axis info.
# NOTE(review): the y label and title are the same as on the raw-volume
# chart although the values here are ratios to each word's peak — consider
# rewording them.
plt.xlabel("date")
plt.ylabel("volume of word")
plt.title("search 'word' in NAVER after infection COVID 19")
plt.xticks(x_pos, dates_sorting, rotation=90)

plt.show()

<IPython.core.display.Javascript object>