# Pandas 심화 (2020-2학기 인사이트 교육세션)

- groupby
- merge
- pivot_table
- stack, unstack

In [2]:
import pandas as pd

In [3]:
# Load the CSV files used throughout the session.
data_path = 'data/'

patientInfo = pd.read_csv(f'{data_path}PatientInfo.csv')
seoulfloating = pd.read_csv(f'{data_path}SeoulFloating.csv')
seoulfloating.head()

Unnamed: 0,date,hour,birth_year,sex,province,city,fp_num
0,2020-01-01,0,20,female,Seoul,Dobong-gu,19140
1,2020-01-01,0,20,male,Seoul,Dobong-gu,19950
2,2020-01-01,0,20,female,Seoul,Dongdaemun-gu,25450
3,2020-01-01,0,20,male,Seoul,Dongdaemun-gu,27050
4,2020-01-01,0,20,female,Seoul,Dongjag-gu,28880


## groupby

- 명목변수를 기준으로 그룹별로 데이터를 묶어주는 함수


1. agg
2. lambda
3. apply

### DataFrame.groupby('컬럼명') 으로 사용
- groupby 객체를 리턴해주기 때문에 원하는 값을 조회하려면 추가적인 함수를 사용해야함
- 여러개의 컬럼을 기준으로 그룹핑하려면
- DataFrame.groupby(['컬럼명1','컬럼명2']) 로 사용해주면 됨



- DataFrame.groupby('컬럼명').함수() 처럼 사용하면
    - 컬럼별로 데이터를 묶어주고 함수를 적용시켜줌

In [4]:
# Group the rows by the 'date' column.
# On its own this only returns a GroupBy object, not an aggregated table.

seoulfloating.groupby('date')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000246D408D408>

In [5]:
# A GroupBy object needs a follow-up aggregation before any values can be seen.
# Here: the per-date sum. numeric_only=True restricts the result to the numeric
# columns (hour, birth_year, fp_num); without it, recent pandas versions also
# "sum" the text columns (sex, province, city) by concatenating strings.

seoulfloating.groupby('date').sum(numeric_only=True)

Unnamed: 0_level_0,hour,birth_year,fp_num
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-01,82800,324000,192406170
2020-01-02,82800,324000,202929360
2020-01-03,82800,324000,203337150
2020-01-04,82800,324000,197396050
2020-01-05,82800,324000,195747670
...,...,...,...
2020-05-27,82800,324000,199293080
2020-05-28,82800,324000,199209840
2020-05-29,40800,229500,140847320
2020-05-30,82800,324000,189674150


### agg

- 각종 통계량을 보는 함수
- DataFrame.groupby('컬럼').agg([함수1, 함수2, ...])


- 기본적으로 간단한 통계함수(평균, 합, 최댓값, 최솟값, 표준편차)는 지원해줌

In [6]:
# Mean, sum, max, min and standard deviation of fp_num per date.
# fp_num is selected BEFORE agg so only that column is aggregated; aggregating
# every column first wastes work and 'mean'/'std' fail on the text columns
# in recent pandas versions.

seoulfloating.groupby('date')['fp_num'].agg(['mean', 'sum', 'max', 'min', 'std'])

Unnamed: 0_level_0,mean,sum,max,min,std
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01,26723.079167,192406170,58790,5000,10141.901583
2020-01-02,28184.633333,202929360,121050,4970,13123.840968
2020-01-03,28241.270833,203337150,122400,5190,13203.603225
2020-01-04,27416.118056,197396050,79880,5200,10884.040223
2020-01-05,27187.176389,195747670,65310,5040,10517.133129
...,...,...,...,...,...
2020-05-27,27679.594444,199293080,120120,4670,13360.498096
2020-05-28,27668.033333,199209840,119810,4650,13358.392621
2020-05-29,27617.121569,140847320,117730,4670,13565.792431
2020-05-30,26343.631944,189674150,74900,4560,11076.653088


In [7]:
# User-defined functions can be passed to agg as well.

def my_mean(x):
    """Return the arithmetic mean of ``x`` (sum of the items divided by their count)."""
    total = sum(x)
    count = len(x)
    return total / count

# Passing the function inside a list yields a DataFrame (one column per function);
# without the list the result would be a Series.
seoul_mean = seoulfloating.groupby('city')['fp_num'].agg([my_mean])
seoul_mean

Unnamed: 0_level_0,my_mean
city,Unnamed: 1_level_1
Dobong-gu,20048.519082
Dongdaemun-gu,24221.652148
Dongjag-gu,26070.77111
Eunpyeong-gu,28041.551899
Gangbuk-gu,19911.660444
Gangdong-gu,27832.631361
Gangnam-gu,48831.048811
Gangseo-gu,36899.837528
Geumcheon-gu,18908.174087
Guro-gu,29579.639795


## 실습1. SeoulFloating 데이터에서 유동인구를 연령별로 조회

In [8]:
# Expected result is shown below.
# Sum per age group (birth_year). numeric_only=True keeps the text columns
# out of the aggregation, which would otherwise be string-concatenated on
# recent pandas versions.
a = seoulfloating.groupby('birth_year').sum(numeric_only=True)
a[['fp_num']]

Unnamed: 0_level_0,fp_num
birth_year,Unnamed: 1_level_1
20,5441769700
30,6069018790
40,6054699830
50,5675739830
60,3706127050
70,2805416690


### lambda

- 함수를 간단히 한줄로 표현가능
- lambda 변수1, 변수2, ... : 변수들의 연산

In [9]:
# lambda example: multiply two numbers.

f = lambda x, y : x*y
f(2,5)

10

In [10]:
# Mean floating population per city, computed with a lambda.
# fp_num is selected BEFORE agg: the lambda cannot be evaluated on the text
# columns (sum() of strings fails), and only fp_num is wanted anyway.

seoulfloating.groupby('city')['fp_num'].agg([lambda x: (sum(x) / len(x))])

Unnamed: 0_level_0,<lambda>
city,Unnamed: 1_level_1
Dobong-gu,20048.519082
Dongdaemun-gu,24221.652148
Dongjag-gu,26070.77111
Eunpyeong-gu,28041.551899
Gangbuk-gu,19911.660444
Gangdong-gu,27832.631361
Gangnam-gu,48831.048811
Gangseo-gu,36899.837528
Geumcheon-gu,18908.174087
Guro-gu,29579.639795


### apply

- agg는 여러함수를 한번에 처리가능, groupby 객체에 최적화
- 통계량을 다루기 때문에 데이터type중 숫자형만 가능
- apply는 dataframe형식에 최적화


- DataFrame.apply(함수, axis) 으로 사용
- axis는 함수를 적용할 방향을 결정: axis=0은 각 열(column)에 함수를 적용하고, axis=1은 각 행(row)에 함수를 적용함. 행 단위로 처리하기 위해 보통 1을 많이 씀

In [11]:
# Display the per-city mean table (column 'my_mean') that the apply examples below use.
seoul_mean

Unnamed: 0_level_0,my_mean
city,Unnamed: 1_level_1
Dobong-gu,20048.519082
Dongdaemun-gu,24221.652148
Dongjag-gu,26070.77111
Eunpyeong-gu,28041.551899
Gangbuk-gu,19911.660444
Gangdong-gu,27832.631361
Gangnam-gu,48831.048811
Gangseo-gu,36899.837528
Geumcheon-gu,18908.174087
Guro-gu,29579.639795


In [12]:
# Three-way classification written as a chained conditional lambda.
# With axis = 1 the function receives one row at a time, so x['my_mean']
# is that row's value.

seoul_mean.apply(lambda x : '상' if x['my_mean'] > 40000
                else '중' if x['my_mean'] > 20000
                else '하', axis = 1).to_frame()

Unnamed: 0_level_0,0
city,Unnamed: 1_level_1
Dobong-gu,중
Dongdaemun-gu,중
Dongjag-gu,중
Eunpyeong-gu,중
Gangbuk-gu,하
Gangdong-gu,중
Gangnam-gu,상
Gangseo-gu,중
Geumcheon-gu,하
Guro-gu,중


In [13]:
# Using apply on a Series: the lambda receives each value directly.
# The labels are stored in a new '순위' column of seoul_mean.

seoul_mean['순위'] = seoul_mean['my_mean'].apply(lambda x : '상' if x > 40000
                else '중' if x>20000
                else '하')
seoul_mean

Unnamed: 0_level_0,my_mean,순위
city,Unnamed: 1_level_1,Unnamed: 2_level_1
Dobong-gu,20048.519082,중
Dongdaemun-gu,24221.652148,중
Dongjag-gu,26070.77111,중
Eunpyeong-gu,28041.551899,중
Gangbuk-gu,19911.660444,하
Gangdong-gu,27832.631361,중
Gangnam-gu,48831.048811,상
Gangseo-gu,36899.837528,중
Geumcheon-gu,18908.174087,하
Guro-gu,29579.639795,중


In [14]:
# Named-function equivalent of the lambda used above;
# apply accepts a regular function just as well.

def rank(x):
    """Classify ``x``: '상' above 40000, '중' above 20000, otherwise '하'."""
    if x > 40000:
        return '상'
    return '중' if x > 20000 else '하'
    
# The function is passed inside a list, so the result is a DataFrame
# whose column is named after the function ('rank').
seoul_mean['my_mean'].apply([rank])

Unnamed: 0_level_0,rank
city,Unnamed: 1_level_1
Dobong-gu,중
Dongdaemun-gu,중
Dongjag-gu,중
Eunpyeong-gu,중
Gangbuk-gu,하
Gangdong-gu,중
Gangnam-gu,상
Gangseo-gu,중
Geumcheon-gu,하
Guro-gu,중


## 실습2. SeoulFloating 데이터에서 마포구의 유동인구를 날짜와 연령별로 조회

In [15]:
# Expected result is shown below.
# Boolean indexing already returns a new frame, so no explicit copy of the
# full table is needed.
a = seoulfloating[seoulfloating['city'] == 'Mapo-gu']
# Select fp_num before summing so the text columns are never aggregated.
a.groupby(['date', 'birth_year'])[['fp_num']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,fp_num
date,birth_year,Unnamed: 2_level_1
2020-01-01,20,1660930
2020-01-01,30,1637350
2020-01-01,40,1492290
2020-01-01,50,1267310
2020-01-01,60,854060
...,...,...
2020-05-31,30,1740330
2020-05-31,40,1500480
2020-05-31,50,1247850
2020-05-31,60,777610


## merge

- 두개의 테이블을 같은 컬럼을 기준으로 병합


- pd.merge(데이터프레임1, 데이터프레임2, on = '컬럼명', how = '')
    - how에 들어갈 인자는 inner, outer, left, right 등등이 있다
    - default 값은 inner

In [17]:
# Region-level reference data (one row per area).

region = pd.read_csv(f'{data_path}Region.csv')
region.head()

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count
0,10000,Seoul,Seoul,37.566953,126.977977,607,830,48,1.44,15.38,5.8,22739
1,10010,Seoul,Gangnam-gu,37.518421,127.047222,33,38,0,4.18,13.17,4.3,3088
2,10020,Seoul,Gangdong-gu,37.530492,127.123837,27,32,0,1.54,14.55,5.4,1023
3,10030,Seoul,Gangbuk-gu,37.639938,127.025508,14,21,0,0.67,19.49,8.5,628
4,10040,Seoul,Gangseo-gu,37.551166,126.849506,36,56,1,1.17,14.39,5.7,1080


In [18]:
# Patient information data (first rows).

patientInfo.head()

Unnamed: 0,patient_id,sex,age,country,province,city,infection_case,infected_by,contact_number,symptom_onset_date,confirmed_date,released_date,deceased_date,state
0,1000000001,male,50s,Korea,Seoul,Gangseo-gu,overseas inflow,,75,2020-01-22,2020-01-23,2020-02-05,,released
1,1000000002,male,30s,Korea,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
2,1000000003,male,50s,Korea,Seoul,Jongno-gu,contact with patient,2002000001.0,17,,2020-01-30,2020-02-19,,released
3,1000000004,male,20s,Korea,Seoul,Mapo-gu,overseas inflow,,9,2020-01-26,2020-01-30,2020-02-15,,released
4,1000000005,female,20s,Korea,Seoul,Seongbuk-gu,contact with patient,1000000002.0,2,,2020-01-31,2020-02-24,,released


In [19]:
# Keep only the patients whose province is Seoul.

seoul_patientInfo = patientInfo.loc[patientInfo['province'].eq('Seoul')]
seoul_patientInfo.head()

Unnamed: 0,patient_id,sex,age,country,province,city,infection_case,infected_by,contact_number,symptom_onset_date,confirmed_date,released_date,deceased_date,state
0,1000000001,male,50s,Korea,Seoul,Gangseo-gu,overseas inflow,,75,2020-01-22,2020-01-23,2020-02-05,,released
1,1000000002,male,30s,Korea,Seoul,Jungnang-gu,overseas inflow,,31,,2020-01-30,2020-03-02,,released
2,1000000003,male,50s,Korea,Seoul,Jongno-gu,contact with patient,2002000001.0,17,,2020-01-30,2020-02-19,,released
3,1000000004,male,20s,Korea,Seoul,Mapo-gu,overseas inflow,,9,2020-01-26,2020-01-30,2020-02-15,,released
4,1000000005,female,20s,Korea,Seoul,Seongbuk-gu,contact with patient,1000000002.0,2,,2020-01-31,2020-02-24,,released


In [24]:
# Number of confirmed patients per city, with city as a regular column.

seoul_patients = (
    seoul_patientInfo.groupby('city')[['patient_id']]
    .count()
    .reset_index()
)
seoul_patients

Unnamed: 0,city,patient_id
0,Dobong-gu,62
1,Dongdaemun-gu,38
2,Dongjak-gu,53
3,Eunpyeong-gu,53
4,Gangbuk-gu,24
5,Gangdong-gu,35
6,Gangnam-gu,83
7,Gangseo-gu,78
8,Geumcheon-gu,32
9,Guro-gu,85


In [25]:
# Region rows that belong to Seoul.

region_seoul = region.loc[region['province'].eq('Seoul')]
region_seoul

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count
0,10000,Seoul,Seoul,37.566953,126.977977,607,830,48,1.44,15.38,5.8,22739
1,10010,Seoul,Gangnam-gu,37.518421,127.047222,33,38,0,4.18,13.17,4.3,3088
2,10020,Seoul,Gangdong-gu,37.530492,127.123837,27,32,0,1.54,14.55,5.4,1023
3,10030,Seoul,Gangbuk-gu,37.639938,127.025508,14,21,0,0.67,19.49,8.5,628
4,10040,Seoul,Gangseo-gu,37.551166,126.849506,36,56,1,1.17,14.39,5.7,1080
5,10050,Seoul,Gwanak-gu,37.47829,126.951502,22,33,1,0.89,15.12,4.9,909
6,10060,Seoul,Gwangjin-gu,37.538712,127.082366,22,33,3,1.16,13.75,4.8,723
7,10070,Seoul,Guro-gu,37.495632,126.88765,26,34,3,1.0,16.21,5.7,741
8,10080,Seoul,Geumcheon-gu,37.456852,126.895229,18,19,0,0.96,16.15,6.7,475
9,10090,Seoul,Nowon-gu,37.654259,127.056294,42,66,6,1.39,15.4,7.4,952


In [26]:
# Inner join: keeps only the cities present in both tables.

pd.merge(region_seoul, seoul_patients, on='city')
# Same result as pd.merge(region_seoul, seoul_patients, on='city', how = 'inner')

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count,patient_id
0,10010,Seoul,Gangnam-gu,37.518421,127.047222,33,38,0,4.18,13.17,4.3,3088,83
1,10020,Seoul,Gangdong-gu,37.530492,127.123837,27,32,0,1.54,14.55,5.4,1023,35
2,10030,Seoul,Gangbuk-gu,37.639938,127.025508,14,21,0,0.67,19.49,8.5,628,24
3,10040,Seoul,Gangseo-gu,37.551166,126.849506,36,56,1,1.17,14.39,5.7,1080,78
4,10050,Seoul,Gwanak-gu,37.47829,126.951502,22,33,1,0.89,15.12,4.9,909,113
5,10060,Seoul,Gwangjin-gu,37.538712,127.082366,22,33,3,1.16,13.75,4.8,723,17
6,10070,Seoul,Guro-gu,37.495632,126.88765,26,34,3,1.0,16.21,5.7,741,85
7,10080,Seoul,Geumcheon-gu,37.456852,126.895229,18,19,0,0.96,16.15,6.7,475,32
8,10090,Seoul,Nowon-gu,37.654259,127.056294,42,66,6,1.39,15.4,7.4,952,43
9,10100,Seoul,Dobong-gu,37.668952,127.047082,23,26,1,0.95,17.89,7.2,485,62


In [27]:
# Left outer join.
# Rows of region_seoul with no match in seoul_patients are kept as well.

pd.merge(region_seoul, seoul_patients, on='city', how = 'left')

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count,patient_id
0,10000,Seoul,Seoul,37.566953,126.977977,607,830,48,1.44,15.38,5.8,22739,
1,10010,Seoul,Gangnam-gu,37.518421,127.047222,33,38,0,4.18,13.17,4.3,3088,83.0
2,10020,Seoul,Gangdong-gu,37.530492,127.123837,27,32,0,1.54,14.55,5.4,1023,35.0
3,10030,Seoul,Gangbuk-gu,37.639938,127.025508,14,21,0,0.67,19.49,8.5,628,24.0
4,10040,Seoul,Gangseo-gu,37.551166,126.849506,36,56,1,1.17,14.39,5.7,1080,78.0
5,10050,Seoul,Gwanak-gu,37.47829,126.951502,22,33,1,0.89,15.12,4.9,909,113.0
6,10060,Seoul,Gwangjin-gu,37.538712,127.082366,22,33,3,1.16,13.75,4.8,723,17.0
7,10070,Seoul,Guro-gu,37.495632,126.88765,26,34,3,1.0,16.21,5.7,741,85.0
8,10080,Seoul,Geumcheon-gu,37.456852,126.895229,18,19,0,0.96,16.15,6.7,475,32.0
9,10090,Seoul,Nowon-gu,37.654259,127.056294,42,66,6,1.39,15.4,7.4,952,43.0


In [28]:
# Right outer join.
# Rows of seoul_patients with no match in region_seoul are kept as well.

pd.merge(region_seoul, seoul_patients, on='city', how = 'right')

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count,patient_id
0,10100.0,Seoul,Dobong-gu,37.668952,127.047082,23.0,26.0,1.0,0.95,17.89,7.2,485.0,62
1,10110.0,Seoul,Dongdaemun-gu,37.574552,127.039721,21.0,31.0,4.0,1.06,17.26,6.7,832.0,38
2,10120.0,Seoul,Dongjak-gu,37.510571,126.963604,21.0,34.0,3.0,1.17,15.85,5.2,762.0,53
3,10220.0,Seoul,Eunpyeong-gu,37.603481,126.929173,31.0,44.0,1.0,1.09,17.0,6.5,874.0,53
4,10030.0,Seoul,Gangbuk-gu,37.639938,127.025508,14.0,21.0,0.0,0.67,19.49,8.5,628.0,24
5,10020.0,Seoul,Gangdong-gu,37.530492,127.123837,27.0,32.0,0.0,1.54,14.55,5.4,1023.0,35
6,10010.0,Seoul,Gangnam-gu,37.518421,127.047222,33.0,38.0,0.0,4.18,13.17,4.3,3088.0,83
7,10040.0,Seoul,Gangseo-gu,37.551166,126.849506,36.0,56.0,1.0,1.17,14.39,5.7,1080.0,78
8,10080.0,Seoul,Geumcheon-gu,37.456852,126.895229,18.0,19.0,0.0,0.96,16.15,6.7,475.0,32
9,10070.0,Seoul,Guro-gu,37.495632,126.88765,26.0,34.0,3.0,1.0,16.21,5.7,741.0,85


In [29]:
# Full outer join.
# Keeps every row from both tables.

pd.merge(region_seoul, seoul_patients, on='city', how = 'outer')

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count,patient_id
0,10000.0,Seoul,Seoul,37.566953,126.977977,607.0,830.0,48.0,1.44,15.38,5.8,22739.0,
1,10010.0,Seoul,Gangnam-gu,37.518421,127.047222,33.0,38.0,0.0,4.18,13.17,4.3,3088.0,83.0
2,10020.0,Seoul,Gangdong-gu,37.530492,127.123837,27.0,32.0,0.0,1.54,14.55,5.4,1023.0,35.0
3,10030.0,Seoul,Gangbuk-gu,37.639938,127.025508,14.0,21.0,0.0,0.67,19.49,8.5,628.0,24.0
4,10040.0,Seoul,Gangseo-gu,37.551166,126.849506,36.0,56.0,1.0,1.17,14.39,5.7,1080.0,78.0
5,10050.0,Seoul,Gwanak-gu,37.47829,126.951502,22.0,33.0,1.0,0.89,15.12,4.9,909.0,113.0
6,10060.0,Seoul,Gwangjin-gu,37.538712,127.082366,22.0,33.0,3.0,1.16,13.75,4.8,723.0,17.0
7,10070.0,Seoul,Guro-gu,37.495632,126.88765,26.0,34.0,3.0,1.0,16.21,5.7,741.0,85.0
8,10080.0,Seoul,Geumcheon-gu,37.456852,126.895229,18.0,19.0,0.0,0.96,16.15,6.7,475.0,32.0
9,10090.0,Seoul,Nowon-gu,37.654259,127.056294,42.0,66.0,6.0,1.39,15.4,7.4,952.0,43.0


In [30]:
# Case where the key column is named differently in the two tables.
# Rename seoul_patients' key column from 'city' to 'town' (in place).

seoul_patients.columns = ['town', 'patient_id']
seoul_patients.columns

Index(['town', 'patient_id'], dtype='object')

In [31]:
# Use left_on / right_on to name the key column of each table separately.

pd.merge(region_seoul, seoul_patients, 
         left_on = 'city', right_on = 'town', how = 'inner')

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count,town,patient_id
0,10010,Seoul,Gangnam-gu,37.518421,127.047222,33,38,0,4.18,13.17,4.3,3088,Gangnam-gu,83
1,10020,Seoul,Gangdong-gu,37.530492,127.123837,27,32,0,1.54,14.55,5.4,1023,Gangdong-gu,35
2,10030,Seoul,Gangbuk-gu,37.639938,127.025508,14,21,0,0.67,19.49,8.5,628,Gangbuk-gu,24
3,10040,Seoul,Gangseo-gu,37.551166,126.849506,36,56,1,1.17,14.39,5.7,1080,Gangseo-gu,78
4,10050,Seoul,Gwanak-gu,37.47829,126.951502,22,33,1,0.89,15.12,4.9,909,Gwanak-gu,113
5,10060,Seoul,Gwangjin-gu,37.538712,127.082366,22,33,3,1.16,13.75,4.8,723,Gwangjin-gu,17
6,10070,Seoul,Guro-gu,37.495632,126.88765,26,34,3,1.0,16.21,5.7,741,Guro-gu,85
7,10080,Seoul,Geumcheon-gu,37.456852,126.895229,18,19,0,0.96,16.15,6.7,475,Geumcheon-gu,32
8,10090,Seoul,Nowon-gu,37.654259,127.056294,42,66,6,1.39,15.4,7.4,952,Nowon-gu,43
9,10100,Seoul,Dobong-gu,37.668952,127.047082,23,26,1,0.95,17.89,7.2,485,Dobong-gu,62


In [32]:
# Confirmed patients per Seoul district, with city kept as the index this time.

seoul_patients = seoul_patientInfo.groupby('city')[['patient_id']].count()
seoul_patients.head()

Unnamed: 0_level_0,patient_id
city,Unnamed: 1_level_1
Dobong-gu,62
Dongdaemun-gu,38
Dongjak-gu,53
Eunpyeong-gu,53
Gangbuk-gu,24


In [33]:
# Join on an index instead of a column.
# left_index / right_index select a table's index as its join key; here the
# left key is the 'city' column and the right key is seoul_patients' index.

pd.merge(region_seoul, seoul_patients, 
         left_on = 'city', right_index = True, how = 'inner')

Unnamed: 0,code,province,city,latitude,longitude,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count,patient_id
1,10010,Seoul,Gangnam-gu,37.518421,127.047222,33,38,0,4.18,13.17,4.3,3088,83
2,10020,Seoul,Gangdong-gu,37.530492,127.123837,27,32,0,1.54,14.55,5.4,1023,35
3,10030,Seoul,Gangbuk-gu,37.639938,127.025508,14,21,0,0.67,19.49,8.5,628,24
4,10040,Seoul,Gangseo-gu,37.551166,126.849506,36,56,1,1.17,14.39,5.7,1080,78
5,10050,Seoul,Gwanak-gu,37.47829,126.951502,22,33,1,0.89,15.12,4.9,909,113
6,10060,Seoul,Gwangjin-gu,37.538712,127.082366,22,33,3,1.16,13.75,4.8,723,17
7,10070,Seoul,Guro-gu,37.495632,126.88765,26,34,3,1.0,16.21,5.7,741,85
8,10080,Seoul,Geumcheon-gu,37.456852,126.895229,18,19,0,0.96,16.15,6.7,475,32
9,10090,Seoul,Nowon-gu,37.654259,127.056294,42,66,6,1.39,15.4,7.4,952,43
10,10100,Seoul,Dobong-gu,37.668952,127.047082,23,26,1,0.95,17.89,7.2,485,62


## 실습3. patient_ele, region_ele는 각각 전국 시·도(province)별 환자수, 지역 정보입니다

## - province 컬럼을 기준으로 inner join하기

In [34]:
# Number of patients per province.
patient_ele = patientInfo.groupby('province')['patient_id'].count().reset_index()
# Per-province totals of the numeric region columns. numeric_only=True keeps
# the text 'city' column out of the sum (recent pandas versions would
# otherwise concatenate the city names into an extra column).
# NOTE(review): Region.csv appears to contain province-level summary rows
# (e.g. province 'Seoul' / city 'Seoul'), so these sums may double count,
# and summing ratio columns is of doubtful meaning. Confirm before reuse.
region_ele = (
    region.groupby('province')
    .sum(numeric_only=True)
    .reset_index()
    .drop(columns=['code', 'latitude', 'longitude'])
)

In [35]:
# Expected result is shown below.
# Both tables use the same key name, so a single on= names the join column.
pd.merge(patient_ele, region_ele, on='province', how='inner')

Unnamed: 0,province,patient_id,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count
0,Busan,151,608,816,44,22.57,329.08,154.6,13504
1,Chungcheongbuk-do,56,518,656,34,12.28,278.31,141.9,5538
2,Chungcheongnam-do,168,818,998,42,17.96,381.51,187.3,7282
3,Daegu,137,458,710,22,15.42,153.28,71.7,10166
4,Daejeon,119,296,520,30,8.55,86.26,37.2,5968
5,Gangwon-do,63,698,736,36,21.22,434.92,220.0,5038
6,Gwangju,44,310,624,34,14.84,88.89,40.3,5704
7,Gyeonggi-do,1208,2554,4474,122,45.46,461.75,194.7,40982
8,Gyeongsangbuk-do,1254,942,1414,66,22.77,661.35,363.2,8948
9,Gyeongsangnam-do,133,1002,1372,42,25.9,470.24,279.7,10728


## 실습4. region_ele는 자치구 데이터뿐만 아니라 전국데이터도 포함되어있습니다.

## - province 컬럼을 기준으로 right outer join하기

In [36]:
# Expected result is shown below.
# Right outer join on the shared 'province' key: every region_ele row is kept.
pd.merge(patient_ele, region_ele, on='province', how='right')

Unnamed: 0,province,patient_id,elementary_school_count,kindergarten_count,university_count,academy_ratio,elderly_population_ratio,elderly_alone_ratio,nursing_home_count
0,Busan,151.0,608,816,44,22.57,329.08,154.6,13504
1,Chungcheongbuk-do,56.0,518,656,34,12.28,278.31,141.9,5538
2,Chungcheongnam-do,168.0,818,998,42,17.96,381.51,187.3,7282
3,Daegu,137.0,458,710,22,15.42,153.28,71.7,10166
4,Daejeon,119.0,296,520,30,8.55,86.26,37.2,5968
5,Gangwon-do,63.0,698,736,36,21.22,434.92,220.0,5038
6,Gwangju,44.0,310,624,34,14.84,88.89,40.3,5704
7,Gyeonggi-do,1208.0,2554,4474,122,45.46,461.75,194.7,40982
8,Gyeongsangbuk-do,1254.0,942,1414,66,22.77,661.35,363.2,8948
9,Gyeongsangnam-do,133.0,1002,1372,42,25.9,470.24,279.7,10728


## pivot_table

- 엑셀의 pivot_table 기능처럼 사용


- 데이터프레임.pivot_table(index, values, columns, aggfunc)
    - index(또는 columns) 중 적어도 하나는 지정해야 함
    - index에는 그룹화하고 싶은 컬럼 넣으면 됨(두개이상이면 list로)
    - 만약 특정 컬럼에 대한 통계량을 보고싶으면 values 이용
    - 추가로 구분하고싶은 컬럼있으면 columns 이용
    - aggfunc은 조회하고 싶은 통계량 조회, default는 mean

In [37]:
# Seoul floating-population data.

seoulfloating

Unnamed: 0,date,hour,birth_year,sex,province,city,fp_num
0,2020-01-01,0,20,female,Seoul,Dobong-gu,19140
1,2020-01-01,0,20,male,Seoul,Dobong-gu,19950
2,2020-01-01,0,20,female,Seoul,Dongdaemun-gu,25450
3,2020-01-01,0,20,male,Seoul,Dongdaemun-gu,27050
4,2020-01-01,0,20,female,Seoul,Dongjag-gu,28880
...,...,...,...,...,...,...,...
1084795,2020-05-31,21,40,female,Seoul,Dobong-gu,27620
1084796,2020-05-31,21,40,female,Seoul,Songpa-gu,56560
1084797,2020-05-31,21,50,female,Seoul,Gangdong-gu,38960
1084798,2020-05-31,22,60,female,Seoul,Guro-gu,25420


In [38]:
# Group by date and city, using the default aggfunc (mean).
# The numeric columns are listed explicitly as values: a mean cannot be taken
# of the text columns (sex, province), and relying on pandas to drop them
# silently is deprecated behaviour.

seoulfloating.pivot_table(index=['date', 'city'],
                          values=['birth_year', 'fp_num', 'hour'])

Unnamed: 0_level_0,Unnamed: 1_level_0,birth_year,fp_num,hour
date,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,Dobong-gu,45,21824.409722,11.5
2020-01-01,Dongdaemun-gu,45,24022.534722,11.5
2020-01-01,Dongjag-gu,45,26628.576389,11.5
2020-01-01,Eunpyeong-gu,45,30489.340278,11.5
2020-01-01,Gangbuk-gu,45,21391.006944,11.5
...,...,...,...,...
2020-05-31,Seongdong-gu,45,19659.965278,11.5
2020-05-31,Songpa-gu,45,42232.881944,11.5
2020-05-31,Yangcheon-gu,45,28259.062500,11.5
2020-05-31,Yeongdeungpo-gu,45,28785.277778,11.5


In [39]:
# date와 city로 그룹화하고 유동인구의 합 조회

seoulfloating.pivot_table(index = ['date', 'city'], values = 'fp_num', aggfunc = 'sum')

Unnamed: 0_level_0,Unnamed: 1_level_0,fp_num
date,city,Unnamed: 2_level_1
2020-01-01,Dobong-gu,6285430
2020-01-01,Dongdaemun-gu,6918490
2020-01-01,Dongjag-gu,7669030
2020-01-01,Eunpyeong-gu,8780930
2020-01-01,Gangbuk-gu,6160610
...,...,...
2020-05-31,Seongdong-gu,5662070
2020-05-31,Songpa-gu,12163070
2020-05-31,Yangcheon-gu,8138610
2020-05-31,Yeongdeungpo-gu,8290160


In [40]:
# Rows are dates, columns are cities; each cell is the sum of fp_num.

seoulfloating.pivot_table(index = 'date', columns = 'city', values = 'fp_num',
                          aggfunc = 'sum')

city,Dobong-gu,Dongdaemun-gu,Dongjag-gu,Eunpyeong-gu,Gangbuk-gu,Gangdong-gu,Gangnam-gu,Gangseo-gu,Geumcheon-gu,Guro-gu,...,Mapo-gu,Nowon-gu,Seocho-gu,Seodaemun-gu,Seongbuk-gu,Seongdong-gu,Songpa-gu,Yangcheon-gu,Yeongdeungpo-gu,Yongsan-gu
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,6285430,6918490,7669030,8780930,6160610,8296230,11001340,10998410,4990670,8605430,...,7685110,10180430,8238170,6068870,8327910,5705250,12573680,8334990,8429420,4891430
2020-01-02,5674220,6989170,7465090,7901040,5634030,7933530,15751250,10676170,5802770,8760520,...,8363570,9356350,10729750,6239170,7900440,6302820,12776490,7811270,10122630,5352050
2020-01-03,5601640,6994200,7474610,7824170,5604240,7924490,16034360,10632520,5712000,8674290,...,8511950,9317260,10891610,6242510,7846680,6309900,12817800,7740920,10135000,5422740
2020-01-04,6005300,7045120,7741980,8464390,5952680,8237950,12944420,10858450,4942510,8566630,...,8247720,9995770,9378300,6194480,8191000,5894740,12779500,8194100,8837490,5174980
2020-01-05,6253260,7102720,7757170,8686730,6101710,8402030,11822840,10963500,4957360,8705360,...,7916840,10197320,8796060,6160290,8398280,5775070,12645940,8354430,8725190,5063560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-27,5523470,6984010,7422880,7710210,5495020,7773150,15314740,10438080,5670870,8493490,...,8300840,9242830,10532760,6058570,8015820,6314680,12498030,7606310,9927940,5177480
2020-05-28,5517890,6981200,7410110,7711610,5485320,7779000,15337040,10425070,5652870,8497310,...,8301340,9238570,10548220,6063250,8008210,6310410,12477800,7610580,9906720,5193920
2020-05-29,3863340,4921780,5178630,5400700,3831210,5472280,11007760,7366570,4060480,5973500,...,5820760,6458910,7598330,4253450,5605280,4468680,8801920,5329550,7080190,3694630
2020-05-30,5845120,6819990,7534280,8249540,5768720,7996910,12229270,10459100,4800670,8177060,...,7841600,9750510,8944550,5922110,8149970,5793170,12242600,7990820,8483270,4718410


## 실습5. SeoulFloating 데이터에서 유동인구의 합을 행은 날짜 열은 구로 구분하는 pivot_table을 만들기

In [41]:
# Expected result is shown below.
seoulfloating.pivot_table(index = 'date', columns = 'city', values = 'fp_num',
                          aggfunc = 'sum')

city,Dobong-gu,Dongdaemun-gu,Dongjag-gu,Eunpyeong-gu,Gangbuk-gu,Gangdong-gu,Gangnam-gu,Gangseo-gu,Geumcheon-gu,Guro-gu,...,Mapo-gu,Nowon-gu,Seocho-gu,Seodaemun-gu,Seongbuk-gu,Seongdong-gu,Songpa-gu,Yangcheon-gu,Yeongdeungpo-gu,Yongsan-gu
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,6285430,6918490,7669030,8780930,6160610,8296230,11001340,10998410,4990670,8605430,...,7685110,10180430,8238170,6068870,8327910,5705250,12573680,8334990,8429420,4891430
2020-01-02,5674220,6989170,7465090,7901040,5634030,7933530,15751250,10676170,5802770,8760520,...,8363570,9356350,10729750,6239170,7900440,6302820,12776490,7811270,10122630,5352050
2020-01-03,5601640,6994200,7474610,7824170,5604240,7924490,16034360,10632520,5712000,8674290,...,8511950,9317260,10891610,6242510,7846680,6309900,12817800,7740920,10135000,5422740
2020-01-04,6005300,7045120,7741980,8464390,5952680,8237950,12944420,10858450,4942510,8566630,...,8247720,9995770,9378300,6194480,8191000,5894740,12779500,8194100,8837490,5174980
2020-01-05,6253260,7102720,7757170,8686730,6101710,8402030,11822840,10963500,4957360,8705360,...,7916840,10197320,8796060,6160290,8398280,5775070,12645940,8354430,8725190,5063560
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-27,5523470,6984010,7422880,7710210,5495020,7773150,15314740,10438080,5670870,8493490,...,8300840,9242830,10532760,6058570,8015820,6314680,12498030,7606310,9927940,5177480
2020-05-28,5517890,6981200,7410110,7711610,5485320,7779000,15337040,10425070,5652870,8497310,...,8301340,9238570,10548220,6063250,8008210,6310410,12477800,7610580,9906720,5193920
2020-05-29,3863340,4921780,5178630,5400700,3831210,5472280,11007760,7366570,4060480,5973500,...,5820760,6458910,7598330,4253450,5605280,4468680,8801920,5329550,7080190,3694630
2020-05-30,5845120,6819990,7534280,8249540,5768720,7996910,12229270,10459100,4800670,8177060,...,7841600,9750510,8944550,5922110,8149970,5793170,12242600,7990820,8483270,4718410


## unstack, stack

- 계층적 인덱스가 있을때 사용
    - 계층적 인덱스란 상위 인덱스, 하위 인덱스처럼 인덱스가 두개 이상 있음


- stack
    - 열을 행으로 바꿔줌
    - 데이터프레임.stack()


- unstack
    - 행을 열로 바꿔줌
    - 데이터프레임.unstack()

In [42]:
# Example of a hierarchical (multi-level) index.
# Several value columns are aggregated at once, so the resulting columns get
# two levels: (value column, city). The numeric columns are listed explicitly
# so the text columns (sex, province) are not "summed" by string
# concatenation on recent pandas versions.

seoul_pivot = seoulfloating.pivot_table(index='date', columns='city',
                                        values=['birth_year', 'fp_num', 'hour'],
                                        aggfunc='sum')
seoul_pivot

Unnamed: 0_level_0,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,...,hour,hour,hour,hour,hour,hour,hour,hour,hour,hour
city,Dobong-gu,Dongdaemun-gu,Dongjag-gu,Eunpyeong-gu,Gangbuk-gu,Gangdong-gu,Gangnam-gu,Gangseo-gu,Geumcheon-gu,Guro-gu,...,Mapo-gu,Nowon-gu,Seocho-gu,Seodaemun-gu,Seongbuk-gu,Seongdong-gu,Songpa-gu,Yangcheon-gu,Yeongdeungpo-gu,Yongsan-gu
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-01,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-02,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-03,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-04,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-05,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-27,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-05-28,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-05-29,9180,9180,9180,9180,9180,9180,9180,9180,9180,9180,...,1632,1632,1632,1632,1632,1632,1632,1632,1632,1632
2020-05-30,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312


In [43]:
# The columns now have two levels (a MultiIndex of (value column, city) pairs).

seoul_pivot.columns

MultiIndex([('birth_year',       'Dobong-gu'),
            ('birth_year',   'Dongdaemun-gu'),
            ('birth_year',      'Dongjag-gu'),
            ('birth_year',    'Eunpyeong-gu'),
            ('birth_year',      'Gangbuk-gu'),
            ('birth_year',     'Gangdong-gu'),
            ('birth_year',      'Gangnam-gu'),
            ('birth_year',      'Gangseo-gu'),
            ('birth_year',    'Geumcheon-gu'),
            ('birth_year',         'Guro-gu'),
            ('birth_year',       'Gwanak-gu'),
            ('birth_year',     'Gwangjin-gu'),
            ('birth_year',       'Jongno-gu'),
            ('birth_year',         'Jung-gu'),
            ('birth_year',     'Jungnang-gu'),
            ('birth_year',         'Mapo-gu'),
            ('birth_year',        'Nowon-gu'),
            ('birth_year',       'Seocho-gu'),
            ('birth_year',    'Seodaemun-gu'),
            ('birth_year',     'Seongbuk-gu'),
            ('birth_year',    'Seongdong-gu'),
            (

In [44]:
# stack
# Moves the city level from the columns into the row index.

seoul_stack = seoul_pivot.stack()
seoul_stack

Unnamed: 0_level_0,Unnamed: 1_level_0,birth_year,fp_num,hour
date,city,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01,Dobong-gu,12960,6285430,3312
2020-01-01,Dongdaemun-gu,12960,6918490,3312
2020-01-01,Dongjag-gu,12960,7669030,3312
2020-01-01,Eunpyeong-gu,12960,8780930,3312
2020-01-01,Gangbuk-gu,12960,6160610,3312
...,...,...,...,...
2020-05-31,Seongdong-gu,12960,5662070,3312
2020-05-31,Songpa-gu,12960,12163070,3312
2020-05-31,Yangcheon-gu,12960,8138610,3312
2020-05-31,Yeongdeungpo-gu,12960,8290160,3312


In [45]:
# unstack
# Moves the city level from the row index back into the columns.

seoul_stack.unstack()

Unnamed: 0_level_0,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,birth_year,...,hour,hour,hour,hour,hour,hour,hour,hour,hour,hour
city,Dobong-gu,Dongdaemun-gu,Dongjag-gu,Eunpyeong-gu,Gangbuk-gu,Gangdong-gu,Gangnam-gu,Gangseo-gu,Geumcheon-gu,Guro-gu,...,Mapo-gu,Nowon-gu,Seocho-gu,Seodaemun-gu,Seongbuk-gu,Seongdong-gu,Songpa-gu,Yangcheon-gu,Yeongdeungpo-gu,Yongsan-gu
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-01,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-02,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-03,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-04,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-01-05,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-27,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-05-28,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312
2020-05-29,9180,9180,9180,9180,9180,9180,9180,9180,9180,9180,...,1632,1632,1632,1632,1632,1632,1632,1632,1632,1632
2020-05-30,12960,12960,12960,12960,12960,12960,12960,12960,12960,12960,...,3312,3312,3312,3312,3312,3312,3312,3312,3312,3312


# 과제

## 1. SeoulFloating 데이터를 불러들이고 마포구의 유동인구의 평균을 날짜별로 조회

In [46]:
# Expected result is shown below (column names do not matter).
# Daily mean of fp_num for Mapo-gu. fp_num is selected before aggregating
# because a mean cannot be taken of the text columns, and boolean indexing
# already returns a new frame, so no explicit copy is needed.
a = (
    seoulfloating[seoulfloating['city'] == 'Mapo-gu']
    .groupby('date')[['fp_num']]
    .mean()
)
a

Unnamed: 0_level_0,fp_num
date,Unnamed: 1_level_1
2020-01-01,26684.409722
2020-01-02,29040.173611
2020-01-03,29555.381944
2020-01-04,28637.916667
2020-01-05,27489.027778
...,...
2020-05-27,28822.361111
2020-05-28,28824.097222
2020-05-29,28533.137255
2020-05-30,27227.777778


## 2. patientInfo 데이터를 불러들이고 서울지역 환자수를 날짜별로 조회

- 날짜는 confirmed_date 컬럼을 이용하시면 됩니다

In [47]:
# Number of Seoul patients per confirmation date (count of patient_id).
b = (
    patientInfo[patientInfo['province'] == 'Seoul']
    .groupby('confirmed_date')[['patient_id']]
    .count()
)
b

Unnamed: 0_level_0,patient_id
confirmed_date,Unnamed: 1_level_1
2020-01-23,1
2020-01-30,3
2020-01-31,3
2020-02-02,1
2020-02-05,2
...,...
2020-06-25,17
2020-06-26,17
2020-06-27,14
2020-06-28,7


## 3.  1번과 2번의 결과물을 날짜를 기준으로 join을 하고 확진자 수가 있는 날짜만 조회

In [48]:
# Inner join of the two results on their (date) indexes: only dates present
# in both tables, i.e. dates that have confirmed patients, are kept.
pd.merge(a, b, right_index = True,  left_index = True, how = 'inner')

Unnamed: 0,fp_num,patient_id
2020-01-23,27756.631944,1
2020-01-30,29385.833333,3
2020-01-31,28919.375000,3
2020-02-02,26879.618056,1
2020-02-05,28737.881944,2
...,...,...
2020-05-27,28822.361111,25
2020-05-28,28824.097222,19
2020-05-29,28533.137255,9
2020-05-30,27227.777778,6
