# 음식물쓰레기 데이터 기본 전처리

### 1) 데이터 불러오기

In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the raw food-waste CSV (CP949-encoded) into `foods` and display it.
foods = pd.read_csv(
    filepath_or_buffer='./rowdata/01_음식물쓰레기_FOOD_WASTE_210811_update.CSV',
    encoding='CP949',
)
foods

### 2) 알수없음 지역과 em_g=0 인 행 삭제

In [None]:
# Collect the rows whose emd_nm is the placeholder '알수없음'.
# NOTE(review): this only builds `non` for inspection; `foods` is not filtered here.
non = foods.loc[foods['emd_nm'] == '알수없음']

In [None]:
# Of those rows, keep only the ones with a non-zero em_g.
non = non.loc[non['em_g'] != 0]

### 3) EDA를 위한 날짜 년월일 및 주말 유무 체크

In [None]:
# Parse base_date, then derive year / month / day / weekday-name columns from it.
foods['base_date'] = pd.to_datetime(foods['base_date'])
date_parts = foods['base_date'].dt
foods['base_year'] = date_parts.year
foods['base_month'] = date_parts.month
foods['base_day'] = date_parts.day
foods['base_weekday'] = date_parts.day_name()

In [None]:
# Spot-check the weekday name of the first row.
foods.loc[0, 'base_weekday']

'Monday'

In [None]:
# Build one True/False flag per row: True when base_weekday is Saturday or Sunday.
# Vectorized with isin() instead of a per-row `foods.loc[f][...]` lookup, which
# constructs a Series for every row and is very slow on a frame of this size.
# Kept as a plain list so the next cell can assign it as a column unchanged.
base_weekend = foods['base_weekday'].isin(['Saturday', 'Sunday']).tolist()

In [None]:
# Attach the weekend flags as a new column (values are assigned in row order).
foods['base_weekend'] = base_weekend

In [None]:
# Keep only the columns used from here on.
# base_date is retained because the later groupby keys on it; dropping it here
# makes a top-to-bottom run fail with KeyError. It is stored as an ISO
# 'YYYY-MM-DD' string so it compares equal to the string dates of the
# date x emd grid that is merged in further down.
eda_columns = ['base_date', 'base_year', 'base_month', 'base_day', 'base_weekend',
               'city', 'emd_nm', 'emd_cd', 'em_cnt', 'em_g', 'pay_amt']
foods = foods.loc[:, eda_columns].assign(
    base_date=lambda frame: pd.to_datetime(frame['base_date']).dt.strftime('%Y-%m-%d')
)
foods

Unnamed: 0,base_year,base_month,base_day,base_weekend,city,emd_nm,emd_cd,em_cnt,em_g,pay_amt
0,2018,1,1,False,제주시,외도동,50110670,2,15500,464
1,2018,1,1,False,제주시,아라동,50110630,25,59800,1787
2,2018,1,1,False,제주시,아라동,50110630,8,25350,758
3,2018,1,1,False,제주시,아라동,50110630,12,30000,898
4,2018,1,1,False,제주시,아라동,50110630,14,44050,1317
...,...,...,...,...,...,...,...,...,...,...
2208737,2021,6,30,False,제주시,한림읍,50110250,12,58250,1743
2208738,2021,6,30,False,제주시,한림읍,50110250,15,111000,3326
2208739,2021,6,30,False,제주시,한림읍,50110250,21,48300,1444
2208740,2021,6,30,False,제주시,한림읍,50110250,5,22050,661


### 4) pay_amt 제거
   - 18년도 부터 21년도 까지 약 1원당 33.4g 가격을 유지함. em_g과 비례하기 때문에 제거한다.

In [None]:
# Ratio of em_g to pay_amt per row, used to check that the two are proportional.
amt = foods['em_g'].div(foods['pay_amt'])
amt

0          33.405172
1          33.463906
2          33.443272
3          33.407572
4          33.447229
             ...    
2208737    33.419392
2208738    33.373422
2208739    33.448753
2208740    33.358548
2208741    33.333333
Length: 2208742, dtype: float64

### 5) 제주도 지역 6개의 구역으로 나누기

In [None]:
def test(x):
    """Return the sub-region label for the emd name `x`.

    The name is looked up in six fixed groups (east / centre / west of
    Seogwipo-si and of Jeju-si). Returns None when `x` is in none of them.
    """
    region_members = (
        ('서귀포시_동', ['성산읍', '표선면', '남원읍']),
        ('서귀포시_중', ['대륜동', '대천동', '동홍동', '서홍동', '송산동', '영천동',
                    '예래동', '정방동', '중문동', '중앙동', '천지동', '효돈동']),
        ('서귀포시_서', ['대정읍', '안덕면']),
        ('제주시_동', ['구좌읍', '조천읍']),
        ('제주시_중', ['건입동', '노형동', '도두동', '봉개동', '삼도이동', '삼도일동',
                   '삼양동', '아라동', '연동', '오라동', '외도동', '용담이동',
                   '용담일동', '이도이동', '이도일동', '이호동', '일도이동',
                   '일도일동', '화북동']),
        ('제주시_서', ['애월읍', '한림읍', '한경면']),
    )
    for region, members in region_members:
        if x in members:
            return region
    # Unlisted names (e.g. the '알수없음' placeholder) get no region.
    return None

# Label every row with its sub-region; names not handled by test() become missing.
foods['region'] = foods['emd_nm'].map(test)

In [None]:
# Daily totals per (date, region, emd, emd code).
# Only em_cnt and em_g are summed: they are the only columns kept afterwards,
# and summing every column would also try to "sum" string columns such as city.
final = foods.groupby(['base_date', 'region', 'emd_nm', 'emd_cd'])[['em_cnt', 'em_g']].sum()

In [None]:
# Keep the two measures and turn the group keys back into ordinary columns.
final = final[['em_cnt', 'em_g']].reset_index()
final


Unnamed: 0,base_date,region,emd_nm,emd_cd,em_cnt,em_g
0,2018-01-01,서귀포시_동,남원읍,50130253,304,1239600
1,2018-01-01,서귀포시_동,성산읍,50130259,90,424100
2,2018-01-01,서귀포시_동,표선면,50130320,112,452850
3,2018-01-01,서귀포시_서,대정읍,50130250,309,1264950
4,2018-01-01,서귀포시_서,안덕면,50130310,77,296200
...,...,...,...,...,...,...
49602,2021-06-30,제주시_중,이도이동,50110540,4032,7304500
49603,2021-06-30,제주시_중,이도일동,50110530,90,139750
49604,2021-06-30,제주시_중,이호동,50110680,339,764450
49605,2021-06-30,제주시_중,일도이동,50110520,2856,4963450


### 6) 음식물쓰레기 데이터 NaN 처리

In [None]:
# Print column dtypes and non-null counts of the aggregated frame.
final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49607 entries, 0 to 49606
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   base_date  49607 non-null  object
 1   region     49607 non-null  object
 2   emd_nm     49607 non-null  object
 3   emd_cd     49607 non-null  int64 
 4   em_cnt     49607 non-null  int64 
 5   em_g       49607 non-null  int64 
dtypes: int64(3), object(3)
memory usage: 2.3+ MB


#### 1) 지역별로 없는 날짜에 NaN 넣기



- 일별로 41개의 행정동(읍·면·동)을 가진 dataframe을 생성 후 실제 음식물쓰레기 데이터와 merge

In [None]:
from datetime import datetime as dt

# Reference list of emd names: every emd_nm that has a row on 2021-04-02.
add_nm = np.array(final.loc[final['base_date'] == '2021-04-02', 'emd_nm'])
# Every calendar day from 2018-01-01 through 2021-06-30.
dt_index = pd.date_range(start='20180101', end='20210630')
# Two parallel lists forming the full (day, emd) grid, day-major:
# each day is repeated once per emd name, and the names cycle for every day.
base_date = [day for day in dt_index for _ in add_nm]
temp = [name for _ in dt_index for name in add_nm]
temp

['남원읍',
 '성산읍',
 '표선면',
 '대정읍',
 '안덕면',
 '대륜동',
 '대천동',
 '동홍동',
 '서홍동',
 '송산동',
 '영천동',
 '예래동',
 '정방동',
 '중문동',
 '중앙동',
 '천지동',
 '효돈동',
 '구좌읍',
 '조천읍',
 '애월읍',
 '한경면',
 '한림읍',
 '건입동',
 '노형동',
 '도두동',
 '봉개동',
 '삼도이동',
 '삼도일동',
 '삼양동',
 '아라동',
 '연동',
 '오라동',
 '외도동',
 '용담이동',
 '용담일동',
 '이도이동',
 '이도일동',
 '이호동',
 '일도이동',
 '일도일동',
 '화북동',
 '남원읍',
 '성산읍',
 '표선면',
 '대정읍',
 '안덕면',
 '대륜동',
 '대천동',
 '동홍동',
 '서홍동',
 '송산동',
 '영천동',
 '예래동',
 '정방동',
 '중문동',
 '중앙동',
 '천지동',
 '효돈동',
 '구좌읍',
 '조천읍',
 '애월읍',
 '한경면',
 '한림읍',
 '건입동',
 '노형동',
 '도두동',
 '봉개동',
 '삼도이동',
 '삼도일동',
 '삼양동',
 '아라동',
 '연동',
 '오라동',
 '외도동',
 '용담이동',
 '용담일동',
 '이도이동',
 '이도일동',
 '이호동',
 '일도이동',
 '일도일동',
 '화북동',
 '남원읍',
 '성산읍',
 '표선면',
 '대정읍',
 '안덕면',
 '대륜동',
 '대천동',
 '동홍동',
 '서홍동',
 '송산동',
 '영천동',
 '예래동',
 '정방동',
 '중문동',
 '중앙동',
 '천지동',
 '효돈동',
 '구좌읍',
 '조천읍',
 '애월읍',
 '한경면',
 '한림읍',
 '건입동',
 '노형동',
 '도두동',
 '봉개동',
 '삼도이동',
 '삼도일동',
 '삼양동',
 '아라동',
 '연동',
 '오라동',
 '외도동',
 '용담이동',
 '용담일동',
 '이도이동',
 '이도일동',
 '이호동',
 '일도이동',
 '일도일동',
 '화

In [None]:
# Grid frame with columns index / base_date / emd_nm and a fresh integer index.
# 'index' duplicates the emd name (it is dropped again after the merge);
# base_date is converted to string form.
df_date = pd.DataFrame({
    'index': temp,
    'base_date': pd.Series(base_date).astype('str'),
    'emd_nm': temp,
})

In [None]:
# Display the (date, emd) grid.
df_date

Unnamed: 0,index,base_date,emd_nm
0,남원읍,2018-01-01,남원읍
1,성산읍,2018-01-01,성산읍
2,표선면,2018-01-01,표선면
3,대정읍,2018-01-01,대정읍
4,안덕면,2018-01-01,안덕면
...,...,...,...
52352,이도일동,2021-06-30,이도일동
52353,이호동,2021-06-30,이호동
52354,일도이동,2021-06-30,일도이동
52355,일도일동,2021-06-30,일도일동


In [None]:
# Attach the observed totals to the full (date, emd) grid; grid rows without an
# observation get NaN in the columns coming from `final`.
# A left join is used so the result keeps df_date's row order and labels: the
# imputation cells below address rows by hard-coded integer labels, and an outer
# join is documented to sort the join keys, which would move those rows.
merge_final = pd.merge(df_date, final, how='left', on=['base_date', 'emd_nm'])
merge_final

Unnamed: 0,index,base_date,emd_nm,region,emd_cd,em_cnt,em_g
0,남원읍,2018-01-01,남원읍,서귀포시_동,50130253.0,304.0,1239600.0
1,성산읍,2018-01-01,성산읍,서귀포시_동,50130259.0,90.0,424100.0
2,표선면,2018-01-01,표선면,서귀포시_동,50130320.0,112.0,452850.0
3,대정읍,2018-01-01,대정읍,서귀포시_서,50130250.0,309.0,1264950.0
4,안덕면,2018-01-01,안덕면,서귀포시_서,50130310.0,77.0,296200.0
...,...,...,...,...,...,...,...
52352,이도일동,2021-06-30,이도일동,제주시_중,50110530.0,90.0,139750.0
52353,이호동,2021-06-30,이호동,제주시_중,50110680.0,339.0,764450.0
52354,일도이동,2021-06-30,일도이동,제주시_중,50110520.0,2856.0,4963450.0
52355,일도일동,2021-06-30,일도일동,,,,


In [None]:
# Remove the helper 'index' column carried over from df_date.
merge_final = merge_final.drop(columns='index')
merge_final

Unnamed: 0,base_date,emd_nm,region,emd_cd,em_cnt,em_g
0,2018-01-01,남원읍,서귀포시_동,50130253.0,304.0,1239600.0
1,2018-01-01,성산읍,서귀포시_동,50130259.0,90.0,424100.0
2,2018-01-01,표선면,서귀포시_동,50130320.0,112.0,452850.0
3,2018-01-01,대정읍,서귀포시_서,50130250.0,309.0,1264950.0
4,2018-01-01,안덕면,서귀포시_서,50130310.0,77.0,296200.0
...,...,...,...,...,...,...
52352,2021-06-30,이도일동,제주시_중,50110530.0,90.0,139750.0
52353,2021-06-30,이호동,제주시_중,50110680.0,339.0,764450.0
52354,2021-06-30,일도이동,제주시_중,50110520.0,2856.0,4963450.0
52355,2021-06-30,일도일동,,,,


#### 2) 구간별 NaN값 처리

In [None]:
# Count missing values per column.
merge_final.isna().sum()

base_date       0
emd_nm          0
region          0
emd_cd       2750
em_cnt       2750
em_g         2750
dtype: int64

In [None]:
# Snapshot of the merged frame taken before any imputation. The cells below read
# from this copy and write the imputed values into merge_final.
rmnan = merge_final.copy(deep=True)

##### 2021-03-20
- 건입동,용담일동,이호동,일도일동(모두 제주시_중 지역)
- 이 날짜 데이터가 있는 제주시_중 지역을 보면  모두 19일보다 20일에 더 감소하며 21일에  급격히 상승한다.

**=> 제주시_중의 19일 20일 평균 감소 비율을 적용한다.**

In [None]:
# Rows of region '제주시_중' on 2021-03-20 and on 2021-03-19, renumbered from 0 so
# the two frames can be divided position by position.
# NOTE(review): this assumes both days yield the same emds in the same order,
# i.e. that `region` is populated on rows with missing measurements — confirm.
in_center = rmnan['region'] == '제주시_중'
jm_0320 = rmnan[(rmnan['base_date'] == '2021-03-20') & in_center].reset_index()
jm_0319 = rmnan[(rmnan['base_date'] == '2021-03-19') & in_center].reset_index()
# Per-emd weight ratio (20th / 19th); its mean is shown rounded to 3 decimals.
jm_rate_g = jm_0320['em_g'].div(jm_0319['em_g'])
np.round(jm_rate_g.mean(), 3)

0.557

In [None]:
# Per-emd count ratio (20th / 19th); NaN where the 20th has no data.
jm_rate_cnt = jm_0320['em_cnt'].div(jm_0319['em_cnt'])
jm_rate_cnt
# np.round(jm_rate_cnt.mean(),3)

0          NaN
1     0.938261
2     0.272727
3     0.609929
4     0.933908
5     0.150685
6     0.578947
7     0.984227
8     0.941594
9     0.440909
10    0.790199
11    0.394161
12         NaN
13    0.372529
14    0.367742
15         NaN
16    0.072893
17         NaN
18    0.337121
Name: em_cnt, dtype: float64

In [None]:
## Rows to fill: merge_final labels 48156, 48168, 48171, 48173,
## which are positions 0, 12, 15, 17 in jm_0319 / jm_0320.

# Weight: the 2021-03-19 value scaled by 0.557.
for pos in (0, 12, 15, 17):
    jm_0320.loc[pos, ['em_g']] = jm_0319.loc[pos, ['em_g']] * 0.557

# Count: the 2021-03-19 value scaled by 0.546.
for pos in (0, 12, 15, 17):
    jm_0320.loc[pos, ['em_cnt']] = jm_0319.loc[pos, ['em_cnt']] * 0.546
jm_0320.loc[17, ['em_cnt']]

em_cnt    20.202
Name: 17, dtype: object

In [None]:
## Write the filled values back into merge_final at the matching labels.
for target, pos in zip((48156, 48168, 48171, 48173), (0, 12, 15, 17)):
    merge_final.loc[target, ['em_cnt', 'em_g']] = jm_0320.loc[pos, ['em_cnt', 'em_g']]

##### 2020-10-23
- 남원읍, 대륜동,대정읍, 대천동, 동홍동,서홍동,성산읍,송산동,안덕면,영천동,예래동,중문동,중앙동,정방동,천지동,표선면,효돈동
- 22일, 24일 모두 완만한 상승 또는 하락세를 그린다.

**==> 22일 24일의 평균 값을 넣어준다.**

In [None]:
# One frame per day for 2020-10-22 / 23 / 24, each renumbered from 0
# (the previous labels are kept in an 'index' column).
s_1022, s_1023, s_1024 = (
    rmnan[rmnan['base_date'] == day].reset_index()
    for day in ('2020-10-22', '2020-10-23', '2020-10-24')
)

## merge_final labels 42066 ~ 42082

In [None]:
## merge_final labels 42066 ~ 42082
# For the first 17 rows of the day, set each measure to the mean of the
# previous day's and the next day's value.
for row in range(17):
    for col in ('em_g', 'em_cnt'):
        s_1023.loc[row, [col]] = (s_1022.loc[row, [col]] + s_1024.loc[row, [col]]) / 2
s_1023

Unnamed: 0,index,base_date,emd_nm,region,emd_cd,em_cnt,em_g
0,42066,2020-10-23,남원읍,서귀포시_동,,486.5,2137500.0
1,42067,2020-10-23,성산읍,서귀포시_동,,415.5,2458425.0
2,42068,2020-10-23,표선면,서귀포시_동,,227.0,1031000.0
3,42069,2020-10-23,대정읍,서귀포시_서,,1261.0,3435700.0
4,42070,2020-10-23,안덕면,서귀포시_서,,373.5,1273050.0
5,42071,2020-10-23,대륜동,서귀포시_중,,589.0,1400550.0
6,42072,2020-10-23,대천동,서귀포시_중,,897.0,1997075.0
7,42073,2020-10-23,동홍동,서귀포시_중,,1565.0,3472150.0
8,42074,2020-10-23,서홍동,서귀포시_중,,643.0,1445575.0
9,42075,2020-10-23,송산동,서귀포시_중,,208.0,640525.0


In [None]:
# Copy the 17 filled rows into merge_final; position k maps to label 42066 + k.
for offset in range(17):
    merge_final.loc[42066 + offset, ['em_cnt', 'em_g']] = s_1023.loc[offset, ['em_cnt', 'em_g']]
merge_final.loc[42066:42083]

Unnamed: 0,base_date,emd_nm,region,emd_cd,em_cnt,em_g
42066,2020-10-23,남원읍,서귀포시_동,,486.5,2137500.0
42067,2020-10-23,성산읍,서귀포시_동,,415.5,2458425.0
42068,2020-10-23,표선면,서귀포시_동,,227.0,1031000.0
42069,2020-10-23,대정읍,서귀포시_서,,1261.0,3435700.0
42070,2020-10-23,안덕면,서귀포시_서,,373.5,1273050.0
42071,2020-10-23,대륜동,서귀포시_중,,589.0,1400550.0
42072,2020-10-23,대천동,서귀포시_중,,897.0,1997075.0
42073,2020-10-23,동홍동,서귀포시_중,,1565.0,3472150.0
42074,2020-10-23,서홍동,서귀포시_중,,643.0,1445575.0
42075,2020-10-23,송산동,서귀포시_중,,208.0,640525.0


##### 2020-06-25 - 2020-06-28
- 구좌읍, 애월읍,이호동,한경면,한림읍
- 대체적으로 일주일 전후가 상승곡선에서 감소 추세이다.

**==> 4일간의 데이터가 사라져서 일주일 전의 평균량을 넣는다.**

In [None]:
# Rows from 2020-06-18 through 2020-06-24 (both bounds are exclusive string
# comparisons), i.e. the week before the gap.
je_0624 = rmnan[(rmnan['base_date'] > '2020-06-17') & (rmnan['base_date'] < '2020-06-25')]

# Per-emd mean of the two measures over that week. The mean is restricted to
# em_cnt / em_g because the frames also hold string columns (base_date, emd_nm,
# region), which an unrestricted DataFrame.mean() cannot average.
measure_cols = ['em_cnt', 'em_g']

gus = je_0624[je_0624['emd_nm'] == '구좌읍']
g_means = gus[measure_cols].mean()

tos = je_0624[je_0624['emd_nm'] == '이호동']
t_means = tos[measure_cols].mean()

aes = je_0624[je_0624['emd_nm'] == '애월읍']
a_means = aes[measure_cols].mean()

hgs = je_0624[je_0624['emd_nm'] == '한경면']
hg_means = hgs[measure_cols].mean()

hns = je_0624[je_0624['emd_nm'] == '한림읍']
hn_means = hns[measure_cols].mean()

In [None]:
# Rows dated 2020-06-25 .. 2020-06-28 whose em_g is missing.
in_gap = (rmnan['base_date'] > '2020-06-24') & (rmnan['base_date'] < '2020-06-29')
je_0628 = rmnan[in_gap & rmnan['em_g'].isnull()]

# Split the gap rows by emd.
gu_0628, to_0628, ae_0628, hg_0628, hn_0628 = (
    je_0628[je_0628['emd_nm'] == name]
    for name in ('구좌읍', '이호동', '애월읍', '한경면', '한림읍')
)

gu_0628

Unnamed: 0,base_date,emd_nm,region,emd_cd,em_cnt,em_g
37163,2020-06-25,구좌읍,제주시_동,,,
37204,2020-06-26,구좌읍,제주시_동,,,
37245,2020-06-27,구좌읍,제주시_동,,,
37286,2020-06-28,구좌읍,제주시_동,,,


In [None]:
# Fill every gap row of each emd with that emd's mean from the preceding week.
# .assign() builds a new frame instead of writing into a filtered slice, which
# is what raised SettingWithCopyWarning for each of these assignments.
gu_0628 = gu_0628.assign(em_cnt=g_means['em_cnt'], em_g=g_means['em_g'])

to_0628 = to_0628.assign(em_cnt=t_means['em_cnt'], em_g=t_means['em_g'])

ae_0628 = ae_0628.assign(em_cnt=a_means['em_cnt'], em_g=a_means['em_g'])

hg_0628 = hg_0628.assign(em_cnt=hg_means['em_cnt'], em_g=hg_means['em_g'])

hn_0628 = hn_0628.assign(em_cnt=hn_means['em_cnt'], em_g=hn_means['em_g'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value ins

In [None]:
# Copy the filled measures into merge_final; the per-emd frames kept their
# original row labels, so the assignment lines up on those labels.
for filled in (gu_0628, to_0628, ae_0628, hg_0628, hn_0628):
    merge_final.loc[filled.index, ['em_cnt', 'em_g']] = filled[['em_cnt', 'em_g']]



In [None]:
# Spot-check one of the filled rows.
merge_final.loc[37286]

base_date     2020-06-28
emd_nm               구좌읍
region             제주시_동
emd_cd               NaN
em_cnt           372.714
em_g         1.07269e+06
Name: 37286, dtype: object

##### 2021-06-19 - 2021-06-30
- 일도일동
- 3년간의 데이터를 살펴보면 6개월간격으로 상승하락을 그리면서 대체로 유지하고 있다.

**==> 2021년의 4월 5월의 평균 배출량을 넣는다.**

In [None]:
# Show floats with five decimals from here on.
pd.options.display.float_format = '{:.5f}'.format

is_ildo1 = rmnan['emd_nm'] == '일도일동'
# Reference period: 2021-04-01 .. 2021-05-31 (exclusive string bounds).
one_0601 = rmnan[(rmnan['base_date'] > '2021-03-31') & (rmnan['base_date'] < '2021-06-01') & is_ildo1]
# Period to fill: 2021-06-19 .. 2021-06-30.
one_0630 = rmnan[(rmnan['base_date'] > '2021-06-18') & (rmnan['base_date'] < '2021-07-01') & is_ildo1]

# Mean of the two measures only; the frame also holds string columns that an
# unrestricted DataFrame.mean() cannot average.
means = one_0601[['em_cnt', 'em_g']].mean()
# .assign() returns a new frame, avoiding the SettingWithCopyWarning raised by
# assigning columns on a filtered slice.
one_0630 = one_0630.assign(em_g=means['em_g'], em_cnt=means['em_cnt'])
one_0630

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,base_date,emd_nm,region,emd_cd,em_cnt,em_g
51904,2021-06-19,일도일동,제주시_중,,197.83607,608689.34426
51945,2021-06-20,일도일동,제주시_중,,197.83607,608689.34426
51986,2021-06-21,일도일동,제주시_중,,197.83607,608689.34426
52027,2021-06-22,일도일동,제주시_중,,197.83607,608689.34426
52068,2021-06-23,일도일동,제주시_중,,197.83607,608689.34426
52109,2021-06-24,일도일동,제주시_중,,197.83607,608689.34426
52150,2021-06-25,일도일동,제주시_중,,197.83607,608689.34426
52191,2021-06-26,일도일동,제주시_중,,197.83607,608689.34426
52232,2021-06-27,일도일동,제주시_중,,197.83607,608689.34426
52273,2021-06-28,일도일동,제주시_중,,197.83607,608689.34426


In [None]:
# Copy the filled measures into merge_final on the rows' original labels.
merge_final.loc[one_0630.index, ['em_cnt', 'em_g']] = one_0630[['em_cnt', 'em_g']]
merge_final.loc[51904:52354]

Unnamed: 0,base_date,emd_nm,region,emd_cd,em_cnt,em_g
51904,2021-06-19,일도일동,제주시_중,,197.83607,608689.34426
51905,2021-06-19,화북동,제주시_중,50110600.00000,2211.00000,3661550.00000
51906,2021-06-20,남원읍,서귀포시_동,50130253.00000,644.00000,2213000.00000
51907,2021-06-20,성산읍,서귀포시_동,50130259.00000,720.00000,3519250.00000
51908,2021-06-20,표선면,서귀포시_동,50130320.00000,310.00000,1427550.00000
...,...,...,...,...,...,...
52350,2021-06-30,용담일동,제주시_중,50110570.00000,591.00000,1105550.00000
52351,2021-06-30,이도이동,제주시_중,50110540.00000,4032.00000,7304500.00000
52352,2021-06-30,이도일동,제주시_중,50110530.00000,90.00000,139750.00000
52353,2021-06-30,이호동,제주시_중,50110680.00000,339.00000,764450.00000



##### 2018-01-01 ~ 2019-11
- 구좌읍(11월 4일) 조천읍(11월 3일) 한림읍(11월 6일 및 10일)  한경면(11월 6일 10일)

**=> 많은 기간의 데이터가 없어서 임의로 데이터를 넣어준다면 유의미한 데이터가 되지 않을 가능성이 크기때문에 NaN값을 유지한다.**

In [None]:
# Drop only the rows that still have no measurement.
# A plain dropna() also removed every imputed row, because those rows received
# em_cnt / em_g but still carry NaN in emd_cd. region and emd_cd are therefore
# first restored from another row of the same emd_nm (first non-null value),
# and the NaN check is limited to the two measure columns.
key_cols = ['region', 'emd_cd']
restored = merge_final.copy()
restored[key_cols] = restored.groupby('emd_nm')[key_cols].transform('first')
merge_final_dropnull = restored.dropna(subset=['em_cnt', 'em_g'])

In [None]:
# Sanity check: no row of the cleaned frame should still lack em_g (expect an
# empty result). The mask is built from the same frame it filters; a mask taken
# from merge_final has a different index and has to be reindexed with a warning.
merge_final_dropnull[merge_final_dropnull['em_g'].isnull()]

  """Entry point for launching an IPython kernel.


Unnamed: 0,base_date,emd_nm,region,emd_cd,em_cnt,em_g


### 7) 데이터 저장

In [None]:
# Write the cleaned frame to CSV without the row index, encoded as CP949
# (the same encoding the raw file was read with).
merge_final_dropnull.to_csv("./datas/newdata_음식물쓰레기.csv",index=False,encoding='CP949')