In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import missingno as msno
import numpy  as np

path = './data/'
pd.options.display.max_rows = 150
pd.options.display.max_columns = 350
plt.rc('font',family='Malgun Gothic')
plt.rcParams['axes.unicode_minus'] = False

In [2]:
화재 = pd.read_csv(path + '화재분야.csv', encoding='cp949')
교통 = pd.read_csv(path + '교통분야.csv', encoding='cp949')
자연재해 = pd.read_csv(path + '자연재해분야.csv', encoding='cp949')
범죄 = pd.read_csv(path + '범죄분야.csv', encoding='cp949')
안전사고 = pd.read_csv(path + '안전사고분야.csv', encoding='cp949')
자살 = pd.read_csv(path + '자살분야.csv', encoding='cp949')
감염병 = pd.read_csv(path + '감염병분야.csv', encoding='cp949')

In [3]:
def 컬럼수정(table):
    
    table['시도'] = table.지역 #시도열 추가
    
    table['시도']=np.where(table['시도'].str.startswith('('), table['시도'], np.nan)
    table['시도'].fillna(method='ffill', inplace=True)

    sido_short=['(강원)','(경기)','(경남)',
                '(경북)','(광주)','(대구)',
                '(대전)','(부산)','(서울)',
                '(울산)','(인천)','(전남)',
                '(전북)','(충남)','(충북)'] #제주, 세종 없음
    
    sido_full=['강원도','경기도','경상남도',
               '경상북도','광주광역시','대구광역시',
               '대전광역시','부산광역시','서울특별시',
               '울산광역시','인천광역시','전라남도',
               '전라북도','충청남도','충청북도']
    
    table.시도.replace(sido_short,sido_full, inplace=True)
    
    table['시도_지역']='2015_'+table.시도+'_'+table.지역

In [4]:
table_list=[화재, 교통, 자연재해, 범죄, 안전사고, 자살, 감염병]

for i in table_list:
    컬럼수정(i)

In [5]:
교통.loc[교통.시도_지역=='2015_']

Unnamed: 0,지역,등급,시도,시도_지역


In [6]:
화재.rename(columns={'등급': 'a화재'}, inplace=True)
자연재해.rename(columns={'등급': 'a자연재해'}, inplace=True)
범죄.rename(columns={'등급': 'a범죄'}, inplace=True)
자살.rename(columns={'등급': 'a자살'}, inplace=True)
감염병.rename(columns={'등급': 'a감염병'}, inplace=True)
안전사고.rename(columns={'등급': 'a안전사고'}, inplace=True)
교통.rename(columns={'등급': 'a교통'}, inplace=True)

In [7]:
화재.sort_index(axis=1, inplace=True)
자연재해.sort_index(axis=1, inplace=True)
범죄.sort_index(axis=1, inplace=True)
자살.sort_index(axis=1, inplace=True)
감염병.sort_index(axis=1, inplace=True)
안전사고.sort_index(axis=1, inplace=True)
교통.sort_index(axis=1, inplace=True)
교통

Unnamed: 0,a교통,시도,시도_지역,지역
0,1,경기도,2015_경기도_(경기),(경기)
1,1,경기도,2015_경기도_수원시,수원시
2,1,경기도,2015_경기도_성남시,성남시
3,1,경기도,2015_경기도_안양시,안양시
4,1,경기도,2015_경기도_부천시,부천시
...,...,...,...,...
303,5,인천광역시,2015_인천광역시_중구,중구
304,5,광주광역시,2015_광주광역시_(광주),(광주)
305,5,광주광역시,2015_광주광역시_동구,동구
306,5,대전광역시,2015_대전광역시_(대전),(대전)


In [8]:
import re
p = re.compile('^\(')

In [9]:
def refine_df(field):
    field.지역 = field['지역'].map(lambda x : None if p.match(x) else x)
    df = field.dropna()
    return df

In [10]:
화재_df = refine_df(화재)
자연재해_df = refine_df(자연재해)
범죄_df = refine_df(범죄)
자살_df = refine_df(자살)
감염병_df = refine_df(감염병)
안전사고_df = refine_df(안전사고)
교통_df = refine_df(교통)
화재_df.shape,자연재해_df.shape,범죄_df.shape,자살_df.shape,감염병_df.shape,안전사고_df.shape,교통_df.shape

((226, 4), (226, 4), (226, 4), (226, 4), (226, 4), (226, 4), (226, 4))

In [11]:
화재_df.drop(['시도','지역'],axis=1, inplace=True)
자연재해_df.drop(['시도','지역'],axis=1, inplace=True)
범죄_df.drop(['시도','지역'],axis=1, inplace=True)
자살_df.drop(['시도','지역'],axis=1, inplace=True)
감염병_df.drop(['시도','지역'],axis=1, inplace=True)
안전사고_df.drop(['시도','지역'],axis=1, inplace=True)
교통_df.drop(['시도','지역'],axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [12]:
total_df = 화재_df.merge(교통_df).merge(자연재해_df).merge(범죄_df).merge(안전사고_df).merge(자살_df).merge(감염병_df)

In [13]:
total_df.shape

(226, 8)

In [14]:
total_df.rename(columns={'a화재':'화재','a교통':'교통','a자연재해':'자연재해','a범죄':'범죄','a안전사고':'안전사고','a자살':'자살','a감염병':'감염병' }, inplace=True)

In [15]:
total_df

Unnamed: 0,화재,시도_지역,교통,자연재해,범죄,안전사고,자살,감염병
0,1,2015_경기도_수원시,1,4,4,1,2,1
1,1,2015_경기도_성남시,1,4,4,2,2,2
2,1,2015_경기도_안양시,1,3,3,2,2,2
3,1,2015_경기도_부천시,1,3,5,1,2,2
4,1,2015_경기도_안산시,2,4,5,1,3,2
...,...,...,...,...,...,...,...,...
221,5,2015_부산광역시_중구,5,3,5,4,5,5
222,5,2015_부산광역시_강서구,5,4,3,5,3,3
223,5,2015_대구광역시_중구,5,1,5,4,4,5
224,5,2015_인천광역시_중구,5,4,3,5,4,4


In [16]:
y = pd.DataFrame(total_df.iloc[:,1])
y

Unnamed: 0,시도_지역
0,2015_경기도_수원시
1,2015_경기도_성남시
2,2015_경기도_안양시
3,2015_경기도_부천시
4,2015_경기도_안산시
...,...
221,2015_부산광역시_중구
222,2015_부산광역시_강서구
223,2015_대구광역시_중구
224,2015_인천광역시_중구


In [17]:
total_df.drop('시도_지역', axis=1,inplace=True)

In [18]:
sigungu_class = pd.concat([y,total_df],axis=1)

In [19]:
sigungu_class.rename(columns={'시도_지역': '지역'}, inplace=True)

In [20]:
sigungu_class.to_csv(path+'시군구등급.csv', encoding='cp949',index=False)

In [21]:
sigungu_class

Unnamed: 0,지역,화재,교통,자연재해,범죄,안전사고,자살,감염병
0,2015_경기도_수원시,1,1,4,4,1,2,1
1,2015_경기도_성남시,1,1,4,4,2,2,2
2,2015_경기도_안양시,1,1,3,3,2,2,2
3,2015_경기도_부천시,1,1,3,5,1,2,2
4,2015_경기도_안산시,1,2,4,5,1,3,2
...,...,...,...,...,...,...,...,...
221,2015_부산광역시_중구,5,5,3,5,4,5,5
222,2015_부산광역시_강서구,5,5,4,3,5,3,3
223,2015_대구광역시_중구,5,5,1,5,4,4,5
224,2015_인천광역시_중구,5,5,4,3,5,4,4
