# 서울시 범죄 현황 분석

### 데이터 파악하고 정리하기

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Use a font with Hangul glyphs so Korean labels render in plots.
# NOTE(review): 'Malgun Gothic' normally ships with Windows only - confirm the target platform.
mpl.rcParams['font.family'] = 'Malgun Gothic'
# Draw negative tick labels with an ASCII hyphen rather than the Unicode minus sign.
mpl.rcParams['axes.unicode_minus'] = False

In [8]:
import warnings
# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

In [12]:
# Tab-separated file: the first physical line is skipped so the next line is
# used as the header, and ',' is treated as a thousands separator in numbers.
crime_path = 'data/서울시 5대범죄 발생현황(2019).tsv'
crime = pd.read_table(crime_path, skiprows=1, thousands=',')
crime.head()

Unnamed: 0,기간,자치구,발생,검거,발생.1,검거.1,발생.2,검거.2,발생.3,검거.3,발생.4,검거.4,발생.5,검거.5
0,2019,합계,103668,74805,136,128,136,133,6469,6007,42204,21284,54723,47253
1,2019,종로구,3846,4117,4,5,7,8,238,1143,1515,1069,2082,1892
2,2019,중구,4327,2804,2,1,6,5,195,115,2202,1050,1922,1633
3,2019,용산구,3313,2611,3,3,3,4,272,237,999,544,2036,1823
4,2019,성동구,2512,1838,6,5,9,10,133,96,970,511,1394,1216


In [13]:
# Remove the row labelled 0 (the city-wide 합계 total) and the 기간 column,
# leaving one row per district.
crime.drop(index=0, columns='기간', inplace=True)
crime.head(3)

Unnamed: 0,자치구,발생,검거,발생.1,검거.1,발생.2,검거.2,발생.3,검거.3,발생.4,검거.4,발생.5,검거.5
1,종로구,3846,4117,4,5,7,8,238,1143,1515,1069,2082,1892
2,중구,4327,2804,2,1,6,5,195,115,2202,1050,1922,1633
3,용산구,3313,2611,3,3,3,4,272,237,999,544,2036,1823


In [14]:
# The loaded frame has the 발생/검거 headers repeated with .1 - .5 suffixes,
# one pair per crime type; replace each suffix with an explicit crime-type prefix.
crime_kinds = ['살인', '강도', '강간', '절도', '폭력']
new_names = {'자치구': '구별'}
for n, kind in enumerate(crime_kinds, start=1):
    new_names['발생.{}'.format(n)] = kind + '발생'
    new_names['검거.{}'.format(n)] = kind + '검거'
crime.rename(columns=new_names, inplace=True)
crime.head(3)

Unnamed: 0,구별,발생,검거,살인발생,살인검거,강도발생,강도검거,강간발생,강간검거,절도발생,절도검거,폭력발생,폭력검거
1,종로구,3846,4117,4,5,7,8,238,1143,1515,1069,2082,1892
2,중구,4327,2804,2,1,6,5,195,115,2202,1050,1922,1633
3,용산구,3313,2611,3,3,3,4,272,237,999,544,2036,1823


In [15]:
# Arrest rate (%) per crime type: arrests / incidents * 100, rounded to 2 decimals.
crime_kinds = ['살인', '강도', '강간', '절도', '폭력']
for kind in crime_kinds:
    arrests = crime[kind + '검거']
    incidents = crime[kind + '발생']
    crime[kind + '검거율'] = (arrests / incidents * 100).round(2)

# Keep only the per-type incident counts and the new rates: the overall
# 발생/검거 totals and the raw per-type arrest counts are dropped.
raw_columns = ['발생', '검거'] + [kind + '검거' for kind in crime_kinds]
crime.drop(columns=raw_columns, inplace=True)
crime.head(3)

Unnamed: 0,구별,살인발생,강도발생,강간발생,절도발생,폭력발생,살인검거율,강도검거율,강간검거율,절도검거율,폭력검거율
1,종로구,4,7,238,1515,2082,125.0,114.29,480.25,70.56,90.87
2,중구,2,6,195,2202,1922,50.0,83.33,58.97,47.68,84.96
3,용산구,3,3,272,999,2036,100.0,133.33,87.13,54.45,89.54


In [16]:
# Cap every arrest rate at 100: some districts report more arrests than
# incidents, which yields rates above 100%.
col_list = '살인검거율	강도검거율	강간검거율	절도검거율	폭력검거율'.split()
crime[col_list] = crime[col_list].clip(upper=100.)
crime.head(3)

Unnamed: 0,구별,살인발생,강도발생,강간발생,절도발생,폭력발생,살인검거율,강도검거율,강간검거율,절도검거율,폭력검거율
1,종로구,4,7,238,1515,2082,100.0,100.0,100.0,70.56,90.87
2,중구,2,6,195,2202,1922,50.0,83.33,58.97,47.68,84.96
3,용산구,3,3,272,999,2036,100.0,100.0,87.13,54.45,89.54


In [17]:
# Load the per-district CCTV / population table (already merged, per the file name).
cctv_pop_path = 'data/서울시_CCTV_인구_병합데이터.csv'
df = pd.read_csv(cctv_pop_path)
df.head()

Unnamed: 0,구별,CCTV댓수,최근증가율,인구수,외국인,고령자,외국인비율,고령자비율
0,종로구,1772,20.49,155106,9077,27605,5.85,17.8
1,중구,2333,78.98,132259,9243,23980,6.99,18.13
2,용산구,2383,78.64,240665,14287,38884,5.94,16.16
3,성동구,3602,46.13,295767,6605,45591,2.23,15.41
4,광진구,2588,112.29,355306,12825,50815,3.61,14.3


In [19]:
# Show the 구별 column of both frames so the district names and their order
# can be compared by eye.
for frame in (crime, df):
    print(frame['구별'])

1      종로구
2       중구
3      용산구
4      성동구
5      광진구
6     동대문구
7      중랑구
8      성북구
9      강북구
10     도봉구
11     노원구
12     은평구
13    서대문구
14     마포구
15     양천구
16     강서구
17     구로구
18     금천구
19    영등포구
20     동작구
21     관악구
22     서초구
23     강남구
24     송파구
25     강동구
Name: 구별, dtype: object
0      종로구
1       중구
2      용산구
3      성동구
4      광진구
5     동대문구
6      중랑구
7      성북구
8      강북구
9      도봉구
10     노원구
11     은평구
12    서대문구
13     마포구
14     양천구
15     강서구
16     구로구
17     금천구
18    영등포구
19     동작구
20     관악구
21     서초구
22     강남구
23     송파구
24     강동구
Name: 구별, dtype: object
