#### Import Libraries

In [10]:
import os
import numpy as np
import pandas as pd

from copy import copy

#### Fetching Data

In [8]:
# Location of the road CCTV CSV: one directory above the current working
# directory, inside the project's data folder.
CCTV_CSV_PATH = os.path.join(
    os.getcwd(),
    '..',
    '60-GEOAI-GEOAIP39',
    '89_data',
    'csv',
    'road_cctv_info.csv',
)

# Short English column names, assigned positionally to the loaded frame
# (the list needs exactly one entry per CSV column, in file order).
COLUMNS = [
    'org_name',     # management organization name
    'r_addr',       # road-name address of the site
    'l_addr',       # land-parcel (lot-number) address of the site
    'purp_type',    # installation purpose category
    'n_camera',     # number of cameras
    'r_camera',     # camera resolution (pixel count)
    'sd_info',      # shooting direction information
    'r_period',     # retention period in days
    'install_ym',   # installation year and month
    'org_phone',    # management organization phone number
    'lat',          # latitude
    'lon',          # longitude
    'ref_date',     # data reference date
    'porg_cd',      # providing organization code
    'porg_name',    # providing organization name
]

# Read the file with the CP949 codec, then swap the labels taken from its
# header row for the names above.
cctv_df = pd.read_csv(CCTV_CSV_PATH, encoding='cp949')
cctv_df.columns = COLUMNS

# Summarize dtypes and non-null counts of the freshly loaded frame.
cctv_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   org_name    50000 non-null  object 
 1   r_addr      30380 non-null  object 
 2   l_addr      44295 non-null  object 
 3   purp_type   50000 non-null  object 
 4   n_camera    50000 non-null  object 
 5   r_camera    44968 non-null  float64
 6   sd_info     35413 non-null  object 
 7   r_period    48250 non-null  float64
 8   install_ym  37622 non-null  object 
 9   org_phone   50000 non-null  object 
 10  lat         49580 non-null  float64
 11  lon         49585 non-null  float64
 12  ref_date    50000 non-null  object 
 13  porg_cd     50000 non-null  object 
 14  porg_name   50000 non-null  object 
dtypes: float64(4), object(11)
memory usage: 5.7+ MB


In [9]:
# Preview the first five rows of the loaded frame.
cctv_df.head(n=5)

Unnamed: 0,org_name,r_addr,l_addr,purp_type,n_camera,r_camera,sd_info,r_period,install_ym,org_phone,lat,lon,ref_date,porg_cd,porg_name
0,경상남도 김해시청,,경상남도 김해시 구산동 1072-19,생활방범,3,200.0,현대병원뒤,30.0,2019-11,055-330-4741,35.249247,128.871639,2019-06-26,5350000,경상남도 김해시
1,경상남도 김해시청,,경상남도 김해시 구산동 305-15,생활방범,2,200.0,목화골공원,30.0,2019-12,055-330-4741,35.247411,128.873605,2019-06-26,5350000,경상남도 김해시
2,경상남도 김해시청,,경상남도 김해시 삼계동 1484-14,생활방범,2,200.0,정원빌라,30.0,2019-06,055-330-4741,35.261728,128.874144,2019-06-26,5350000,경상남도 김해시
3,경상남도 김해시청,,경상남도 김해시 구산동 1043,생활방범,4,200.0,최가아구찜,30.0,2019-05,055-330-4741,35.247076,128.872461,2019-06-26,5350000,경상남도 김해시
4,경상남도 김해시청,,경상남도 김해시 구산동 175-10,생활방범,2,200.0,동호맨션,30.0,2019-12,055-330-4741,35.241185,128.877334,2019-06-26,5350000,경상남도 김해시


#### Preprocessing Data

##### Deep Copy

In [11]:
# Keep an independent backup of the loaded frame so later cells can start
# over from it; deep=True (the pandas default) copies the data as well.
cctv_df_original = cctv_df.copy(deep=True)

##### 김해시 데이터 추출 (Extract Gimhae City Data)

In [17]:
# Start this section from a fresh deep copy of the backup so the cells below
# can be re-run without touching cctv_df_original.
cctv_df = cctv_df_original.copy(deep=True)

In [18]:
# Keep only the rows whose managing organization name contains '김해시청'.
# - regex=False: the text is matched literally, not as a regular expression.
# - na=False: a missing organization name counts as "no match" instead of
#   putting NaN into the boolean mask.
# - .copy(): the result is an independent frame, so later column assignments
#   on cctv_df do not raise SettingWithCopyWarning.
cctv_df = cctv_df.loc[
    cctv_df['org_name'].str.contains('김해시청', regex=False, na=False)
].copy()

In [19]:
# Inspect row count, non-null counts and dtypes after the organization-name filter.
cctv_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 942 entries, 0 to 31517
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   org_name    942 non-null    object 
 1   r_addr      0 non-null      object 
 2   l_addr      942 non-null    object 
 3   purp_type   942 non-null    object 
 4   n_camera    942 non-null    object 
 5   r_camera    942 non-null    float64
 6   sd_info     942 non-null    object 
 7   r_period    942 non-null    float64
 8   install_ym  942 non-null    object 
 9   org_phone   942 non-null    object 
 10  lat         942 non-null    float64
 11  lon         942 non-null    float64
 12  ref_date    942 non-null    object 
 13  porg_cd     942 non-null    object 
 14  porg_name   942 non-null    object 
dtypes: float64(4), object(11)
memory usage: 117.8+ KB


In [21]:
# Cast the camera count from object to integer. assign() builds a new frame
# instead of writing into the row-filtered one, which avoids
# SettingWithCopyWarning and attribute-style column assignment.
# `int` keeps the original target dtype (the recorded output shows int32).
cctv_df = cctv_df.assign(n_camera=cctv_df['n_camera'].astype(int))

In [22]:
# Re-run the summary to confirm the n_camera dtype after the integer cast.
cctv_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 942 entries, 0 to 31517
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   org_name    942 non-null    object 
 1   r_addr      0 non-null      object 
 2   l_addr      942 non-null    object 
 3   purp_type   942 non-null    object 
 4   n_camera    942 non-null    int32  
 5   r_camera    942 non-null    float64
 6   sd_info     942 non-null    object 
 7   r_period    942 non-null    float64
 8   install_ym  942 non-null    object 
 9   org_phone   942 non-null    object 
 10  lat         942 non-null    float64
 11  lon         942 non-null    float64
 12  ref_date    942 non-null    object 
 13  porg_cd     942 non-null    object 
 14  porg_name   942 non-null    object 
dtypes: float64(4), int32(1), object(10)
memory usage: 114.1+ KB


In [23]:
# Smallest and largest camera count among the selected rows, shown as a pair.
(cctv_df['n_camera'].min(), cctv_df['n_camera'].max())

(1, 10)