In [1]:
import numpy as np
import pandas as pd

* Series 생성

In [4]:
s = pd.Series([9904312, 3448737, 2890451, 2466052],
            index=["서울", "부산", "인천", "대구"])
s

서울    9904312
부산    3448737
인천    2890451
대구    2466052
dtype: int64

In [5]:
s[0], s['서울']

(9904312, 9904312)

In [6]:
s[1:3]

부산    3448737
인천    2890451
dtype: int64

In [7]:
s[s > 3000000]

서울    9904312
부산    3448737
dtype: int64

In [8]:
mask = s > 3000000
s[mask]

서울    9904312
부산    3448737
dtype: int64

In [9]:
cities = {'서울' : 9631482, '부산' : 3393191, '인천' : 2632035, '대전' : 1490158 }
s2 = pd.Series(cities)
s2

서울    9631482
부산    3393191
인천    2632035
대전    1490158
dtype: int64

In [11]:
ds = s - s2
ds

대구         NaN
대전         NaN
부산     55546.0
서울    272830.0
인천    258416.0
dtype: float64

* 시리즈 속성

In [12]:
s.values   # numpy array

array([9904312, 3448737, 2890451, 2466052], dtype=int64)

In [13]:
s.index

Index(['서울', '부산', '인천', '대구'], dtype='object')

In [14]:
for city in s.index:
    print(city)

서울
부산
인천
대구


* 시리즈 메소드

In [15]:
np.random.seed(2021)
dice = pd.Series(np.random.randint(1, 7, 100))

In [16]:
# element의 갯수
dice.count()

100

In [17]:
# 고유한 값의 갯수
dice.unique()

array([5, 6, 2, 1, 4, 3])

In [18]:
# 고유한 값(카테고리)별 갯수 ⭐⭐⭐⭐⭐
dice.value_counts()

2    21
6    21
3    19
4    18
5    13
1     8
dtype: int64

In [19]:
# 합, 평균, 최대, 최소, 표준편차
dice.sum(), dice.mean(), dice.max(), dice.min(), dice.std()


(370, 3.7, 6, 1, 1.6298974374376005)

In [20]:
# 요약 통계
dice.describe()

count    100.000000
mean       3.700000
std        1.629897
min        1.000000
25%        2.000000
50%        4.000000
75%        5.000000
max        6.000000
dtype: float64

In [22]:
# 정렬
s.sort_values(ascending=False)
s.sort_values()[::-1]

서울    9904312
부산    3448737
인천    2890451
대구    2466052
dtype: int64

In [38]:
array = np.array([1, 2, 3, 4, 5, 6])
array[::-1]

array([6, 5, 4, 3, 2, 1])

# DataFrame

In [2]:
import numpy as np
import pandas as pd

In [3]:
kor = [80, 90, 70, 30]            # list
eng = np.array([90, 70, 60, 40])  # np.array
math = (90, 60, 80, 70)           # tuple

In [4]:
df = pd.DataFrame({
    '국어' : kor, '영어' : eng, '수학' : math
}, index = ['춘향', '몽룡', '향단', '방자']
)
df

Unnamed: 0,국어,영어,수학
춘향,80,90,90
몽룡,90,70,60
향단,70,60,80
방자,30,40,70


In [5]:
np.random.seed(2021)
df2 = pd.DataFrame(np.random.randint(40, 101, 12).reshape(4, 3),
                    columns = ['국어', '영어', '수학'],
                    index = ['춘향', '몽룡', '향단', '방자'])
df2

Unnamed: 0,국어,영어,수학
춘향,92,61,97
몽룡,40,85,70
향단,62,84,67
방자,69,61,69


In [7]:
# column을 먼저 쓰고 index를 나중에

df['국어']['춘향']
df.국어['춘향']
df.국어[0]
df['국어'][0]
df.국어.춘향

80

In [8]:
df['국어']

춘향    80
몽룡    90
향단    70
방자    30
Name: 국어, dtype: int64

In [9]:
df.영어[1:3]

몽룡    70
향단    60
Name: 영어, dtype: int32

In [10]:
# 모든 학생의 수학 점수를 시리즈로 나타낸다
df['수학']

춘향    90
몽룡    60
향단    80
방자    70
Name: 수학, dtype: int64

In [11]:
# 모든 학생의 국어와 영어 점수를 데이터프레임으로 나타낸다
df[['국어', '영어']]

Unnamed: 0,국어,영어
춘향,80,90
몽룡,90,70
향단,70,60
방자,30,40


In [14]:
# 모든 학생의 각 과목 평균 점수를 새로운 열로 추가한다
df['평균'] = df.mean(axis = 1)
df

Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667
몽룡,90,70,60,73.333333
향단,70,60,80,70.0
방자,30,40,70,46.666667


In [15]:
# 방자의 영어 점수를 80점으로 수정하고 평균 점수도 다시 계산한다
df['영어']['방자'] = 80
df['평균'] = df.mean(axis = 1)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['영어']['방자'] = 80


Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667
몽룡,90,70,60,73.333333
향단,70,60,80,70.0
방자,30,80,70,56.666667


In [23]:
# 춘향의 점수를 DataFrame으로 나타낸다
df[0:1]
df.iloc[:1]

Unnamed: 0,국어,영어,수학,평균
춘향,80,90,90,86.666667


In [30]:
# 향단의 점수를 시리즈로 나타낸다.
df.loc['향단']

# transpose 후 조회
df.T['향단']

국어    70.0
영어    60.0
수학    80.0
평균    70.0
Name: 향단, dtype: float64

In [33]:
# 평균 열 삭제
df2 = df.copy()
del df2['평균']
df2

Unnamed: 0,국어,영어,수학
춘향,80,90,90
몽룡,90,70,60
향단,70,60,80
방자,30,80,70


In [35]:
df2.drop(columns = ['영어', '수학'])

Unnamed: 0,국어
춘향,80
몽룡,90
향단,70
방자,30


In [36]:
df2

Unnamed: 0,국어,영어,수학
춘향,80,90,90
몽룡,90,70,60
향단,70,60,80
방자,30,80,70


In [38]:
# 자기 자신을 변화시키려면 inplace = True 옵션을 주어야 한다.

df2.drop(columns = ['영어', '수학'], inplace = True)

KeyError: "['영어' '수학'] not found in axis"

In [39]:
df2

Unnamed: 0,국어
춘향,80
몽룡,90
향단,70
방자,30


In [40]:
df2.drop(index=['방자'], inplace = True)
df2

Unnamed: 0,국어
춘향,80
몽룡,90
향단,70


* loc, iloc 인덱서

In [45]:
df.drop(columns = ['평균'], inplace = True)
df

Unnamed: 0,국어,영어,수학
춘향,80,90,90
몽룡,90,70,60
향단,70,60,80
방자,30,80,70


In [46]:
# 춘향, 몽룡의 영어, 수학 점수
df.loc['춘향':'몽룡', '영어':] 

Unnamed: 0,영어,수학
춘향,90,90
몽룡,70,60


In [47]:
# 몽룡, 향단의 영어 수학점수
df.iloc[1:3, 1:]

Unnamed: 0,영어,수학
몽룡,70,60
향단,60,80


In [48]:
# 몽룡, 향단의 국어, 수학 점수
df.iloc[1:3, ::2]

Unnamed: 0,국어,수학
몽룡,90,60
향단,70,80


### 3. 데이터프레임 데이터 조작

* 탐색을 위한 메소드

In [51]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [52]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [53]:
# 기초 통계
titanic.describe()

Unnamed: 0,survived,pclass,age,sibsp,parch,fare
count,891.0,891.0,714.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,0.0,1.0,0.42,0.0,0.0,0.0
25%,0.0,2.0,20.125,0.0,0.0,7.9104
50%,0.0,3.0,28.0,0.0,0.0,14.4542
75%,1.0,3.0,38.0,1.0,0.0,31.0
max,1.0,3.0,80.0,8.0,6.0,512.3292


In [55]:
# 숫자로만 보면 큰일나는 데이터 (산포도를 확인 할 것)
ans = sns.load_dataset('anscombe')
ans.head()

Unnamed: 0,dataset,x,y
0,I,10.0,8.04
1,I,8.0,6.95
2,I,13.0,7.58
3,I,9.0,8.81
4,I,11.0,8.33


In [56]:
ans.groupby('dataset').describe()

Unnamed: 0_level_0,x,x,x,x,x,x,x,x,y,y,y,y,y,y,y,y
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
I,11.0,9.0,3.316625,4.0,6.5,9.0,11.5,14.0,11.0,7.500909,2.031568,4.26,6.315,7.58,8.57,10.84
II,11.0,9.0,3.316625,4.0,6.5,9.0,11.5,14.0,11.0,7.500909,2.031657,3.1,6.695,8.14,8.95,9.26
III,11.0,9.0,3.316625,4.0,6.5,9.0,11.5,14.0,11.0,7.5,2.030424,5.39,6.25,7.11,7.98,12.74
IV,11.0,9.0,3.316625,8.0,8.0,8.0,8.0,19.0,11.0,7.500909,2.030579,5.25,6.17,7.04,8.19,12.5


* 산술 메소드

In [50]:
np.random.seed(2021)
df = pd.DataFrame(np.random.randint(40, 101, 20).reshape(4, 5),
                  index = list('wxyz'), columns = list('ABCDE'))
df

Unnamed: 0,A,B,C,D,E
w,92,61,97,40,85
x,70,62,84,67,69
y,61,69,64,52,94
z,46,78,46,73,77


In [58]:
# axis = 0이 default
# numpy의 경우 sum이 전체의 sum이지만
# DataFrame의 경우 sum이 열 단위 혹은 행 단위의 sum이다
df.sum(axis = 1)

w    375
x    352
y    340
z    320
dtype: int64

In [60]:
df['평균'] = df.mean(axis = 1)
df

Unnamed: 0,A,B,C,D,E,평균
w,92,61,97,40,85,75.0
x,70,62,84,67,69,70.4
y,61,69,64,52,94,68.0
z,46,78,46,73,77,64.0


* 결측치 처리

In [61]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [64]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          714 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     889 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  889 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [62]:
# 데이터 전체 중에 결측치가 있는지 확인
titanic.isna().sum().sum()

869

In [63]:
# 어느 열에서 결측치가 있는지 확인
titanic.isnull().sum()

survived         0
pclass           0
sex              0
age            177
sibsp            0
parch            0
fare             0
embarked         2
class            0
who              0
adult_male       0
deck           688
embark_town      2
alive            0
alone            0
dtype: int64

In [65]:
# 대체하는 방법 - 카테고리형일 경우에는 최빈값
titanic.embark_town.value_counts()

Southampton    644
Cherbourg      168
Queenstown      77
Name: embark_town, dtype: int64

In [66]:
titanic.embark_town = titanic.embark_town.fillna('Southampton')
titanic.embarked = titanic.embarked.fillna('S')
titanic.embarked.isna().sum()

0

In [69]:
titanic['age'].mean()

29.69911764705882

In [72]:
# age를 평균으로 대체

titanic['age'] = titanic['age'].fillna(titanic['age'].mean(), inplace = True)
titanic['age'].isna().sum()


0

In [71]:
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   survived     891 non-null    int64   
 1   pclass       891 non-null    int64   
 2   sex          891 non-null    object  
 3   age          891 non-null    float64 
 4   sibsp        891 non-null    int64   
 5   parch        891 non-null    int64   
 6   fare         891 non-null    float64 
 7   embarked     891 non-null    object  
 8   class        891 non-null    category
 9   who          891 non-null    object  
 10  adult_male   891 non-null    bool    
 11  deck         203 non-null    category
 12  embark_town  891 non-null    object  
 13  alive        891 non-null    object  
 14  alone        891 non-null    bool    
dtypes: bool(2), category(2), float64(2), int64(4), object(5)
memory usage: 80.7+ KB


In [74]:
titanic.dropna(axis = 1, inplace=True)
titanic

# 또는
titanic.drop(columns=['deck'], inplace=True)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,embark_town,alive,alone
0,0,3,male,22.000000,1,0,7.2500,S,Third,man,True,Southampton,no,False
1,1,1,female,38.000000,1,0,71.2833,C,First,woman,False,Cherbourg,yes,False
2,1,3,female,26.000000,0,0,7.9250,S,Third,woman,False,Southampton,yes,True
3,1,1,female,35.000000,1,0,53.1000,S,First,woman,False,Southampton,yes,False
4,0,3,male,35.000000,0,0,8.0500,S,Third,man,True,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.000000,0,0,13.0000,S,Second,man,True,Southampton,no,True
887,1,1,female,19.000000,0,0,30.0000,S,First,woman,False,Southampton,yes,True
888,0,3,female,29.699118,1,2,23.4500,S,Third,woman,False,Southampton,no,False
889,1,1,male,26.000000,0,0,30.0000,C,First,man,True,Cherbourg,yes,True


In [75]:
titanic.isnull().sum()

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
embark_town    0
alive          0
alone          0
dtype: int64

* 데이터 타입 변경

In [82]:
df['평균'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, w to z
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       4 non-null      int32  
 1   B       4 non-null      int32  
 2   C       4 non-null      int32  
 3   D       4 non-null      int32  
 4   E       4 non-null      int32  
 5   평균      4 non-null      float64
dtypes: float64(1), int32(5)
memory usage: 316.0+ bytes


* apply 메소드

In [84]:
df3 = pd.DataFrame({
 'A': [1, 3, 4, 3, 4],
 'B': [2, 3, 1, 2, 3],
 'C': [1, 5, 2, 4, 4]
})

df3

Unnamed: 0,A,B,C
0,1,2,1
1,3,3,5
2,4,1,2
3,3,2,4
4,4,3,4


In [85]:
df3.apply(lambda x : x.max() - x.min())

A    3
B    2
C    4
dtype: int64

In [87]:
# 행단위로 주고 싶을 경우

df3.apply(lambda x : x.max() - x.min(), axis = 1)

0    1
1    2
2    3
3    2
4    1
dtype: int64

In [108]:
# 각 행별로 최댓값에서 최솟값을 뺀 결과를 '편차'라는 이름의 열로 만든다

df['편차'] = df.apply(lambda x : x.max() - x.min(), axis = 1)
df

Unnamed: 0,A,B,C,D,E,평균,편차
w,92,61,97,40,85,75.0,57.0
x,70,62,84,67,69,70.4,22.0
y,61,69,64,52,94,68.0,42.0
z,46,78,46,73,77,64.0,32.0


In [119]:
# 평균이 70점 이상이면 pass, 아니면 fail인 '결과'열을 추가

df['결과'] = df['평균'].apply(lambda x : 'PASS' if x >= 70 else 'FAIL')
df

Unnamed: 0,A,B,C,D,E,평균,편차,결과
w,92,61,97,40,85,75.0,57.0,PASS
x,70,62,84,67,69,70.4,22.0,PASS
y,61,69,64,52,94,68.0,42.0,FAIL
z,46,78,46,73,77,64.0,32.0,FAIL


In [120]:
# 다른 방법
df['결과2'] = df.apply(lambda x : '합격' if x['평균'] >= 70 else '불합격', axis = 1)
df

Unnamed: 0,A,B,C,D,E,평균,편차,결과,결과2
w,92,61,97,40,85,75.0,57.0,PASS,합격
x,70,62,84,67,69,70.4,22.0,PASS,합격
y,61,69,64,52,94,68.0,42.0,FAIL,불합격
z,46,78,46,73,77,64.0,32.0,FAIL,불합격


In [111]:
def func2(x):
    if x['평균'] >= 70:
        return '합격'
    else:
        return '불합격'

In [122]:
df1 = df.drop('편차', axis = 1)
df1

Unnamed: 0,A,B,C,D,E,평균,결과,결과2
w,92,61,97,40,85,75.0,PASS,합격
x,70,62,84,67,69,70.4,PASS,합격
y,61,69,64,52,94,68.0,FAIL,불합격
z,46,78,46,73,77,64.0,FAIL,불합격


In [121]:
df2 = df.drop(['결과', '결과2'], axis = 1)
df2

Unnamed: 0,A,B,C,D,E,평균,편차
w,92,61,97,40,85,75.0,57.0
x,70,62,84,67,69,70.4,22.0
y,61,69,64,52,94,68.0,42.0
z,46,78,46,73,77,64.0,32.0


* 데이터 타입 변경

In [126]:
# 1000 단위 구분기호 없애기

price_list = []
for i in range(12):
    price = np.random.randint(100, 1000, 1)[0] * 100  # 값이 array로 튀어나와서 [0]을 준 것
    price_list.append(f'{price:,d}')
price_list

['46,600',
 '58,100',
 '48,000',
 '84,100',
 '51,000',
 '80,600',
 '58,300',
 '16,300',
 '66,100',
 '75,600',
 '79,000',
 '92,200']

In [143]:
df = pd.DataFrame(np.array(price_list).reshape(3, 4), columns = list('ABCD'))
df

Unnamed: 0,A,B,C,D
0,46600,58100,48000,84100
1,51000,80600,58300,16300
2,66100,75600,79000,92200


In [132]:
df['A'] = df['A'].apply(lambda x : int(x.replace(',','')))
df

Unnamed: 0,A,B,C,D
0,46600,58100,48000,84100
1,51000,80600,58300,16300
2,66100,75600,79000,92200


In [144]:
for column in df.columns:
    df[column] = df[column].apply(lambda x : int(x.replace(',','')))
df


Unnamed: 0,A,B,C,D
0,46600,58100,48000,84100
1,51000,80600,58300,16300
2,66100,75600,79000,92200


In [138]:
cctv = pd.read_csv('서울시CCTV설치운영현황(자치구)_년도별_210731기준.csv',
                    skiprows = 1, encoding = 'euc-kr')
cctv.head()

Unnamed: 0,구분,총계,2012년 이전,2012년,2013년,2014년,2015년,2016년,2017년,2018년,2019년,2020년,2021년
0,계,77032,7667,2200,3491,4439,6582,8129,9947,9876,11961,11132,1608
1,종로구,1772,813,0,0,210,150,1,261,85,9,200,43
2,중 구,2333,16,114,87,77,236,240,372,386,155,361,289
3,용산구,2383,34,71,234,125,221,298,351,125,307,617,0
4,성동구,3602,448,125,212,105,339,310,874,390,262,461,76


In [137]:
cctv['총계'] = cctv['총계'].apply(lambda x : int(x.replace(",","")))
cctv

Unnamed: 0,구분,총계,2012년 이전,2012년,2013년,2014년,2015년,2016년,2017년,2018년,2019년,2020년,2021년
0,계,77032,7667,2200,3491,4439,6582,8129,9947,9876,11961,11132,1608
1,종로구,1772,813,0,0,210,150,1,261,85,9,200,43
2,중 구,2333,16,114,87,77,236,240,372,386,155,361,289
3,용산구,2383,34,71,234,125,221,298,351,125,307,617,0
4,성동구,3602,448,125,212,105,339,310,874,390,262,461,76
5,광진구,2588,35,57,100,187,98,52,675,465,712,175,32
6,동대문구,2497,1090,146,60,29,111,233,136,197,209,223,63
7,중랑구,3296,302,24,253,88,141,161,162,173,1049,939,4
8,성북구,3958,83,78,170,230,323,594,460,867,714,251,188
9,강북구,2462,0,0,24,65,105,243,6,392,1000,588,39


In [147]:
for column in cctv.columns[1:]:
    cctv[column] = cctv[column].apply(lambda x : int(x.replace(',','')))
cctv

Unnamed: 0,구분,총계,2012년 이전,2012년,2013년,2014년,2015년,2016년,2017년,2018년,2019년,2020년,2021년
0,계,77032,7667,2200,3491,4439,6582,8129,9947,9876,11961,11132,1608
1,종로구,1772,813,0,0,210,150,1,261,85,9,200,43
2,중 구,2333,16,114,87,77,236,240,372,386,155,361,289
3,용산구,2383,34,71,234,125,221,298,351,125,307,617,0
4,성동구,3602,448,125,212,105,339,310,874,390,262,461,76
5,광진구,2588,35,57,100,187,98,52,675,465,712,175,32
6,동대문구,2497,1090,146,60,29,111,233,136,197,209,223,63
7,중랑구,3296,302,24,253,88,141,161,162,173,1049,939,4
8,성북구,3958,83,78,170,230,323,594,460,867,714,251,188
9,강북구,2462,0,0,24,65,105,243,6,392,1000,588,39


In [148]:
cctv.columns[1:]

Index(['총계', '2012년 이전', '2012년', '2013년', '2014년', '2015년', '2016년', '2017년',
       '2018년', '2019년', '2020년', '2021년'],
      dtype='object')