# 시리즈(Series)
- 복수의 행(row)으로 이루어진 하나의 열(column)구조
- 색인(index)을 가지고 원하는 데이터에 접근가능
- 색인을 지정하지 않으면 0부터 시작하는 정수 색인이 자동으로 생성됨

In [1]:
import pandas as pd

# Build a Series from a plain list. No index is supplied, so the output below
# shows the automatically assigned integer labels 0..3.
pd.Series(data=[1, 4, 7, 9])

0    1
1    4
2    7
3    9
dtype: int64

### 인덱싱

In [2]:
# Series whose entries are addressed by city-name labels instead of positions.
x = pd.Series(
    data=[1, 7, 4, 9],
    index=['서울', '대전', '대구', '부산'],
)

In [3]:
# Label-based lookup: returns the single value stored under the label '부산'.
x['부산']

9

In [4]:
# Passing a list of labels selects several entries at once; the result is a
# Series that keeps the requested labels as its index.
x[['대구','부산']]

대구    4
부산    9
dtype: int64

In [5]:
# Show the two halves of the Series: its labels, then its underlying values,
# one per line.
print(x.index, x.values, sep='\n')

Index(['서울', '대전', '대구', '부산'], dtype='object')
[1 7 4 9]


In [6]:
# sorted() builds and returns a new list; it never reorders x itself.
sorted(x)  # result is discarded: nothing is assigned or printed
print(sorted(x))  # the values of x in ascending order, as a plain list
sorted(x.index)  # result is discarded as well
print(x.index)  # the index of x still has its original order

[1, 4, 7, 9]
Index(['서울', '대전', '대구', '부산'], dtype='object')


In [7]:
# Reorder x by its own labels in ascending order. Deriving the labels from
# x.index avoids repeating the city list by hand, so this cell stays correct
# if x is redefined with different labels.
x.reindex(sorted(x.index))

대구    4
대전    7
부산    9
서울    1
dtype: int64

In [8]:
# Two Series with partly different labels.
x = pd.Series(data=[1, 7, 4, 9], index=['서울', '대전', '대구', '부산'])
y = pd.Series(data=[3, 8, 11, 19], index=['대구', '서울', '광주', '부산'])

# Addition aligns on labels: labels present in only one operand become NaN.
x.add(y)

광주     NaN
대구     7.0
대전     NaN
부산    28.0
서울     9.0
dtype: float64

### unique()

In [26]:
# Distinct values of a Series, in order of first appearance.
medal = [1, 3, 2, 4, 2, 5]
x = pd.Series(data=medal)
y = x.unique()
print(y)

[1 3 2 4 5]


In [23]:
# Calling pd.unique() directly on a plain Python list is deprecated in recent
# pandas releases, so wrap the list in a Series first. The resulting array of
# distinct values (in order of first appearance) is unchanged.
a = pd.unique(pd.Series(medal))
print(a)

[1 3 2 4 5]


### dict를 Series로 변환

In [27]:
# A dict converts directly to a Series: keys become the index labels and
# values become the data.
dic = dict(a=1, b=10, c=20)
pd.Series(data=dic)

a     1
b    10
c    20
dtype: int64

## 데이터프레임(DataFrame)

### Series로 생성

In [34]:
# Three Series that share the same labels.
ser_1 = pd.Series(dict(a=1, b=10, c=20))
ser_2 = pd.Series(dict(a=2, b=5, c=5))
ser_3 = pd.Series(dict(a=3, b=11, c=8))

# Placing them side by side turns each Series into one column of a DataFrame.
pd.concat([ser_1, ser_2, ser_3], axis='columns')

Unnamed: 0,0,1,2
a,1,2,3
b,10,5,11
c,20,5,8


### dataframe 생성

In [42]:
# Column-oriented input: each key is a column name, each list holds that
# column's values.
data = dict(
    age=[23, 43, 12, 45],
    name=['민준', '현우', '서연', '동현'],
    height=[175.3, 180.3, 165.8, 172.7],
)

# `columns` fixes the left-to-right column order of the resulting frame.
x = pd.DataFrame(data=data, columns=['name', 'age', 'height'])
x

Unnamed: 0,name,age,height
0,민준,23,175.3
1,현우,43,180.3
2,서연,12,165.8
3,동현,45,172.7


### 행·열 선택 (인덱싱)

In [45]:
# Position-based row access: the row at position 1 (the second row), returned
# as a Series whose index is the frame's column names.
x.iloc[1]

name         현우
age          43
height    180.3
Name: 1, dtype: object

In [51]:
# Column access by name: returns the 'name' column as a Series.
x['name']

0    민준
1    현우
2    서연
3    동현
Name: name, dtype: object

### head() , tail()
- 인자를 생략하면 기본값으로 5개 행을 반환

In [52]:
# Nine two-element rows used as sample data for head() and tail().
ary = [
    [1, 2],
    [2, 3],
    [4, 2],
    [5, 3],
    [6, 2],
    [2, 5],
    [1, 1],
    [5, 4],
    [2, 4],
]

In [55]:
# Row-oriented input: every inner list of `ary` becomes one row, and `columns`
# names the two positions.
data = pd.DataFrame(data=ary, columns=['first', 'second'])
data

Unnamed: 0,first,second
0,1,2
1,2,3
2,4,2
3,5,3
4,6,2
5,2,5
6,1,1
7,5,4
8,2,4


In [56]:
# With no argument, head() returns the first five rows.
data.head()

Unnamed: 0,first,second
0,1,2
1,2,3
2,4,2
3,5,3
4,6,2


In [58]:
# tail(n) returns the last n rows; here the final three.
data.tail(3)

Unnamed: 0,first,second
6,1,1
7,5,4
8,2,4


In [16]:
import pandas as pd

# Load the CP949-encoded accident CSV, then mark every missing cell with True.
df = pd.read_csv(
    '경찰청_졸음운전 교통사고 현황_20191231.csv',
    encoding='CP949',
    engine='python',
)
df.isna()

Unnamed: 0,구분,도로종류,사고(건),사망(명),부상(명)
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
5,False,False,True,False,False
6,False,False,False,False,False
7,False,False,False,False,False
8,False,False,False,False,True
9,False,False,False,False,False


In [18]:
# Reload the raw data so this cell does not depend on earlier modifications.
df = pd.read_csv(
    '경찰청_졸음운전 교통사고 현황_20191231.csv',
    encoding='CP949',
    engine='python',
)

# Drop every row that has a missing value in any column.
drop_data = df.dropna(axis=0, how='any')
drop_data

Unnamed: 0,구분,도로종류,사고(건),사망(명),부상(명)
0,2017년,일반국도,363.0,19,698.0
1,2017년,지방도,230.0,10,451.0
2,2017년,특별광역시도,590.0,12,1189.0
3,2017년,시도,577.0,11,1125.0
4,2017년,군도,101.0,3,169.0
6,2017년,기타,21.0,0,41.0
7,2018년,일반국도,249.0,18,487.0
9,2018년,특별광역시도,329.0,4,710.0
10,2018년,시도,355.0,2,698.0
11,2018년,군도,71.0,1,115.0


In [20]:
# Reload the raw data so this cell does not depend on earlier modifications.
df = pd.read_csv(
    '경찰청_졸음운전 교통사고 현황_20191231.csv',
    encoding='CP949',
    engine='python',
)

# Drop only the rows whose '사고(건)' value is missing; gaps in other columns
# are kept.
drop_data = df.dropna(subset=['사고(건)'])
drop_data

Unnamed: 0,구분,도로종류,사고(건),사망(명),부상(명)
0,2017년,일반국도,363.0,19,698.0
1,2017년,지방도,230.0,10,451.0
2,2017년,특별광역시도,590.0,12,1189.0
3,2017년,시도,577.0,11,1125.0
4,2017년,군도,101.0,3,169.0
6,2017년,기타,21.0,0,41.0
7,2018년,일반국도,249.0,18,487.0
8,2018년,지방도,170.0,11,
9,2018년,특별광역시도,329.0,4,710.0
10,2018년,시도,355.0,2,698.0


In [30]:
# The `NaN` alias was removed in NumPy 2.0, so import the lowercase constant
# and bind it to the same name; the following cell still refers to `NaN`.
from numpy import nan as NaN

df = pd.read_csv('경찰청_졸음운전 교통사고 현황_20191231.csv', engine='python', encoding='CP949')

# Compute the mean of the '사고(건)' column once and reuse it.
mean = df['사고(건)'].mean()
print(mean)

# NOTE: replace() on the whole frame swaps NaN in every column for this one
# mean, not only in '사고(건)'.
drop_data = df.replace(NaN, mean)
drop_data

276.42105263157896


Unnamed: 0,구분,도로종류,사고(건),사망(명),부상(명)
0,2017년,일반국도,363.0,19,698.0
1,2017년,지방도,230.0,10,451.0
2,2017년,특별광역시도,590.0,12,1189.0
3,2017년,시도,577.0,11,1125.0
4,2017년,군도,101.0,3,169.0
5,2017년,고속국도,276.421053,22,273.0
6,2017년,기타,21.0,0,41.0
7,2018년,일반국도,249.0,18,487.0
8,2018년,지방도,170.0,11,276.421053
9,2018년,특별광역시도,329.0,4,710.0


In [35]:
# Reload the raw data so this cell does not depend on earlier modifications.
df = pd.read_csv(
    '경찰청_졸음운전 교통사고 현황_20191231.csv',
    encoding='CP949',
    engine='python',
)
mean = df['부상(명)'].mean()

# Dict-form replace limits the substitution to the '부상(명)' column; NaN in
# the other columns is left as is.
re_df = df.replace(
    to_replace={'부상(명)': NaN},
    value={'부상(명)': mean},
)
re_df

Unnamed: 0,구분,도로종류,사고(건),사망(명),부상(명)
0,2017년,일반국도,363.0,19,698.0
1,2017년,지방도,230.0,10,451.0
2,2017년,특별광역시도,590.0,12,1189.0
3,2017년,시도,577.0,11,1125.0
4,2017년,군도,101.0,3,169.0
5,2017년,고속국도,,22,273.0
6,2017년,기타,21.0,0,41.0
7,2018년,일반국도,249.0,18,487.0
8,2018년,지방도,170.0,11,585.15
9,2018년,특별광역시도,329.0,4,710.0


In [36]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; `count` reflects only the non-missing values in each column.
re_df.describe()

Unnamed: 0,사고(건),사망(명),부상(명)
count,19.0,21.0,21.0
mean,276.421053,11.0,585.15
std,226.393295,8.282512,477.019735
min,21.0,0.0,41.0
25%,110.0,3.0,206.0
50%,230.0,11.0,487.0
75%,359.0,18.0,710.0
max,876.0,29.0,1629.0


### DataFrame.fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None)

In [40]:
# Smallest value in the '사고(건)' column.
df['사고(건)'].min()

21.0

In [41]:
# Smallest value in the '부상(명)' column.
df['부상(명)'].min()

41.0

In [42]:
# Smallest value in the '사망(명)' column.
df['사망(명)'].min()

0

In [61]:
# Take the minimum straight from the '부상(명)' column instead of reducing every
# column (including the text ones) with df.min() and then picking one entry.
# NOTE: a scalar fill value is applied to the NaN cells of every column, so
# gaps in '사고(건)' also receive the '부상(명)' minimum.
df = df.fillna(df['부상(명)'].min())
df

Unnamed: 0,구분,도로종류,사고(건),사망(명),부상(명)
0,2017년,일반국도,363.0,19,698.0
1,2017년,지방도,230.0,10,451.0
2,2017년,특별광역시도,590.0,12,1189.0
3,2017년,시도,577.0,11,1125.0
4,2017년,군도,101.0,3,169.0
5,2017년,고속국도,41.0,22,273.0
6,2017년,기타,21.0,0,41.0
7,2018년,일반국도,249.0,18,487.0
8,2018년,지방도,170.0,11,41.0
9,2018년,특별광역시도,329.0,4,710.0
