# PYTHON PROGRAMMING FUNDAMENTALS  


# Pandas 의 장점
- Allows the use of labels for rows and columns
- 기본적인 통계데이터 제공
- NaN values 를 알아서 처리함.
- 숫자 문자열을 알아서 로드함.
- 데이터셋들을 merge 할 수 있음.
- It integrates with NumPy and Matplotlib

In [1]:
import pandas as pd

## Pandas Series 데이터 생성하기
# Series 는 하나의 row만 있고 레이블당 하나의 값만 있음, 여러개면 데이터 프레임이라고 부름

In [86]:
groceries = pd.Series(data = [30, 6,'Yes', 'No'], 
                      index = ['eggs', 'apples', 'milk', 'bread'])
# index 는 데이터 레이블링

In [87]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [88]:
groceries.shape
# 1차원임

(4,)

In [89]:
groceries.index

Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')

In [90]:
x = 'bananas' in groceries

In [91]:
x

False

## Accessing and Deleting Elements in Pandas Series - 레이블과 인덱스

In [92]:
groceries['eggs']

30

In [93]:
groceries[['milk', 'bread']]

milk     Yes
bread     No
dtype: object

In [94]:
# loc 레이블로 접근하기
groceries.loc[['eggs', 'apples']]

eggs      30
apples     6
dtype: object

In [95]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [96]:
# iloc 인덱스로 접근하기
groceries.iloc[0]

30

In [97]:
groceries.iloc[[0, 1]]

eggs      30
apples     6
dtype: object

In [98]:
# 특정 레이블(eggs)의 값을 바꾸기
groceries['eggs'] = 2

In [99]:
groceries

eggs        2
apples      6
milk      Yes
bread      No
dtype: object

In [100]:
groceries.iloc[0] = 50

In [101]:
groceries

eggs       50
apples      6
milk      Yes
bread      No
dtype: object

In [102]:
groceries.loc['eggs'] = 30

In [103]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [104]:
# apples 의 항목을 아예 제외하고 본다.
groceries.drop('apples')

eggs      30
milk     Yes
bread     No
dtype: object

In [105]:
# drop가 원본에서 삭제하지 않으므로 새로운 변수로 저장해서 사용필요
new_groceries = groceries.drop('apples')
new_groceries

eggs      30
milk     Yes
bread     No
dtype: object

In [106]:
# 원본에서 지우고 싶다면?
groceries.drop('bread', inplace = True)

In [107]:
groceries

eggs       30
apples      6
milk      Yes
dtype: object

## Arithmetic Operations on Pandas Series

In [108]:
fruits = pd.Series(data = [10, 6, 3], index = ['apples', 'oranges', 'bananas'])

In [109]:
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [110]:
fruits + 2

apples     12
oranges     8
bananas     5
dtype: int64

In [111]:
fruits - 2

apples     8
oranges    4
bananas    1
dtype: int64

In [112]:
fruits / 2

apples     5.0
oranges    3.0
bananas    1.5
dtype: float64

In [113]:
import numpy as np

In [114]:
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [115]:
type(fruits)

pandas.core.series.Series

In [116]:
np.exp(fruits)

apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

In [117]:
np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

In [118]:
# 제곱하는데(power), 3제곱하겠다
np.power(fruits, 3)

apples     1000
oranges     216
bananas      27
dtype: int64

In [119]:
fruits['bananas'] + 10

13

In [120]:
fruits.iloc[2] + 10

13

In [121]:
fruits[['apples', 'oranges']]

apples     10
oranges     6
dtype: int64

In [122]:
fruits[['apples', 'oranges']] * 100

apples     1000
oranges     600
dtype: int64

# 실습
import pandas as pd

## 1. 다음과 같은 레이블과 값을 가지는 Pandas Series 를 만드세요. 변수는 dist_planets 로 만드세요.

### distance_from_sun = [149.6, 1433.5, 227.9, 108.2, 778.6]

### planets = ['Earth','Saturn', 'Mars','Venus', 'Jupiter']

### dist_planets = 



## 3. 거리를 빛의 상수 c( 18 ) 로 나눠서, 가는 시간이 얼마나 걸리는 지 계산하여 저장하세요.
### time_light = 

## 3. Boolean indexing을 이용해서 가는 시간이 40분보다 작은것들만 셀렉트 하세요.
### close_planets = 

In [132]:
distance_from_sun = [149.6, 1433.5, 227.9, 108.2, 778.6]
planets =['Earth','Saturn', 'Mars','Venus', 'Jupiter']


In [133]:
dist_planets= pd.Series(data = distance_from_sun, 
                        index = planets)

dist_planets                       

Earth       149.6
Saturn     1433.5
Mars        227.9
Venus       108.2
Jupiter     778.6
dtype: float64

In [134]:
time_light = dist_planets / 18 

In [135]:
time_light

Earth       8.311111
Saturn     79.638889
Mars       12.661111
Venus       6.011111
Jupiter    43.255556
dtype: float64

## Pandas Dataframe

### 레이블로 생성하기

In [140]:
# 딕셔너리로 생성함
items = {'Bob': pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
        'Alice': pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']) }

In [141]:
type(items)

dict

In [142]:
items

{'Bob': bike     245
 pants     25
 watch     55
 dtype: int64, 'Alice': book        40
 glasses    110
 bike       500
 pants       45
 dtype: int64}

In [145]:
# 키와 벨류로 구성된 item 딕셔너리를 데이터 프레임으로 만들자
shopping_carts = pd.DataFrame(items)

In [146]:
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


#NaN 은 해당 항목에 값이 없음을 뜻합니다.  (Not a Number)

### 인덱스로 생성하기

In [344]:
# 인덱스 레이블링을 하지 않았으면 숫자로 인덱스가 생성됨
data = {'Bob' : pd.Series([245, 25, 55]),
       'Alice' : pd.Series([40, 110, 500, 45])}

In [345]:
type(data)

dict

In [346]:
data

{'Bob': 0    245
 1     25
 2     55
 dtype: int64, 'Alice': 0     40
 1    110
 2    500
 3     45
 dtype: int64}

In [153]:
df = pd.DataFrame(data)

In [154]:
df

Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [156]:
shopping_carts.shape

(5, 2)

In [159]:
shopping_carts.size
# 전체 데이터 갯수

10

In [160]:
shopping_carts.values

array([[245., 500.],
       [ nan,  40.],
       [ nan, 110.],
       [ 25.,  45.],
       [ 55.,  nan]])

In [161]:
type(shopping_carts.values)

numpy.ndarray

In [163]:
shopping_carts.values.max
# NaN이 있어서 안되는 것임

<function ndarray.max>

In [164]:
shopping_carts.index

Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

In [165]:
shopping_carts.columns

Index(['Bob', 'Alice'], dtype='object')

In [170]:
#Bob의 데이터만 데이터프레임으로 만들자
bob_shopping_cart = pd.DataFrame(items, columns = ['Bob'])

In [171]:
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


## Accessing Elements in Pandas DataFrames

In [196]:
# 딕셔너리가 2개 있는 리스트 만들기
items2 = [
    {'bike':20, 'pants':30, 'watches':35},
    {'watches':10, 'glasses':50, 'bikes':15, 'pants':5}
]

In [197]:
store_item = pd.DataFrame(items2, index = {'store 1', 'store 2'})

In [198]:
# 주로 컬럼별로 분석을 하기 때문에 사람별로 분석하겠다는 의미
shopping_carts


Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [179]:
# item별로 분석하겠다는 의미
store_item

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,,,30,35
store 1,,15.0,50.0,5,10


In [180]:
store_item[['glasses', 'pants']]

Unnamed: 0,glasses,pants
store 2,,30
store 1,50.0,5


In [181]:
#store 1을 가지고 오고 싶다면
store_item.loc['store 1']

bike        NaN
bikes      15.0
glasses    50.0
pants       5.0
watches    10.0
Name: store 1, dtype: float64

In [182]:
store_item.iloc[1]

bike        NaN
bikes      15.0
glasses    50.0
pants       5.0
watches    10.0
Name: store 1, dtype: float64

## drop 을 이용해서 제거해보자

In [189]:
store_item

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,,,30,35
store 1,,15.0,50.0,5,10


In [191]:
store_item = store_item.drop(['glasses'], axis = 1)
# 열(컬럼)을 드롭하겠다

In [192]:
store_item

Unnamed: 0,bike,bikes,pants,watches
store 2,20.0,,30,35
store 1,,15.0,5,10


In [194]:
# 행(로우)을 드롭하겠다
store_item = store_item.drop(['store 1'], axis = 0)

In [195]:
store_item

Unnamed: 0,bike,bikes,pants,watches
store 2,20.0,,30,35


## Dealing with NaN

In [210]:
store_item = pd.DataFrame(items2, index = {'store 1', 'store 2'})

In [211]:
store_item

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,,,30,35
store 1,,15.0,50.0,5,10


In [212]:
# isnull 는 '빈값이면'의 의미
x = store_item.isnull().sum()

In [213]:
x

bike       1
bikes      1
glasses    1
pants      0
watches    0
dtype: int64

In [214]:
x.sum()

3

In [215]:
store_item.isnull()

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,False,True,True,False,False
store 1,True,False,False,False,False


In [216]:
store_item.isnull().sum().sum()

3

In [219]:
store_item.count()
#NaN이 아닌 데이터 개수

bike       1
bikes      1
glasses    1
pants      2
watches    2
dtype: int64

In [220]:
store_item.count().sum()

7

In [226]:
# drapna : NaN이 있으면 삭제하라는 의미, 행고정
store_item.dropna(axis=0)

Unnamed: 0,bike,bikes,glasses,pants,watches


In [227]:
# NaN값이 있는 열을 제거
store_item.dropna(axis=1)

Unnamed: 0,pants,watches
store 2,30,35
store 1,5,10


In [228]:
# fillna : NaN이면 모두 0으로 채우겠다
store_item.fillna(0)

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,0.0,0.0,30,35
store 1,0.0,15.0,50.0,5,10


In [230]:
# NaN이면 앞에 있는 데이터와 동일하게 맞추겠다
a= store_item.fillna(method = 'ffill', axis=0)

In [231]:
a

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,,,30,35
store 1,20.0,15.0,50.0,5,10


In [232]:
a.fillna(method = 'ffill', axis=1)

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,20.0,20.0,30.0,35.0
store 1,20.0,15.0,50.0,5.0,10.0


In [234]:
# 뒤에 있는 데이터와 동일하게 하겠다.
store_item.fillna(method = 'backfill', axis=0)

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,15.0,50.0,30,35
store 1,,15.0,50.0,5,10


In [236]:
# 비슷한 수와 맞추겠다.
store_item.interpolate(method = 'linear', axis = 0)

Unnamed: 0,bike,bikes,glasses,pants,watches
store 2,20.0,,,30,35
store 1,20.0,15.0,50.0,5,10


# 실습

In [241]:
import pandas as pd
import numpy as np

# 각 유저별 별점을 주는것이므로, 1 decimal 로 셋팅.
pd.set_option('precision', 1)

# 책 제목과 작가, 그리고 유저별 별점 데이터가 있다.

books = pd.Series(data = ['Great Expectations', 'Of Mice and Men', 'Romeo and Juliet', 'The Time Machine', 'Alice in Wonderland' ])
authors = pd.Series(data = ['Charles Dickens', 'John Steinbeck', 'William Shakespeare', ' H. G. Wells', 'Lewis Carroll' ])

user_1 = pd.Series(data = [3.2, np.nan ,2.5])
user_2 = pd.Series(data = [5., 1.3, 4.0, 3.8])
user_3 = pd.Series(data = [2.0, 2.3, np.nan, 4])
user_4 = pd.Series(data = [4, 3.5, 4, 5, 4.2])

#  np.nan values 는 해당 유저가 해당 책에는 아직 별점 주지 않은것이다.
# labels: 'Author', 'Book Title', 'User 1', 'User 2', 'User 3', 'User 4'. 
# 아래 그림처럼 나오도록 만든다.


# 1. 딕셔너리를 만들고,    2. 데이터프레임으로 만든 후,    3. nan을  평균값으로 채운다.

# 1. 딕셔너리를 만들고,
dat = {'Book Title' : books,
       'Author' : authors,
       'User 1' : user_1,
       'User 2' : user_2,
       'User 3' : user_3,
       'User 4' : user_4
      }


[%E1%84%89%E1%85%B3%E1%84%8F%E1%85%B3%E1%84%85%E1%85%B5%E1%86%AB%E1%84%89%E1%85%A3%E1%86%BA%202019-07-27%2023.54.38.png](attachment:%E1%84%89%E1%85%B3%E1%84%8F%E1%85%B3%E1%84%85%E1%85%B5%E1%86%AB%E1%84%89%E1%85%A3%E1%86%BA%202019-07-27%2023.54.38.png)

In [245]:
# 2. 데이터프레임으로 만든 후 
book_ratings = pd.DataFrame(dat)
book_ratings

Unnamed: 0,Book Title,Author,User 1,User 2,User 3,User 4
0,Great Expectations,Charles Dickens,3.2,5.0,2.0,4.0
1,Of Mice and Men,John Steinbeck,,1.3,2.3,3.5
2,Romeo and Juliet,William Shakespeare,2.5,4.0,,4.0
3,The Time Machine,H. G. Wells,,3.8,4.0,5.0
4,Alice in Wonderland,Lewis Carroll,,,,4.2


In [248]:
#3. nan을  평균값으로 채운다.
book_ratings.fillna(book_ratings.mean())

Unnamed: 0,Book Title,Author,User 1,User 2,User 3,User 4
0,Great Expectations,Charles Dickens,3.2,5.0,2.0,4.0
1,Of Mice and Men,John Steinbeck,2.9,1.3,2.3,3.5
2,Romeo and Juliet,William Shakespeare,2.5,4.0,2.8,4.0
3,The Time Machine,H. G. Wells,2.9,3.8,4.0,5.0
4,Alice in Wonderland,Lewis Carroll,2.9,3.5,2.8,4.2


## Loading Data into a Pandas DataFrame

In [249]:
Google_stock = pd.read_csv('GOOG.csv')

In [250]:
Google_stock.shape

(3313, 7)

In [251]:
Google_stock

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,49.7,51.7,47.7,49.8,49.8,44994500
1,2004-08-20,50.2,54.2,49.9,53.8,53.8,23005800
2,2004-08-23,55.0,56.4,54.2,54.3,54.3,18393200
3,2004-08-24,55.3,55.4,51.5,52.1,52.1,15361800
4,2004-08-25,52.1,53.7,51.6,52.7,52.7,9257400
5,2004-08-26,52.1,53.6,52.0,53.6,53.6,7148200
6,2004-08-27,53.7,54.0,52.5,52.7,52.7,6258300
7,2004-08-30,52.3,52.4,50.7,50.7,50.7,5235700
8,2004-08-31,50.8,51.5,50.7,50.9,50.9,4954800
9,2004-09-01,51.0,51.2,49.5,49.8,49.8,9206800


In [252]:
# head함수 앞에서부터 5줄(default), 원하는 숫자를 괄호안에 넣으면 그만큼 보여줌
Google_stock.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,49.7,51.7,47.7,49.8,49.8,44994500
1,2004-08-20,50.2,54.2,49.9,53.8,53.8,23005800
2,2004-08-23,55.0,56.4,54.2,54.3,54.3,18393200
3,2004-08-24,55.3,55.4,51.5,52.1,52.1,15361800
4,2004-08-25,52.1,53.7,51.6,52.7,52.7,9257400


In [254]:
Google_stock.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2004-08-19,49.7,51.7,47.7,49.8,49.8,44994500
1,2004-08-20,50.2,54.2,49.9,53.8,53.8,23005800
2,2004-08-23,55.0,56.4,54.2,54.3,54.3,18393200
3,2004-08-24,55.3,55.4,51.5,52.1,52.1,15361800
4,2004-08-25,52.1,53.7,51.6,52.7,52.7,9257400
5,2004-08-26,52.1,53.6,52.0,53.6,53.6,7148200
6,2004-08-27,53.7,54.0,52.5,52.7,52.7,6258300
7,2004-08-30,52.3,52.4,50.7,50.7,50.7,5235700
8,2004-08-31,50.8,51.5,50.7,50.9,50.9,4954800
9,2004-09-01,51.0,51.2,49.5,49.8,49.8,9206800


In [258]:
# tail함수는 뒤에서 5개
Google_stock.tail()


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
3308,2017-10-09,980.0,985.4,976.1,977.0,977.0,891400
3309,2017-10-10,980.0,981.6,966.1,972.6,972.6,968400
3310,2017-10-11,973.7,990.7,972.2,989.2,989.2,1693300
3311,2017-10-12,987.5,994.1,985.0,987.8,987.8,1262400
3312,2017-10-13,992.0,997.2,989.0,989.7,989.7,1157700


In [260]:
Google_stock.isnull()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False


In [261]:
Google_stock.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [263]:
# describe 개수, 평균, 표준편차, 최소~최대값까지 모여줌
Google_stock.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,3313.0,3313.0,3313.0,3313.0,3313.0,3300.0
mean,380.2,383.5,376.5,380.1,380.1,8000000.0
std,223.8,225.0,222.5,223.9,223.9,8400000.0
min,49.3,50.5,47.7,49.7,49.7,7900.0
25%,226.6,228.4,224.0,226.4,226.4,2600000.0
50%,293.3,295.4,289.9,293.0,293.0,5300000.0
75%,536.7,540.0,532.4,536.7,536.7,11000000.0
max,992.0,997.2,989.0,989.7,989.7,83000000.0


In [264]:
Google_stock.max()

Date         2017-10-13
Open              1e+03
High              1e+03
Low               1e+03
Close             1e+03
Adj Close         1e+03
Volume         82768100
dtype: object

In [265]:
#

Google_stock.corr()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
Open,1.0,1.0,1.0,1.0,1.0,-0.6
High,1.0,1.0,1.0,1.0,1.0,-0.6
Low,1.0,1.0,1.0,1.0,1.0,-0.6
Close,1.0,1.0,1.0,1.0,1.0,-0.6
Adj Close,1.0,1.0,1.0,1.0,1.0,-0.6
Volume,-0.6,-0.6,-0.6,-0.6,-0.6,1.0


# 또다른 csv파일을 불러와서 보자

In [266]:
data = pd.read_csv('fake_company.csv')

In [271]:
data.head(10)

Unnamed: 0,Year,Name,Department,Age,Salary
0,1990,Alice,HR,25,50000
1,1990,Bob,RD,30,48000
2,1990,Charlie,Admin,45,55000
3,1991,Alice,HR,26,52000
4,1991,Bob,RD,31,50000
5,1991,Charlie,Admin,46,60000
6,1992,Alice,HR,27,60000
7,1992,Bob,RD,32,52000
8,1992,Charlie,Admin,47,62000


In [272]:
# 연도(year)별로 급여(salary) 합산을 해보자
data.groupby(['Year'])['Salary'].sum()

Year
1990    153000
1991    162000
1992    174000
Name: Salary, dtype: int64

In [273]:
# 평균연봉은 얼마?
data.groupby(['Year'])['Salary'].mean()

Year
1990    51000
1991    54000
1992    58000
Name: Salary, dtype: int64

In [275]:
# 개인별(Name) 급여 합산
data.groupby(['Name'])['Salary'].sum()

Name
Alice      162000
Bob        150000
Charlie    177000
Name: Salary, dtype: int64

In [277]:
# 연도별(Year) 부서별(Department) 급여(Salary) 합산(sum)
data.groupby(['Year','Department'])['Salary'].sum()

Year  Department
1990  Admin         55000
      HR            50000
      RD            48000
1991  Admin         60000
      HR            52000
      RD            50000
1992  Admin         62000
      HR            60000
      RD            52000
Name: Salary, dtype: int64

# GETTING HTML DATA(HTML 데이터 불러오기)

## https://www.livingin-canada.com/house-prices-canada.html

In [278]:
df = pd.read_html('https://www.livingin-canada.com/house-prices-canada.html')

In [279]:
df[0]

Unnamed: 0,0,1,2
0,City,Average House Price,12 Month Change
1,"Vancouver, BC","$1,092,000",+ 14.3 %
2,"Toronto, Ont","$766,000",– 5.1 %
3,"Calgary, Alb","$431,000",+ 0.1 %
4,"Ottawa, Ont","$382,000",+ 8.3 %
5,"Montreal, Que","$341,000",+ 6.3 %
6,"Halifax, NS","$316,000",+ 2.4 %
7,"Regina, Sask","$276,000",– 6.5 %
8,"Fredericton, NB","$173,000",+ 1.2 %
9,(adsbygoogle = window.adsbygoogle || []).push(...,,


# PANDAS OPERATIONS

In [305]:
df = pd.DataFrame(
    
    {
        'Employee Id' : [111,222,333,444],
        'Employee Name' : ['Channel', 'Stive', 'Mitch', 'Bird'],
        'Salary[%/h]' : [35, 29, 38, 20],
        'Years of Experience' : [3, 4, 9, 1]
    }

)

In [306]:
df

Unnamed: 0,Employee Id,Employee Name,Salary[%/h],Years of Experience
0,111,Channel,35,3
1,222,Stive,29,4
2,333,Mitch,38,9
3,444,Bird,20,1


In [307]:
# 경력이 3년 이상인 이상인 로우들의 데이터프레임으로 새로운 데이터프레임 만들자
df_new = df[df['Years of Experience']>=3 ]

In [308]:
df_new

Unnamed: 0,Employee Id,Employee Name,Salary[%/h],Years of Experience
0,111,Channel,35,3
1,222,Stive,29,4
2,333,Mitch,38,9


In [309]:
del df['Employee Id']

In [310]:
df

Unnamed: 0,Employee Name,Salary[%/h],Years of Experience
0,Channel,35,3
1,Stive,29,4
2,Mitch,38,9
3,Bird,20,1


In [321]:
df.loc[0,'Salary[%/h]']= 35

In [322]:
df

Unnamed: 0,Employee Name,Salary[%/h],Years of Experience
0,Channel,35,3
1,Stive,29,4
2,Mitch,38,9
3,Bird,20,1


In [323]:
df.iloc[0,1]= 45

In [324]:
df

Unnamed: 0,Employee Name,Salary[%/h],Years of Experience
0,Channel,45,3
1,Stive,29,4
2,Mitch,38,9
3,Bird,20,1


In [325]:
df.iloc[0,2]= 3

In [326]:
df

Unnamed: 0,Employee Name,Salary[%/h],Years of Experience
0,Channel,45,3
1,Stive,29,4
2,Mitch,38,9
3,Bird,20,1


# APPLYING FUNCTIONS

In [327]:
# salary_raise 함수를 정의(임금인상)
def salary_raise(salary):
    return salary + 5

In [328]:
df

Unnamed: 0,Employee Name,Salary[%/h],Years of Experience
0,Channel,45,3
1,Stive,29,4
2,Mitch,38,9
3,Bird,20,1


In [330]:
# salary_raise 함수를 사용하여 급여에 5씩 더해줌
df['Salary[%/h]'].apply(salary_raise)

0    50
1    34
2    43
3    25
Name: Salary[%/h], dtype: int64

In [332]:
df['Employee Name'].apply(len)

0    7
1    5
2    5
3    4
Name: Employee Name, dtype: int64

In [334]:
# 경력(Years of Experience) 을 모두 합산
df['Years of Experience'].sum()

17

# SORTING AND ORDERING

In [337]:
df = pd.DataFrame(
    
    {
        'Employee Id' : [111,222,333,444],
        'Employee Name' : ['Channel', 'Stive', 'Mitch', 'Bird'],
        'Salary[%/h]' : [35, 29, 38, 20],
        'Years of Experience' : [3, 4, 9, 1]
    }

)

In [338]:
df

Unnamed: 0,Employee Id,Employee Name,Salary[%/h],Years of Experience
0,111,Channel,35,3
1,222,Stive,29,4
2,333,Mitch,38,9
3,444,Bird,20,1


In [339]:
df.sort_values(by='Years of Experience')

Unnamed: 0,Employee Id,Employee Name,Salary[%/h],Years of Experience
3,444,Bird,20,1
0,111,Channel,35,3
1,222,Stive,29,4
2,333,Mitch,38,9


In [340]:
# 위에서 정렬해도 원본은 그대로임
df

Unnamed: 0,Employee Id,Employee Name,Salary[%/h],Years of Experience
0,111,Channel,35,3
1,222,Stive,29,4
2,333,Mitch,38,9
3,444,Bird,20,1


In [342]:
# 정렬하여 원본을 변경하려면?
df.sort_values(by='Years of Experience', inplace = True)

In [343]:
df

Unnamed: 0,Employee Id,Employee Name,Salary[%/h],Years of Experience
3,444,Bird,20,1
0,111,Channel,35,3
1,222,Stive,29,4
2,333,Mitch,38,9


# CONCATENATING AND MERGING

![image.png](attachment:image.png)
Reference: https://pandas.pydata.org/pandas-docs/stable/merging.html

# EXCELLENT JOB!