In [1]:
# Pandas 
%pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

In [3]:
# A Series resembles a list, except that its index labels can be set explicitly.
growth = pd.Series(
    data=[143, 150, 157, 160],
    index=['2018', '2019', '2020', '2021'],
)
growth

2018    143
2019    150
2020    157
2021    160
dtype: int64

In [4]:
print(growth.values) # .values gives just the data, without the index
print(growth.values[0]) # a single element can be picked out of .values by position

[143 150 157 160]
143


In [5]:
print(growth.index) # fetch only the index labels
print(growth.index[0]) # fetch a single label by position

Index(['2018', '2019', '2020', '2021'], dtype='object')
2018


In [6]:
growth["2019"] # look up a single value by its index label

150

In [7]:
# DataFrame, approach 1: build the frame from a dict of column name -> list of values.
# pandas is already imported as pd near the top of the notebook, so it is not re-imported here.
index = ["2018", "2019", "2020", "2021"]
data = {
    "영희": [143, 150, 157, 160],
    "철수": [165, 172, 175, 180],
}
# columns= fixes the column order; index= supplies the row labels.
growth = pd.DataFrame(data, columns=["영희", "철수"], index=index)
growth

Unnamed: 0,영희,철수
2018,143,165
2019,150,172
2020,157,175
2021,160,180


In [8]:
# DataFrame, approach 2: assemble the frame from Series that share one index.
index = ["2018", "2019", "2020", "2021"]

Y = pd.Series(data=[143, 150, 157, 160], index=index)
C = pd.Series(data=[165, 172, 175, 180], index=index)

# Each dict key becomes a column name; rows are aligned on the shared index.
growth = pd.DataFrame(data={"영희": Y, "철수": C})
growth

Unnamed: 0,영희,철수
2018,143,165
2019,150,172
2020,157,175
2021,160,180


In [9]:
print(growth.dtypes)

영희    int64
철수    int64
dtype: object


In [10]:
# astype returns a converted copy, bound here to `a`; `growth` itself keeps its original dtypes.
a = growth.astype('float')

In [11]:
print(growth)

       영희   철수
2018  143  165
2019  150  172
2020  157  175
2021  160  180


In [12]:
growth.astype({'영희':'float'}) # change the dtype of the listed column only

Unnamed: 0,영희,철수
2018,143.0,165
2019,150.0,172
2020,157.0,175
2021,160.0,180


In [13]:
# Read data from a CSV file
pd.read_csv("1.csv")

Unnamed: 0,A,B,C
0,30,21,9


In [14]:
pd.read_csv("2.csv")

Unnamed: 0,A,B,C
0,30,21,9
1,11,2,3


In [15]:
pd.read_csv("3.csv", index_col=0) # use column 0 as the index

Unnamed: 0,A,B,C
0,30,21,9
1,11,2,3


In [16]:
print(growth)

       영희   철수
2018  143  165
2019  150  172
2020  157  175
2021  160  180


In [17]:
growth.to_csv("output.csv") # write the frame out as output.csv

In [18]:
growth.to_csv("output.csv", index = True) # include the index in the file

In [19]:
growth.to_csv("output.csv", index = False) # leave the index out of the file

In [20]:
# Selecting a single column: rebuild the sample frame first.
index = [str(year) for year in range(2018, 2022)]
data = {
    "영희": [143, 150, 157, 160],
    "철수": [165, 172, 175, 180],
}
growth = pd.DataFrame(data=data, index=index, columns=["영희", "철수"])
growth

Unnamed: 0,영희,철수
2018,143,165
2019,150,172
2020,157,175
2021,160,180


In [21]:
# Approach 1: bracket access by column name
growth['영희']

2018    143
2019    150
2020    157
2021    160
Name: 영희, dtype: int64

In [22]:
# Approach 2: attribute access
growth.영희

2018    143
2019    150
2020    157
2021    160
Name: 영희, dtype: int64

In [23]:
# Part of a column can be selected as well
# Approach 1: bracket access, then slice from position 1 onward
growth['영희'][1:]

2019    150
2020    157
2021    160
Name: 영희, dtype: int64

In [24]:
# Approach 2: attribute access, then slice from position 1 onward
growth.영희[1:]

2019    150
2020    157
2021    160
Name: 영희, dtype: int64

In [25]:
# .iloc[row, column] - selection by integer position
growth.iloc[0]

영희    143
철수    165
Name: 2018, dtype: int64

In [26]:
growth.iloc[1]

영희    150
철수    172
Name: 2019, dtype: int64

In [27]:
growth.iloc[:2, 0] # first two rows (positions 0 and 1) of column 0

2018    143
2019    150
Name: 영희, dtype: int64

In [28]:
growth.iloc[[0,1,2], 0] # same result as growth.iloc[:3, 0]

2018    143
2019    150
2020    157
Name: 영희, dtype: int64

In [29]:
# .loc[]
# For a more intuitive lookup
# When you want to name exactly what to fetch (row label, column label)
growth.loc['2020', '영희']

157

In [30]:
growth.loc['2020', '철수']

175

In [31]:
growth.loc['2021']

영희    160
철수    180
Name: 2021, dtype: int64

In [32]:
growth.loc[ :, '영희']

2018    143
2019    150
2020    157
2021    160
Name: 영희, dtype: int64

In [33]:
growth.loc['2020', '철수']

175

In [34]:
growth.loc[["2018", '2020'], ['영희','철수']]

Unnamed: 0,영희,철수
2018,143,165
2020,157,175


In [35]:
# .iloc vs .loc
growth.iloc[[0,1], [0,1]] # .iloc is position-based, as opposed to label-based .loc

Unnamed: 0,영희,철수
2018,143,165
2019,150,172


In [36]:
# .loc: slices can be written with labels; the end label is included in the result
growth.loc['2018' : '2020', '영희' : '철수']

Unnamed: 0,영희,철수
2018,143,165
2019,150,172
2020,157,175


In [37]:
growth.iloc[0:3, 0:2]

Unnamed: 0,영희,철수
2018,143,165
2019,150,172
2020,157,175


In [38]:
growth.set_index('영희') # use a specific column as the index

Unnamed: 0_level_0,철수
영희,Unnamed: 1_level_1
143,165
150,172
157,175
160,180


In [39]:
# Conditional selection (boolean condition) - important!!!
# Keeps only the rows where the condition evaluates to True.
growth.loc[growth.영희>150]

Unnamed: 0,영희,철수
2020,157,175
2021,160,180


In [40]:
growth.loc[growth.영희 == 157]

Unnamed: 0,영희,철수
2020,157,175


In [41]:
growth.영희>150

2018    False
2019    False
2020     True
2021     True
Name: 영희, dtype: bool

In [42]:
# Transposed layout: people are the row labels and years are the columns.
index = ["영희", "철수", "영철"]
data = {
    "2018": [143, 165, 170],
    "2019": [150, 172, 177],
    "2020": [157, 175, 180],
}
# No columns= argument here, so the column order follows the dict's key order.
growth = pd.DataFrame(data=data, index=index)
growth

Unnamed: 0,2018,2019,2020
영희,143,150,157
철수,165,172,175
영철,170,177,180


In [43]:
growth.loc[growth['2019'] == 150]

Unnamed: 0,2018,2019,2020
영희,143,150,157


In [44]:
'''
isin = 'or'
'''

"\nisin = 'or'\n"

In [45]:
# Assigning a scalar to an existing column overwrites every row of that column.
growth['2019'] = 190

In [46]:
print(growth)

    2018  2019  2020
영희   143   190   157
철수   165   190   175
영철   170   190   180


In [47]:
# Assigning to a label that does not exist yet adds a new column filled with the scalar.
growth['2021'] = 200

In [48]:
print(growth)

    2018  2019  2020  2021
영희   143   190   157   200
철수   165   190   175   200
영철   170   190   180   200


In [49]:
# New column holding a countdown from len(growth) to 1, one value per row.
growth["거꾸로"] = range(len(growth), 0, -1)
print(growth)

    2018  2019  2020  2021  거꾸로
영희   143   190   157   200    3
철수   165   190   175   200    2
영철   170   190   180   200    1


In [53]:
# Applying the Kaggle example
def check(col, key="영희", high=160, low=150):
    """Grade the value stored under ``key`` in ``col`` against two thresholds.

    Args:
        col: Any object that supports ``col[key]``, for example the Series that
            ``DataFrame.apply`` passes to the function.
        key: Label of the entry to grade. Defaults to "영희".
        high: Values strictly greater than this are graded 2. Defaults to 160.
        low: Values strictly greater than this (but not greater than ``high``)
            are graded 1. Defaults to 150.

    Returns:
        int: 2, 1 or 0 according to the thresholds above.
    """
    value = col[key]
    if value > high:
        return 2
    if value > low:
        return 1
    return 0

In [52]:
growth.apply(check, axis = 'index') # axis='index': check is called once per column, so the result is labelled by column name

2018    0
2019    2
2020    1
2021    2
거꾸로     0
dtype: int64

In [None]:
# Kaggle
# 4. Grouping and Sorting
"""
agg(x): x의 통계를 내준다

"""