In [1]:
import pandas as pd
import numpy as np


In [4]:
# Build a small Series: the values 1, 2, 3 labelled 'a', 'b', 'c'.
data = [1, 2, 3]
ser = pd.Series(data=data, index=list('abc'))
ser

a    1
b    2
c    3
dtype: int64

In [7]:
# Label-based slice from 'a' to 'c' (same as ser.loc['a':'c']); the end label
# is included, so all three rows are returned.
ser.loc[slice('a', 'c')]

a    1
b    2
c    3
dtype: int64

In [17]:
# Boolean mask: only the entries at positions marked True are shown.
ser.loc[[True, False, True]]

a    1
c    3
dtype: int64

In [13]:
# Element-wise "not equal to 2" comparison; the result is a boolean Series.
ser.ne(2)

a     True
b    False
c     True
dtype: bool

In [16]:
# Use the condition itself as the mask: only entries where it is True are shown.
ser.loc[ser.ne(2)]

a    1
c    3
dtype: int64

## DataFrame

In [20]:
# Build a 3x3 DataFrame with row labels r1..r3 and column labels c1..c3.
data = [
    [1, 10, 100],
    [2, 20, 200],
    [3, 30, 300],
]
df = pd.DataFrame(
    data=data,
    index=['r1', 'r2', 'r3'],
    columns=['c1', 'c2', 'c3'],
)
df

Unnamed: 0,c1,c2,c3
r1,1,10,100
r2,2,20,200
r3,3,30,300


In [24]:
# Show the values of columns c2 and c3 for rows r1 and r3 (label-based selection).
df.loc[
    ['r1', 'r3'],  # row labels to keep
    ['c2', 'c3'],  # column labels to keep
]

Unnamed: 0,c2,c3
r1,10,100
r3,30,300


## CSV 파일 불러오기

In [28]:
# Load the anime CSV with default options and preview it. No index column is
# named here, and the rows in the output are labelled 0, 1, 2, ...
df = pd.read_csv('../data/anime.csv')
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [29]:
# Choose the index column by position: column 0 (anime_id in the output) becomes the index.
df = pd.read_csv('../data/anime.csv', index_col=0)
df.head()

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [30]:
# Choose the index column by name instead of by position.
df = pd.read_csv('../data/anime.csv', index_col='anime_id')
df.head()

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [31]:
# Load a chosen column with a chosen dtype: 'members' is read as float
# (it is displayed as 200630.0 rather than 200630 in the output).
df = pd.read_csv('../data/anime.csv', dtype={'members': float})
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630.0
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665.0
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262.0
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572.0
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266.0


In [40]:
# Load the 'Date' column as datetime, then call .info() to check the resulting dtypes.
pd.read_csv('../data/anime_stock_price.csv', parse_dates=['Date']).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522 entries, 0 to 521
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            522 non-null    datetime64[ns]
 1   TOEI ANIMATION  522 non-null    float64       
 2   IG Port         522 non-null    float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 12.4 KB


## Excel 파일 불러오기

In [49]:
# openpyxl is not referenced by name in this cell; presumably it is imported to
# make sure the engine pandas needs for .xlsx files is installed — TODO confirm.
import openpyxl

# Load the Excel workbook into df and preview the first rows.
df = pd.read_excel('../data/anime.xlsx')
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534
2,28851,Koe no Katachi,"Drama, School, Shounen",Movie,1,9.05,102733
3,199,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",Movie,1,8.93,466254
4,12355,Ookami Kodomo no Ame to Yuki,"Fantasy, Slice of Life",Movie,1,8.84,226193


## SQL로 DataFrame 조회하기 (pandasql)

In [43]:
!pip install pandasql

Collecting pandasql
  Using cached pandasql-0.7.3-py3-none-any.whl
DEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063
Installing collected packages: pandasql
Successfully installed pandasql-0.7.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [45]:
from pandasql import sqldf


def dfsql(q):
    """Run the SQL query string ``q`` through pandasql's ``sqldf``.

    The notebook's global namespace is passed as the environment, so a query
    can refer to module-level objects such as ``df`` by name.

    Args:
        q: SQL query text.

    Returns:
        Whatever ``sqldf`` returns for the query (used as a DataFrame below).
    """
    # globals() returns the module-level variables as a dict.
    return sqldf(q, globals())


# Names and ratings of the rows rated above 9, highest rating first.
result = dfsql('select name, rating from df where rating > 9 order by rating desc')
result.head()

Unnamed: 0,name,rating
0,Taka no Tsume 8: Yoshida-kun no X-Files,10.0
1,Mogura no Motoro,9.5
2,Kimi no Na wa.,9.37
3,Kahei no Umi,9.33
4,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,9.1


In [47]:
# Write the query result to a JSON file; orient='table' selects the JSON layout.
result.to_json('../data/result.json', orient='table')

## HTML

In [50]:
# Parse the HTML tables on the Python module-index page; the result is
# indexed by position below, element 0 being the first table.
url = 'https://docs.python.org/3/py-modindex.html'
table = pd.read_html(url)

# From the first DataFrame, drop the empty column (only columns labelled 1 and
# onward are kept) and the rows with missing values, then show the first 10 rows.
table[0].loc[:,1:].dropna().head(10)

Unnamed: 0,1,2
2,__future__,Future statement definitions
3,__main__,The environment where top-level code is run. C...
4,_thread,Low-level threading API.
5,_tkinter,A binary module that contains the low-level in...
8,abc,Abstract base classes according to :pep:`3119`.
9,aifc,Deprecated: Read and write audio files in AIF...
10,argparse,Command-line option and argument parsing library.
11,array,Space efficient arrays of uniformly typed nume...
12,ast,Abstract Syntax Tree classes and manipulation.
13,asyncio,Asynchronous I/O.


## 그 밖의 메소드들

In [52]:
# DataFrame.where keeps every row: rows whose rating is not below 9.2 are
# blanked out to NaN instead of being removed (see row 0 in the output).
df.where(df['rating'].lt(9.2)).head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,,,,,,,
1,15335.0,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1.0,9.1,72534.0
2,28851.0,Koe no Katachi,"Drama, School, Shounen",Movie,1.0,9.05,102733.0
3,199.0,Sen to Chihiro no Kamikakushi,"Adventure, Drama, Supernatural",Movie,1.0,8.93,466254.0
4,12355.0,Ookami Kodomo no Ame to Yuki,"Fantasy, Slice of Life",Movie,1.0,8.84,226193.0


In [53]:
# Order the rows by rating, highest first, and preview the top of the result.
df.sort_values(by='rating', ascending=False).head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
2143,33662,Taka no Tsume 8: Yoshida-kun no X-Files,"Comedy, Parody",Movie,1,10.0,13
1894,23005,Mogura no Motoro,Slice of Life,Movie,1,9.5,62
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1753,33607,Kahei no Umi,Historical,Movie,1,9.33,44
1,15335,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien...,"Action, Comedy, Historical, Parody, Samurai, S...",Movie,1,9.1,72534
