## csv 파일 입력

In [1]:
%%writefile sample1.csv
c1, c2, c3
1, 1.11, one
2, 2.22, two
3, 3.33, three

Overwriting sample1.csv


In [45]:
import pandas as pd
import numpy as np

In [3]:
# sample1.csv has a blank after every comma. skipinitialspace=True strips it,
# so the columns are named 'c2'/'c3' (not ' c2'/' c3') and the c3 strings are
# read as 'one', 'two', 'three' without a leading blank.
pd.read_csv('sample1.csv', skipinitialspace=True)

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


### 열 인덱스가 없는 경우 names 인수로 추가

In [4]:
%%writefile sample2.csv
1, 1.11, one
2, 2.22, two
3, 3.33, three

Overwriting sample2.csv


In [5]:
# The file has no header row, so the column labels are supplied via `names`.
# skipinitialspace=True drops the blank that follows each comma in the file;
# without it the c3 strings are read as ' one', ' two', ' three'.
pd.read_csv('sample2.csv', names=['c1', 'c2', 'c3'], skipinitialspace=True)

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [6]:
# Same header-less file, this time promoting column 'c1' to the row index.
# skipinitialspace=True drops the blank that follows each comma in the file,
# so the c3 strings do not carry a leading blank.
pd.read_csv(
    'sample2.csv',
    names=['c1', 'c2', 'c3'],
    index_col='c1',
    skipinitialspace=True,
)

Unnamed: 0_level_0,c2,c3
c1,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.11,one
2,2.22,two
3,3.33,three


In [7]:
%%writefile sample3.txt
1 1.11 one
2 2.22 two
3 3.33 three

Overwriting sample3.txt


In [8]:
# Whitespace-delimited text file without a header row.
# sep=r'\s+' splits on any run of whitespace, so the read keeps working when
# fields are separated by tabs or by more than one blank (sep=' ' treats every
# single blank as a separator and would produce empty fields in that case).
pd.read_table('sample3.txt', sep=r'\s+', names=['c1', 'c2', 'c3'])

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [9]:
# skiprows=[0, 1] discards the first two lines of the file, leaving only the
# third record. sep=r'\s+' splits on any run of whitespace, which is more
# tolerant than a single-blank separator (tabs / repeated blanks).
pd.read_table(
    'sample3.txt',
    sep=r'\s+',
    names=['c1', 'c2', 'c3'],
    skiprows=[0, 1],
)

Unnamed: 0,c1,c2,c3
0,3,3.33,three


In [10]:
%%writefile sample4.csv
c1, c2, c3
1, 1.11, one
2, , two
누락, 3.33, three

Overwriting sample4.csv


In [11]:
# na_values lists the extra tokens that must be read as NaN: the marker '누락'
# and a field consisting of a single blank.
# skipinitialspace=True strips the blank after each comma, so the header is
# 'c2'/'c3' (not ' c2'/' c3') and the c3 strings have no leading blank; a
# blank-only field then becomes an empty field, which pandas already reads as NaN.
pd.read_csv('sample4.csv', na_values=['누락', ' '], skipinitialspace=True)

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


## csv 파일 출력

In [12]:
# Load sample1.csv and keep it as `df`; the export cells below write it back out.
# NOTE(review): the file has a blank after every comma, so the column labels
# and c3 strings keep that leading blank (' c2', ' one', ...). Passing
# skipinitialspace=True to read_csv would strip it.
df = pd.read_csv(filepath_or_buffer='sample1.csv')
df

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [13]:
# Write `df` as comma-separated text using the to_csv defaults.
df.to_csv(path_or_buf='sample6.csv')

In [14]:
# Print the raw file that to_csv wrote; the row index appears as the unnamed first column.
!cat sample6.csv

,c1, c2, c3
0,1,1.11, one
1,2,2.22, two
2,3,3.33, three


In [15]:
# Same export, but with a single blank as the field separator instead of a comma.
df.to_csv(path_or_buf='sample7.txt', sep=' ')

In [16]:
# Print the raw file. Values that themselves contain a blank (' c2', ' one', ...)
# come out wrapped in double quotes because the blank is also the separator.
!cat sample7.txt

 c1 " c2" " c3"
0 1 1.11 " one"
1 2 2.22 " two"
2 3 3.33 " three"


In [17]:
# Load sample4.csv with '누락' and a single blank treated as NaN, and keep the
# frame as `df2` for the na_rep export example that follows.
# NOTE(review): the file has a blank after every comma, so ' c2'/' c3' and the
# c3 strings keep a leading blank; skipinitialspace=True would strip it.
df2 = pd.read_csv(filepath_or_buffer='sample4.csv', na_values=['누락', ' '])
df2

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


In [18]:
# Export `df2`, writing every missing value as the text given in na_rep.
df2.to_csv(path_or_buf='sample8.csv', na_rep='none')

In [19]:
# Print the raw file; the cells that were NaN in df2 now read 'none'.
!cat sample8.csv

,c1, c2, c3
0,1.0,1.11, one
1,2.0,none, two
2,none,3.33, three


## 인터넷 상의 자료 입력

In [20]:
# read_csv accepts a URL as well as a local path: fetch titanic.csv straight
# from the raw GitHub address and rebind `df` to the result.
titanic_url = "https://raw.githubusercontent.com/datascienceschool/docker_rpython/master/data/titanic.csv"
df = pd.read_csv(titanic_url)
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [21]:
# Cap the number of rows pandas prints for a frame at 10, then display `df` again.
pd.options.display.max_rows = 10
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [22]:
# First ten rows of the Titanic frame.
df.head(n=10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [23]:
# Last three rows of the Titanic frame.
df.tail(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


## 인터넷 상의 DB 불러오기

In [48]:
conda install pandas-datareader

Collecting package metadata (current_repodata.json): done
Solving environment: | 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/osx-64::holoviews==1.15.0=py39hecd8cb5_0
  - defaults/noarch::nbclassic==0.3.5=pyhd3eb1b0_0
  - defaults/osx-64::jupyterlab==3.4.4=py39hecd8cb5_0
  - defaults/osx-64::anaconda==2022.10=py39_0
  - defaults/osx-64::scrapy==2.6.2=py39hecd8cb5_0
  - defaults/osx-64::hvplot==0.8.0=py39hecd8cb5_0
  - defaults/osx-64::conda-build==3.22.0=py39hecd8cb5_0
  - defaults/osx-64::bcrypt==3.2.0=py39hca72f7f_1
  - defaults/osx-64::_ipyw_jlab_nb_ext_conf==0.1.0=py39hecd8cb5_1
  - defaults/osx-64::jupyter_server==1.18.1=py39hecd8cb5_0
  - defaults/noarch::jupyterlab_server==2.10.3=pyhd3eb1b0_1
  - defaults/osx-64::nbconvert==6.4.4=py39hecd8cb5_0
  - defaults/osx-64::jupyter==1.0.0=py39hecd8cb5_8
  - defaults/noarch::ipywidgets==7.6.5=pyhd3eb1b0_1
  - defaults/osx-64::twisted==22.2.0=p

_anaconda_depends-20 | 69 KB     | ##################################### | 100% [A[A[A[A[A[A[A[A









anaconda-custom      | 4 KB      | ##################################### | 100% [A[A[A[A[A[A[A[A[A[A






boltons-23.0.0       | 423 KB    | ##################################### | 100% [A[A[A[A[A[A[A






boltons-23.0.0       | 423 KB    | ##################################### | 100% [A[A[A[A[A[A[A










conda-23.3.1         | 962 KB    | ##################################### | 100% [A[A[A[A[A[A[A[A[A[A[A










conda-23.3.1         | 962 KB    | ##################################### | 100% [A[A[A[A[A[A[A[A[A[A[A








openssl-1.1.1t       | 3.3 MB    | ##################################### | 100% [A[A[A[A[A[A[A[A[A








openssl-1.1.1t       | 3.3 MB    | ##################################### | 100% [A[A[A[A[A[A[A[A[A





pip-23.0.1           | 2.5 MB    | ##################################### |

In [2]:
import pandas_datareader as pdr

In [6]:
# Date range shared by the FRED queries in this section.
# ISO 8601 strings are unambiguous; the previous '2000, 1, 1' form only worked
# because the date parser is lenient about free-form text.
dt_start = '2000-01-01'
dt_end = '2022-12-31'

# Download the 'GDP' series from FRED for that range and preview the first rows.
gdp = pdr.get_data_fred('GDP', dt_start, dt_end)
gdp.head()

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2000-01-01,10002.179
2000-04-01,10247.72
2000-07-01,10318.165
2000-10-01,10435.744
2001-01-01,10470.231


In [7]:
# Last five rows of the GDP series (5 is the tail() default).
gdp.tail(n=5)

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
2021-10-01,24349.121
2022-01-01,24740.48
2022-04-01,25248.476
2022-07-01,25723.941
2022-10-01,26137.992


### FRED 데이터 여러 개 불러오기

In [8]:
# Passing a list of series ids returns one frame with a column per series,
# aligned on the shared DATE index (GDP is NaN on dates where it has no value).
fred_series = ['GDP', 'CPIAUCSL', 'CPILFESL']
econ = pdr.get_data_fred(fred_series, dt_start, dt_end)
econ

Unnamed: 0_level_0,GDP,CPIAUCSL,CPILFESL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-01,10002.179,169.300,179.300
2000-02-01,,170.000,179.400
2000-03-01,,171.000,180.000
2000-04-01,10247.720,170.900,180.300
2000-05-01,,171.200,180.700
...,...,...,...
2022-08-01,,295.320,296.639
2022-09-01,,296.539,298.339
2022-10-01,26137.992,297.987,299.333
2022-11-01,,298.598,300.261


## loc, iloc 인덱서 
### loc : 라벨을 이용한 2차원 인덱싱
인덱서를 하나만 쓰면 행을 선택한다: 행 라벨, 라벨 슬라이스, 불리언 시리즈는 가능하지만 열 라벨만 단독으로 쓸 수는 없다. 열은 `loc[행, 열]`처럼 두 번째 인덱서로 지정한다.

In [9]:
# A single row label returns that row as a Series whose index is the column names.
econ.loc['2008-01-01']

GDP         14706.538
CPIAUCSL      212.174
CPILFESL      213.771
Name: 2008-01-01 00:00:00, dtype: float64

In [10]:
# Label slice over the DATE index; unlike positional slicing, a .loc label
# slice includes the end label.
econ.loc['2008-01-01':'2009-01-01']

Unnamed: 0_level_0,GDP,CPIAUCSL,CPILFESL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2008-01-01,14706.538,212.174,213.771
2008-02-01,,212.687,213.939
2008-03-01,,213.448,214.42
2008-04-01,14865.701,213.942,214.56
2008-05-01,,215.208,214.936
2008-06-01,,217.463,215.424
2008-07-01,14898.999,219.016,215.965
2008-08-01,,218.69,216.393
2008-09-01,,218.877,216.713
2008-10-01,14608.208,216.995,216.788


In [11]:
# Boolean indexing: keep only the rows whose GDP exceeds 14500
# (rows where GDP is NaN compare as False and are dropped).
econ.loc[econ['GDP'] > 14500]

Unnamed: 0_level_0,GDP,CPIAUCSL,CPILFESL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-07-01,14564.117,207.603,210.773
2007-10-01,14715.058,209.19,212.077
2008-01-01,14706.538,212.174,213.771
2008-04-01,14865.701,213.942,214.56
2008-07-01,14898.999,219.016,215.965
2008-10-01,14608.208,216.995,216.788
2009-10-01,14651.248,216.509,220.501
2010-01-01,14764.611,217.488,220.633
2010-04-01,14980.193,217.403,220.822
2010-07-01,15141.605,217.605,221.363


In [12]:
# Row selection by boolean mask combined with a label slice over the columns
# (from 'CPIAUCSL' through 'CPILFESL', both included).
gdp_above_14500 = econ['GDP'] > 14500
econ.loc[gdp_above_14500, 'CPIAUCSL':'CPILFESL']

Unnamed: 0_level_0,CPIAUCSL,CPILFESL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-07-01,207.603,210.773
2007-10-01,209.19,212.077
2008-01-01,212.174,213.771
2008-04-01,213.942,214.56
2008-07-01,219.016,215.965
2008-10-01,216.995,216.788
2009-10-01,216.509,220.501
2010-01-01,217.488,220.633
2010-04-01,217.403,220.822
2010-07-01,217.605,221.363


In [13]:
# Date-label slice for the rows, explicit list of column labels for the
# columns; the result follows the order given in the list.
period = slice('2008-01-01', '2009-01-01')
econ.loc[period, ['CPIAUCSL', 'GDP']]

Unnamed: 0_level_0,CPIAUCSL,GDP
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-01-01,212.174,14706.538
2008-02-01,212.687,
2008-03-01,213.448,
2008-04-01,213.942,14865.701
2008-05-01,215.208,
2008-06-01,217.463,
2008-07-01,219.016,14898.999
2008-08-01,218.69,
2008-09-01,218.877,
2008-10-01,216.995,14608.208


### iloc : 정수 위치(0부터 시작) 인덱스만 사용 가능 (라벨 사용 불가)

In [14]:
# Positional indexing: rows 0-9 of column position 0 (GDP), returned as a Series.
econ.iloc[0:10, 0]

DATE
2000-01-01    10002.179
2000-02-01          NaN
2000-03-01          NaN
2000-04-01    10247.720
2000-05-01          NaN
2000-06-01          NaN
2000-07-01    10318.165
2000-08-01          NaN
2000-09-01          NaN
2000-10-01    10435.744
Freq: MS, Name: GDP, dtype: float64

In [15]:
# Positional slices on both axes: rows 100-109 and every column from position 1 onward.
econ.iloc[slice(100, 110), slice(1, None)]

Unnamed: 0_level_0,CPIAUCSL,CPILFESL
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-05-01,215.208,214.936
2008-06-01,217.463,215.424
2008-07-01,219.016,215.965
2008-08-01,218.69,216.393
2008-09-01,218.877,216.713
2008-10-01,216.995,216.788
2008-11-01,213.153,216.947
2008-12-01,211.398,216.925
2009-01-01,211.933,217.346
2009-02-01,212.705,217.792
