In [3]:
import numpy as np
import pandas as pd

# Object Creation 1

# 값을 가지고 있는 리스트를 통해 시리즈(Series)를 만들고,
# 인덱스를 기본값으로 불러온다

s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [9]:
# Object Creation 2

# Datetime 인덱스와 레이블이 있는 열이 있는 numpy 배열을 전달하여 데이터 프레임 만들기
dates = pd.date_range('20220701', periods = 10)
print(dates)
print()

# freq='M' 일 때는 기준이 월
dates2 = pd.date_range('20220701', periods = 10, freq = 'M')
print(dates2)

DatetimeIndex(['2022-07-01', '2022-07-02', '2022-07-03', '2022-07-04',
               '2022-07-05', '2022-07-06', '2022-07-07', '2022-07-08',
               '2022-07-09', '2022-07-10'],
              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2022-07-31', '2022-08-31', '2022-09-30', '2022-10-31',
               '2022-11-30', '2022-12-31', '2023-01-31', '2023-02-28',
               '2023-03-31', '2023-04-30'],
              dtype='datetime64[ns]', freq='M')


In [12]:
# Object Creation 3

# 시리즈와 같은 것으로 바뀔 수 있는 객체와 dict로 구성된 dataframe

df2 = pd.DataFrame({
  'A': 1.,                                                      
  'B': pd.Timestamp('20220701'),                                # Timestamp => 날짜 형식으로 넣어주라는 뜻
  'C': pd.Series(1, index = list(range(4)), dtype = 'float32'), # 데이터 타입이 float
  'D': np.array([3] * 4, dtype = 'int32'),                      # 데이터 타입이 int
  'E': pd.Categorical(['test', 'train', 'test', 'train']),      # 카테고리컬 데이터
  'F': 'foo'
})                                                              # A ~ F 는 시리즈 데이터

df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2022-07-01,1.0,3,test,foo
1,1.0,2022-07-01,1.0,3,train,foo
2,1.0,2022-07-01,1.0,3,test,foo
3,1.0,2022-07-01,1.0,3,train,foo


In [17]:
# Viewing Data 1

# DataFrame 의 맨 윗줄과 마지막 줄을 확인할 때: head(), tail()
d1 = pd.date_range('20220701', periods = 100)

df3 = pd.DataFrame(np.random.randn(100, 4), index = d1, columns = list('ABCD'))
df3.head()

df3.head(3) # 상위 3개만 보여준다

df3.tail()

Unnamed: 0,A,B,C,D
2022-10-04,0.115334,-1.711092,1.482646,-0.99364
2022-10-05,-0.349347,0.717195,-0.121333,-0.403271
2022-10-06,-1.146007,-0.959869,-0.052893,-0.586304
2022-10-07,-1.183994,-0.229069,-0.506767,0.62191
2022-10-08,0.111844,0.625642,-0.308718,-0.503547


In [19]:
# Viewing Data 2

# 인덱스, 열, 데이터에 대한 세부정보 보기
# .index / .columns / .values

df3.index

DatetimeIndex(['2022-07-01', '2022-07-02', '2022-07-03', '2022-07-04',
               '2022-07-05', '2022-07-06', '2022-07-07', '2022-07-08',
               '2022-07-09', '2022-07-10', '2022-07-11', '2022-07-12',
               '2022-07-13', '2022-07-14', '2022-07-15', '2022-07-16',
               '2022-07-17', '2022-07-18', '2022-07-19', '2022-07-20',
               '2022-07-21', '2022-07-22', '2022-07-23', '2022-07-24',
               '2022-07-25', '2022-07-26', '2022-07-27', '2022-07-28',
               '2022-07-29', '2022-07-30', '2022-07-31', '2022-08-01',
               '2022-08-02', '2022-08-03', '2022-08-04', '2022-08-05',
               '2022-08-06', '2022-08-07', '2022-08-08', '2022-08-09',
               '2022-08-10', '2022-08-11', '2022-08-12', '2022-08-13',
               '2022-08-14', '2022-08-15', '2022-08-16', '2022-08-17',
               '2022-08-18', '2022-08-19', '2022-08-20', '2022-08-21',
               '2022-08-22', '2022-08-23', '2022-08-24', '2022-08-25',
      

In [20]:
df3.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [None]:
df3.values

In [24]:
# Viewing Data 3

# 통계정보 확인은 describe()

df3.describe()

Unnamed: 0,A,B,C,D
count,100.0,100.0,100.0,100.0
mean,0.059556,0.098966,-0.118157,0.155879
std,0.843696,0.987,1.08343,1.032992
min,-2.025596,-2.287198,-2.434884,-1.819318
25%,-0.56744,-0.650467,-0.917054,-0.575858
50%,-0.028512,0.215083,-0.238548,0.008526
75%,0.709314,0.771742,0.711225,0.782465
max,2.133335,2.14913,2.090496,2.846706


In [25]:
# Viewing Data 4

# 데이터프레임을 전치(transpose) 하는것: .T

df3.T

Unnamed: 0,2022-07-01,2022-07-02,2022-07-03,2022-07-04,2022-07-05,2022-07-06,2022-07-07,2022-07-08,2022-07-09,2022-07-10,...,2022-09-29,2022-09-30,2022-10-01,2022-10-02,2022-10-03,2022-10-04,2022-10-05,2022-10-06,2022-10-07,2022-10-08
A,-0.33128,-0.278161,0.775255,1.726618,-0.968276,1.114437,-0.313476,1.392803,-0.779679,0.879821,...,-0.046198,-0.345251,-0.785807,0.000276,0.393707,0.115334,-0.349347,-1.146007,-1.183994,0.111844
B,1.609326,0.434458,-1.134499,1.203824,0.939896,0.76814,0.165847,1.324324,0.382697,1.278855,...,-0.045434,-0.444955,0.110229,1.224687,0.072276,-1.711092,0.717195,-0.959869,-0.229069,0.625642
C,1.812329,-0.121223,-1.00084,0.698908,-0.268768,0.590769,-1.278358,1.00593,-1.437724,1.461722,...,1.980726,1.496646,-0.327636,1.103634,-1.249075,1.482646,-0.121333,-0.052893,-0.506767,-0.308718
D,-0.004407,-1.434104,-0.526421,0.363326,-1.47363,-0.956052,-0.78543,-1.255459,-1.819318,-1.704452,...,0.538937,-0.91156,0.035479,1.125893,0.972852,-0.99364,-0.403271,-0.586304,0.62191,-0.503547
