# Pandas (panel data system)

# Syntax

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Build a small demo frame from a column-name -> values mapping; rows are labelled 1..3.
df = pd.DataFrame(
    data=dict(a=[4, 5, 6], b=[7, 8, 9], c=[10, 11, 12]),
    index=[1, 2, 3],
)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,6,9,12


In [9]:
# Select the "a" column; a single column label returns a Series.
df["a"]

1    4
2    5
3    6
Name: a, dtype: int64

In [4]:
# Select the "a" and "b" columns (wrap the labels in a list); the result is a DataFrame.
df[["a","b"]]

Unnamed: 0,a,b
1,4,7
2,5,8
3,6,9


In [10]:
# Select the row labelled 1 with .loc (label-based indexing).
df.loc[1]

a     4
b     7
c    10
Name: 1, dtype: int64

In [11]:
# Select a single value with .loc[row_label, column_label]: here row 3, column "a".
df.loc[3,"a"]

6

In [6]:
# Select a sub-table: several specific rows and several specific columns at once.
df.loc[[1,2],["a","b"]]

Unnamed: 0,a,b
1,4,7
2,5,8


In [7]:
# Approach 1: pass a dict that maps each column name to that column's values.
# NOTE(review): column "a" ends in 9 here, while the other examples use 6 - confirm this is intended.
df = pd.DataFrame(
    data=dict(a=[4, 5, 9], b=[7, 8, 9], c=[10, 11, 12]),
    index=[1, 2, 3],
)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,9,9,12


In [8]:
# Approach 2: pass the data row by row and name the columns explicitly.
df = pd.DataFrame(
    data=[[4, 7, 10], [5, 8, 11], [6, 9, 12]],
    index=[1, 2, 3],
    columns=list("abc"),
)
df

Unnamed: 0,a,b,c
1,4,7,10
2,5,8,11
3,6,9,12


In [26]:
# Same column data, but the rows are keyed by a two-level MultiIndex with levels "n" and "v".
df = pd.DataFrame(
    data=dict(a=[4, 5, 6], b=[7, 8, 9], c=[10, 11, 12]),
    index=pd.MultiIndex.from_tuples(
        [("d", 1), ("d", 2), ("e", 2)],
        names=["n", "v"],
    ),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,11
e,2,6,9,12


# Subset Observations (Rows)

In [31]:
# Attribute-style column access only works for columns that exist: df has no
# "Length" column, so df.Length raises AttributeError. The error is the point
# of this cell, so it is caught and its message printed; that way the notebook
# still runs top to bottom instead of stopping here.
try:
    df[df.Length > 7]
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: 'DataFrame' object has no attribute 'Length'

In [36]:
# Boolean indexing: keep only the rows whose "b" value satisfies the condition.
df[df.b > 7]

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,2,5,8,11
e,2,6,9,12


In [40]:
# Two equivalent spellings of the same boolean mask: attribute access and bracket access.
# Only the last expression of a cell is displayed, so the output comes from the second line.
df.b > 7
df['b'] > 7

n  v
d  1    False
   2     True
e  2     True
Name: b, dtype: bool

In [42]:
# df contains no duplicated rows, so the result is the same as df.
df.drop_duplicates()

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,11
e,2,6,9,12


In [48]:
# Variant of the MultiIndex frame whose last two rows hold identical values,
# so there is a duplicated row to work with.
df2 = pd.DataFrame(
    data=dict(a=[4, 5, 6, 6], b=[7, 8, 9, 9], c=[10, 11, 12, 12]),
    index=pd.MultiIndex.from_tuples(
        [("d", 1), ("d", 2), ("e", 2), ("e", 3)],
        names=["n", "v"],
    ),
)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,11
e,2,6,9,12
e,3,6,9,12


In [49]:
# Drop duplicated rows (the first occurrence is kept in the output).
# Tip: run `df2.drop_duplicates?` in a cell to see the help text.
df2.drop_duplicates()

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4,7,10
d,2,5,8,11
e,2,6,9,12


In [54]:
# isin() must be given a list-like of values, even when testing for a single value.
# Both spellings build the same mask; only the last expression of the cell is displayed.
df.a.isin([5])
df['a'].isin([5])

n  v
d  1    False
   2     True
e  2    False
Name: a, dtype: bool

In [56]:
# Demo frame with exactly one missing value (NaN) in each column.
# Because NaN is a float, every column is stored as float (4 is shown as 4.0).
df3 = pd.DataFrame(
    {'a': [4, 5, 6, 6, np.nan],
     'b': [7, 8, np.nan, 9, 9],
     'c': [10, 11, 12, np.nan, 12]},
    index=pd.MultiIndex.from_tuples(
        [('d', 1), ('d', 2), ('e', 2), ('e', 3), ('e', 4)],
        names=['n', 'v']))
df3

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,4.0,7.0,10.0
d,2,5.0,8.0,11.0
e,2,6.0,,12.0
e,3,6.0,9.0,
e,4,,9.0,12.0


In [57]:
# Element-wise missing-value mask: True wherever df3 holds NaN.
pd.isnull(df3)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
n,v,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d,1,False,False,False
d,2,False,False,False
e,2,False,True,False
e,3,False,False,True
e,4,True,False,False


In [61]:
# Boolean mask of missing values in column "a" (not displayed: only the last expression is shown).
df3['a'].isnull()
# Summing the mask counts the missing values in column "a".
df3['a'].isnull().sum()

1

In [65]:
# pd.notnull(df3) is the function form of df3.notnull(); its result is not displayed here.
pd.notnull(df3)
# Number of non-missing values in each column.
df3.notnull().sum()

a    4
b    4
c    4
dtype: int64

* Logical operators in pandas: `&`, `|`, `~`, `^`, `df.any()`, `df.all()`
* Meaning, in the same order: and, or, not, xor, any, all

In [68]:
# ~ negates a boolean mask element-wise, so this flags the rows where "a" is missing.
~df3.a.notnull() 

n  v
d  1    False
   2    False
e  2    False
   3    False
   4     True
Name: a, dtype: bool