In [3]:
import pandas as pd

# 1. 신규열을 apply 함수를 통해 생성(일반함수 지정)

In [4]:
import numpy as np

In [5]:
# Demo frame for the apply examples: two columns of standard-normal draws and a
# repeating name label. A dedicated, seeded RandomState keeps the displayed
# numbers identical on every run without touching NumPy's global random state.
rng = np.random.RandomState(42)
df = pd.DataFrame({'a': rng.randn(6),
                   'b': ['철수', '영희'] * 3,
                   'c': rng.randn(6)})
df

Unnamed: 0,a,b,c
0,1.422511,철수,0.023319
1,-2.224265,영희,0.977927
2,-0.841835,철수,1.785981
3,-1.187998,영희,0.340711
4,0.198487,철수,0.832533
5,0.510432,영희,-0.583916


In [9]:
# Show the dtype pandas inferred for each column of `df`.
df.dtypes

a    float64
b     object
c    float64
dtype: object

In [4]:
def test(df):
    return df['a']+df['c']+10

In [14]:
# 좀 더 복잡하게 시도
def test2(df):
    result1 = abs(df['a'])
    result2 = df['c']+10
    return pow(result1+result2,2)

In [5]:
# Row-wise apply: with axis=1 pandas calls `test` once per row and the
# per-row results are stored as a new 'value' column on the existing frame.
df['value'] = df.apply(test, axis = 1)
df

Unnamed: 0,a,b,c,value
0,0.492961,철수,-1.187899,9.305063
1,-1.162457,영희,0.709823,9.547366
2,1.161172,철수,0.159889,11.321061
3,0.116088,영희,0.474479,10.590567
4,-0.528222,철수,1.779906,11.251683
5,0.722476,영희,-0.860373,9.862103


In [15]:
# Same row-wise apply pattern with the more involved `test2`; the per-row
# results are stored as a new 'value2' column on the existing frame.
df['value2'] = df.apply(test2, axis = 1)
df

Unnamed: 0,a,b,c,value2
0,1.422511,철수,0.023319,131.007028
1,-2.224265,영희,0.977927,174.297883
2,-0.841835,철수,1.785981,159.461728
3,-1.187998,영희,0.340711,132.91114
4,0.198487,철수,0.832533,121.683393
5,0.510432,영희,-0.583916,98.535719


# 2. 임의의값으로 샘플데이터를 30,000개 생성하여 90보다 작은 수에는 NaN 이 나오게 하세요.

In [6]:
# Three-row seed frame; the single column holds the values that get repeated
# later to build the large sample.
sample = pd.DataFrame([10, 100, 40], columns=['임의의값'])
sample

Unnamed: 0,임의의값
0,10
1,100
2,40


In [7]:
# Stack 10,000 copies of the current frame and renumber the rows 0..n-1.
# NOTE: this rebinds `sample` to its own enlarged version, so running the cell
# a second time multiplies the row count by 10,000 again.
sample = pd.concat([sample] * 10000, ignore_index=True)

In [8]:
# Show the (rows, columns) size of `sample`.
sample.shape

(30000, 1)

In [11]:
# Preview the first five rows of `sample`.
sample.head()

Unnamed: 0,임의의값
0,10
1,100
2,40
3,10
4,100


In [16]:
# Take a copy so that later work on `sample1` does not modify `sample`.
sample1 = sample.copy()

In [17]:
# Replace every value below 90 with NaN; the integer column becomes float64
# because NaN has no integer representation.
# The column is read from `sample`, which this cell never rebinds. `sample1`
# becomes a Series here, so indexing `sample1` by column name would raise a
# KeyError if the cell were run a second time.
sample1 = sample['임의의값'].mask(sample['임의의값'] < 90, np.nan)
sample1.head()

0      NaN
1    100.0
2      NaN
3      NaN
4    100.0
Name: 임의의값, dtype: float64

# 3. 사용자정의 함수를 map 메소드로 처리하세요.

In [31]:
# Six standard-normal draws from a seeded RandomState, so the values shown
# below are the same on every run and NumPy's global random state is untouched.
ser = pd.Series(np.random.RandomState(0).randn(6))

In [32]:
# Wrap the Series in a one-column frame labelled '관측값_A'.
# NOTE(review): this relies on `ser` being unnamed; how the constructor treats
# a Series whose name differs from `columns` should be confirmed before reuse.
df = pd.DataFrame(ser,columns = ['관측값_A'])
df

Unnamed: 0,관측값_A
0,0.508716
1,0.067881
2,-0.787496
3,-0.443805
4,-0.119713
5,0.82728


In [33]:
def map_test(a):
    """Return the absolute value of ``a``, printing a marker on every call.

    The printed marker makes it visible that ``Series.map`` invokes the
    function once per element.
    """
    print('원소별 처리')
    magnitude = np.absolute(a)
    return magnitude

In [34]:
# Series.map already returns a Series, so the result is used directly.
# `map_test` is called once per element and prints its marker each time.
s = ser.map(map_test)
s

원소별 처리
원소별 처리
원소별 처리
원소별 처리
원소별 처리
원소별 처리


0    0.508716
1    0.067881
2    0.787496
3    0.443805
4    0.119713
5    0.827280
dtype: float64

In [35]:
# Attach the mapped values as a new column next to the raw observations.
df['관측값_A_절대값']=s
df

Unnamed: 0,관측값_A,관측값_A_절대값
0,0.508716,0.508716
1,0.067881,0.067881
2,-0.787496,0.787496
3,-0.443805,0.443805
4,-0.119713,0.119713
5,0.82728,0.82728


# 4. pipe 메소드로 결측치를 채우세요.

In [44]:
# Small frame with deliberate gaps: one missing zodiac sign (NaN) and two
# missing birth dates (NaT) for the fill-in examples that follow.
df = pd.DataFrame(
    {
        'name': ['지혜', '뽀짝', '희민'],
        'zodiac': [np.nan, '처녀자리', '사자자리'],
        'birth': [pd.Timestamp('1995-10-04'), pd.NaT, pd.NaT],
    }
)
df

Unnamed: 0,name,zodiac,birth
0,지혜,,1995-10-04
1,뽀짝,처녀자리,NaT
2,희민,사자자리,NaT


In [45]:
def birth_fillna(df, fill_value=pd.Timestamp('2020-09-10')):
    """Fill missing entries of the ``'birth'`` column and return the frame.

    Parameters
    ----------
    df : DataFrame with a ``'birth'`` column.
    fill_value : value written into missing entries; defaults to
        ``pd.Timestamp('2020-09-10')``. Can be overridden through
        ``df.pipe(birth_fillna, fill_value=...)``.

    Returns
    -------
    The same ``df`` object that was passed in. The column is assigned back
    onto it, so the caller's frame is modified in place.
    """
    df['birth'] = df['birth'].fillna(fill_value)
    return df

In [46]:
# The returned frame is only displayed, not assigned. `df` itself still ends
# up filled because birth_fillna writes the column back into the frame it
# receives.
df.pipe(birth_fillna)

Unnamed: 0,name,zodiac,birth
0,지혜,,1995-10-04
1,뽀짝,처녀자리,2020-09-10
2,희민,사자자리,2020-09-10


In [48]:
def zodiac_fillna(df, fill_value='천칭자리'):
    """Fill missing entries of the ``'zodiac'`` column and return the frame.

    Parameters
    ----------
    df : DataFrame with a ``'zodiac'`` column.
    fill_value : value written into missing entries; defaults to
        ``'천칭자리'``. Can be overridden through
        ``df.pipe(zodiac_fillna, fill_value=...)``.

    Returns
    -------
    The same ``df`` object that was passed in. The column is assigned back
    onto it, so the caller's frame is modified in place.
    """
    df['zodiac'] = df['zodiac'].fillna(fill_value)
    return df

In [49]:
# As with the birth step, the result is only displayed; zodiac_fillna writes
# the filled column back into `df`, so no assignment is needed here.
df.pipe(zodiac_fillna)

Unnamed: 0,name,zodiac,birth
0,지혜,천칭자리,1995-10-04
1,뽀짝,처녀자리,2020-09-10
2,희민,사자자리,2020-09-10


# 5. pipe 메소드로 gender별 age평균을 구하고 열이름 첫글자를 대문자로 바꾸세요.

In [69]:
# Three people with a gender label and an integer age, written column by
# column; used for the group-by example below.
df2 = pd.DataFrame({
    'name': ['지혜', '뽀짝', '희민'],
    'gender': ['Female', 'Female', 'Male'],
    'age': [28, 2, 33],
})
df2

Unnamed: 0,name,gender,age
0,지혜,Female,28
1,뽀짝,Female,2
2,희민,Male,33


In [70]:
def mean_age_by_group(dataframe, col):
    """Group ``dataframe`` by ``col`` and return the mean of each numeric column.

    Parameters
    ----------
    dataframe : DataFrame to aggregate.
    col : column label (or list of labels) to group by.

    Returns
    -------
    DataFrame indexed by the group keys, one column per numeric input column.
    """
    # numeric_only=True keeps text columns (e.g. a name column) out of the
    # aggregation. Recent pandas versions raise a TypeError for them instead
    # of silently dropping them.
    return dataframe.groupby(col).mean(numeric_only=True)

In [71]:
def uppercase_column_name(dataframe):
    """Return a frame whose column labels are converted to upper case.

    Every character of each label is upper-cased (``'age'`` -> ``'AGE'``),
    not only the first letter. Labels are expected to be strings.

    The input frame is left unchanged: a renamed frame is returned instead of
    overwriting ``dataframe.columns``, so the function can be used as a pipe
    step without side effects on the caller's data.
    """
    return dataframe.rename(columns=str.upper)

In [72]:
# Chain two pipe steps: average per 'gender' group, then upper-case the
# resulting column labels. Extra keyword arguments given to pipe
# (col='gender') are forwarded to the piped function.
(df2.pipe(mean_age_by_group, col = 'gender').pipe(uppercase_column_name))

Unnamed: 0_level_0,AGE
gender,Unnamed: 1_level_1
Female,15.0
Male,33.0
