## Series.map(arg)
- https://pandas.pydata.org/docs/reference/api/pandas.Series.map.html#pandas.Series.map

In [4]:
people_set = {
    'Name' : ['Spencer', 'Mark', 'Tom', 'Peter'],
    'Major': ['Computer', 'Science', 'English', 'Computer'],
    'YearOfJoining' : [2020, 2019, 2018, 2017],
    'DriverLicense' : [True, False, False, True],
    'TeacherCertification' : [True, False, False, False],
}

In [5]:
import pandas as pd

In [6]:
people = pd.DataFrame(people_set)
people

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,Spencer,Computer,2020,True,True
1,Mark,Science,2019,False,False
2,Tom,English,2018,False,False
3,Peter,Computer,2017,True,False


In [7]:
# map() 으로 매핑 확인해서 값 변환
people['DriverLicense'] = people['DriverLicense'].map({True:'Yes', False:'No'})
people

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,Spencer,Computer,2020,Yes,True
1,Mark,Science,2019,No,False
2,Tom,English,2018,No,False
3,Peter,Computer,2017,Yes,False


In [8]:
# 매핑이 안된 값은 그대로? NA? 일까
people['TeacherCertification'].map({True: 'ComputerScience'})

0    ComputerScience
1                NaN
2                NaN
3                NaN
Name: TeacherCertification, dtype: object

In [9]:
people['TeacherCertification'].map({True: 'ComputerScience','False':'NoCertification'}).value_counts()


ComputerScience    1
Name: TeacherCertification, dtype: int64

## Dataframe.applymap(func)
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.applymap.html#pandas.DataFrame.applymap
- 모든 요소에 apply()를 할 수 있다.

In [10]:
people.applymap(type)

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,<class 'str'>,<class 'str'>,<class 'int'>,<class 'str'>,<class 'bool'>
1,<class 'str'>,<class 'str'>,<class 'int'>,<class 'str'>,<class 'bool'>
2,<class 'str'>,<class 'str'>,<class 'int'>,<class 'str'>,<class 'bool'>
3,<class 'str'>,<class 'str'>,<class 'int'>,<class 'str'>,<class 'bool'>


In [11]:
people.applymap(str).applymap(type)

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>
1,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>
2,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>
3,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>,<class 'str'>


In [12]:
# str같은 경우 Series는 applymap할 필요 없이 가능하나
people['Name'].str.len()

0    7
1    4
2    3
3    5
Name: Name, dtype: int64

In [13]:
# Dataframe은 .str이 안된다.
people.str

AttributeError: 'DataFrame' object has no attribute 'str'

모든 요소를 str로 변환하고 대문자로 변환하면

In [16]:
# 대문자 만드는 함수
def uppercase(data):
    return data.upper()

In [14]:
people.applymap(str)

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,Spencer,Computer,2020,Yes,True
1,Mark,Science,2019,No,False
2,Tom,English,2018,No,False
3,Peter,Computer,2017,Yes,False


In [17]:
people.applymap(str).applymap(uppercase)

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,SPENCER,COMPUTER,2020,YES,True
1,MARK,SCIENCE,2019,NO,False
2,TOM,ENGLISH,2018,NO,False
3,PETER,COMPUTER,2017,YES,False


사실 모든 요소 형변환 하는 방법은 따로 있긴하다.
위에는 사용자가 함수를 만들어서 모든 요소에 적용할 수 있다는 점을 실습해본 것.

In [19]:
people.astype(str).applymap(uppercase)

Unnamed: 0,Name,Major,YearOfJoining,DriverLicense,TeacherCertification
0,SPENCER,COMPUTER,2020,YES,True
1,MARK,SCIENCE,2019,NO,False
2,TOM,ENGLISH,2018,NO,False
3,PETER,COMPUTER,2017,YES,False


In [22]:
# Lambda도 살펴보자
df = pd.DataFrame({'A': [1, 2, 3],
                   'B': [4, 5, 6],
                   'C': [7, 8, 9]})

In [25]:
# applymap()을 사용하여 제곱 연산 적용
df_squared = df.applymap(lambda x: x**2)

In [26]:
df_squared

Unnamed: 0,A,B,C
0,1,16,49
1,4,25,64
2,9,36,81


In [28]:
df**2

Unnamed: 0,A,B,C
0,1,16,49
1,4,25,64
2,9,36,81


## 데이터셋
- 고객 성격 분석 : https://www.kaggle.com/datasets/imakash3011/customer-personality-analysis

In [29]:
import pandas as pd

In [30]:
customers = pd.read_csv('marketing_campaign.csv', sep='\t', index_col='ID')
customers

Unnamed: 0_level_0,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524,1957,Graduation,Single,58138.0,0,0,04-09-2012,58,635,88,...,7,0,0,0,0,0,0,3,11,1
2174,1954,Graduation,Single,46344.0,1,1,08-03-2014,38,11,1,...,5,0,0,0,0,0,0,3,11,0
4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,49,...,4,0,0,0,0,0,0,3,11,0
6182,1984,Graduation,Together,26646.0,1,0,10-02-2014,26,11,4,...,6,0,0,0,0,0,0,3,11,0
5324,1981,PhD,Married,58293.0,1,0,19-01-2014,94,173,43,...,5,0,0,0,0,0,0,3,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10870,1967,Graduation,Married,61223.0,0,1,13-06-2013,46,709,43,...,5,0,0,0,0,0,0,3,11,0
4001,1946,PhD,Together,64014.0,2,1,10-06-2014,56,406,0,...,7,0,0,0,1,0,0,3,11,0
7270,1981,Graduation,Divorced,56981.0,0,0,25-01-2014,91,908,48,...,6,0,1,0,0,0,0,3,11,0
8235,1956,Master,Together,69245.0,0,1,24-01-2014,8,428,30,...,3,0,0,0,0,0,0,3,11,0


In [31]:
# 다자녀 표시
customers['Kidhome'].map({0:'자녀없음', 1:'외동', 2:'다자녀'})
customers.head()

Unnamed: 0_level_0,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524,1957,Graduation,Single,58138.0,0,0,04-09-2012,58,635,88,...,7,0,0,0,0,0,0,3,11,1
2174,1954,Graduation,Single,46344.0,1,1,08-03-2014,38,11,1,...,5,0,0,0,0,0,0,3,11,0
4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,49,...,4,0,0,0,0,0,0,3,11,0
6182,1984,Graduation,Together,26646.0,1,0,10-02-2014,26,11,4,...,6,0,0,0,0,0,0,3,11,0
5324,1981,PhD,Married,58293.0,1,0,19-01-2014,94,173,43,...,5,0,0,0,0,0,0,3,11,0


In [33]:
# 모두 대문자로 변환
customers.applymap(str).applymap(uppercase)

Unnamed: 0_level_0,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524,1957,GRADUATION,SINGLE,58138.0,0,0,04-09-2012,58,635,88,...,7,0,0,0,0,0,0,3,11,1
2174,1954,GRADUATION,SINGLE,46344.0,1,1,08-03-2014,38,11,1,...,5,0,0,0,0,0,0,3,11,0
4141,1965,GRADUATION,TOGETHER,71613.0,0,0,21-08-2013,26,426,49,...,4,0,0,0,0,0,0,3,11,0
6182,1984,GRADUATION,TOGETHER,26646.0,1,0,10-02-2014,26,11,4,...,6,0,0,0,0,0,0,3,11,0
5324,1981,PHD,MARRIED,58293.0,1,0,19-01-2014,94,173,43,...,5,0,0,0,0,0,0,3,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10870,1967,GRADUATION,MARRIED,61223.0,0,1,13-06-2013,46,709,43,...,5,0,0,0,0,0,0,3,11,0
4001,1946,PHD,TOGETHER,64014.0,2,1,10-06-2014,56,406,0,...,7,0,0,0,1,0,0,3,11,0
7270,1981,GRADUATION,DIVORCED,56981.0,0,0,25-01-2014,91,908,48,...,6,0,1,0,0,0,0,3,11,0
8235,1956,MASTER,TOGETHER,69245.0,0,1,24-01-2014,8,428,30,...,3,0,0,0,0,0,0,3,11,0


In [34]:
customers.astype(str).applymap(uppercase)

Unnamed: 0_level_0,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,MntFruits,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5524,1957,GRADUATION,SINGLE,58138.0,0,0,04-09-2012,58,635,88,...,7,0,0,0,0,0,0,3,11,1
2174,1954,GRADUATION,SINGLE,46344.0,1,1,08-03-2014,38,11,1,...,5,0,0,0,0,0,0,3,11,0
4141,1965,GRADUATION,TOGETHER,71613.0,0,0,21-08-2013,26,426,49,...,4,0,0,0,0,0,0,3,11,0
6182,1984,GRADUATION,TOGETHER,26646.0,1,0,10-02-2014,26,11,4,...,6,0,0,0,0,0,0,3,11,0
5324,1981,PHD,MARRIED,58293.0,1,0,19-01-2014,94,173,43,...,5,0,0,0,0,0,0,3,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10870,1967,GRADUATION,MARRIED,61223.0,0,1,13-06-2013,46,709,43,...,5,0,0,0,0,0,0,3,11,0
4001,1946,PHD,TOGETHER,64014.0,2,1,10-06-2014,56,406,0,...,7,0,0,0,1,0,0,3,11,0
7270,1981,GRADUATION,DIVORCED,56981.0,0,0,25-01-2014,91,908,48,...,6,0,1,0,0,0,0,3,11,0
8235,1956,MASTER,TOGETHER,69245.0,0,1,24-01-2014,8,428,30,...,3,0,0,0,0,0,0,3,11,0
