In [1]:
import pandas as pd

In [2]:
friend_dict_list = [
    {'name' : 'John', 'age': 15, 'job' : 'student'},
    {'name' : 'Jenny', 'age': 30, 'job' : 'developer'},
    {'name' : 'Nate', 'age': 30, 'job' : 'teacher'},
]
df = pd.DataFrame(friend_dict_list, columns = ['name', 'age', 'job'])

In [3]:
df.head()

Unnamed: 0,name,age,job
0,John,15,student
1,Jenny,30,developer
2,Nate,30,teacher


In [4]:
# salary라는 column 추가
df['salary'] = 0

In [5]:
df.head()

Unnamed: 0,name,age,job,salary
0,John,15,student,0
1,Jenny,30,developer,0
2,Nate,30,teacher,0


In [6]:
#
import numpy as np

In [7]:
df['salary'] = np.where(df['job'] != 'student', 'yes', 'no')

In [9]:
df.head()

Unnamed: 0,name,age,job,salary
0,John,15,student,no
1,Jenny,30,developer,yes
2,Nate,30,teacher,yes


In [10]:
friend_dict_list = [
    {'name' : 'John', 'midterm': 95, 'final' : 85},
    {'name' : 'Jenny', 'midterm': 85, 'final' : 80},
    {'name' : 'Nate', 'midterm': 30, 'final' : 10},
]
df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])

In [11]:
df.head()

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10


In [12]:
# 총점 추가
df['total'] = df['midterm'] + df['final']

In [13]:
df.head()

Unnamed: 0,name,midterm,final,total
0,John,95,85,180
1,Jenny,85,80,165
2,Nate,30,10,40


In [14]:
# 평균 추가
df['average'] = df['total'] / 2

In [15]:
df.head()

Unnamed: 0,name,midterm,final,total,average
0,John,95,85,180,90.0
1,Jenny,85,80,165,82.5
2,Nate,30,10,40,20.0


In [16]:
# 성적산출
grades = []

for row in df['average'] :
    if row >= 90:
        grades.append('A')
    elif row >= 80:
        grades.append('B')
    else:
        grades.append('F')

In [17]:
df['grade'] = grades

In [18]:
df.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,A
1,Jenny,85,80,165,82.5,B
2,Nate,30,10,40,20.0,F


In [19]:
# grade를 p/f로 변경 (apply 사용)

# 적용할 함수 설정
def pass_or_fail(row):
    if row !='F':
        return "pass"
    else : 
        return "Fail"

In [21]:
df.grade = df.grade.apply(pass_or_fail)

In [22]:
df.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,pass
1,Jenny,85,80,165,82.5,pass
2,Nate,30,10,40,20.0,Fail


In [23]:
# date_list 생성

In [27]:
date_list = [
    {
        'yyyy-mm-dd' : '2000-06-27'
    },
    {
        'yyyy-mm-dd' : '2007-10-27'
    }

]
df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])

In [28]:
df.head()

Unnamed: 0,yyyy-mm-dd
0,2000-06-27
1,2007-10-27


In [29]:
# 연도만 따로 빼서 column에 추가
def extract_year(row):
    return row.split('-')[0]

In [30]:
df['year'] = df['yyyy-mm-dd'].apply(extract_year)

In [31]:
df.head()

Unnamed: 0,yyyy-mm-dd,year
0,2000-06-27,2000
1,2007-10-27,2007


In [32]:
# 행 생성 예제
friend_dict_list = [
    {'name' : 'John', 'midterm': 95, 'final' : 85},
    {'name' : 'Jenny', 'midterm': 85, 'final' : 80},
    {'name' : 'Nate', 'midterm': 30, 'final' : 10},
]
df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])

In [33]:
df.head()

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10


In [34]:
# 추가할 df 생성
df2 = pd.DataFrame([
    ['Ben', 50, 50]
], columns = ['name', 'midterm', 'final'])

In [35]:
df2.head()

Unnamed: 0,name,midterm,final
0,Ben,50,50


In [37]:
# df 합치기
df.append(df2, ignore_index = True) # ignore_index => 새 df의 인덱스는 추가 안함.

  df.append(df2, ignore_index = True) # ignore_index => 새 df의 인덱스는 추가 안함.


Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,30,10
3,Ben,50,50


In [38]:
# pandas groupby

In [39]:
student_list = [{'name': 'John', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Nate', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Abraham', 'major': "Physics", 'sex': "male"},
                {'name': 'Brian', 'major': "Psychology", 'sex': "male"},
                {'name': 'Janny', 'major': "Economics", 'sex': "female"},
                {'name': 'Yuna', 'major': "Economics", 'sex': "female"},
                {'name': 'Jeniffer', 'major': "Computer Science", 'sex': "female"},
                {'name': 'Edward', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Zara', 'major': "Psychology", 'sex': "female"},
                {'name': 'Wendy', 'major': "Economics", 'sex': "female"},
                {'name': 'Sera', 'major': "Psychology", 'sex': "female"}
         ]
df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [42]:
# 학과별 몇명인지 알아보기(groupby)
# 학과별로 분류
groupby_major = df.groupby('major')

In [43]:
groupby_major.groups

{'Computer Science': [0, 1, 6, 7], 'Economics': [4, 5, 9], 'Physics': [2], 'Psychology': [3, 8, 10]}

In [45]:
# visualizing more
for name, group in groupby_major:
    print( name + ' : ' + str(len(group))) # 학과이름 : 사람수
    print(group) # 해당학과 학생 목록
    print() # 빈 라인 추가

Computer Science : 4
       name             major     sex
0      John  Computer Science    male
1      Nate  Computer Science    male
6  Jeniffer  Computer Science  female
7    Edward  Computer Science    male

Economics : 3
    name      major     sex
4  Janny  Economics  female
5   Yuna  Economics  female
9  Wendy  Economics  female

Physics : 1
      name    major   sex
2  Abraham  Physics  male

Psychology : 3
     name       major     sex
3   Brian  Psychology    male
8    Zara  Psychology  female
10   Sera  Psychology  female



In [47]:
# df로 만들때
df_major_cnt = pd.DataFrame( {'count' : groupby_major.size()}).reset_index()
# reset_index는 major를 column에 넣어주기위해 적었음
df_major_cnt

Unnamed: 0,major,count
0,Computer Science,4
1,Economics,3
2,Physics,1
3,Psychology,3


In [48]:
# 성별로
groupby_sex = df.groupby('sex')
for name, group in groupby_sex:
    print(name + " : " + str(len(group)))
    print(group)
    print()

female : 6
        name             major     sex
4      Janny         Economics  female
5       Yuna         Economics  female
6   Jeniffer  Computer Science  female
8       Zara        Psychology  female
9      Wendy         Economics  female
10      Sera        Psychology  female

male : 5
      name             major   sex
0     John  Computer Science  male
1     Nate  Computer Science  male
2  Abraham           Physics  male
3    Brian        Psychology  male
7   Edward  Computer Science  male



In [50]:
# df로 변환
df_sex_cnt = pd.DataFrame({"count" : groupby_sex.size()}).reset_index()
df_sex_cnt

Unnamed: 0,sex,count
0,female,6
1,male,5


In [52]:
# 중복 데이터 제거하기

In [53]:
student_list = [{'name': 'John', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Nate', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Abraham', 'major': "Physics", 'sex': "male"},
                {'name': 'Brian', 'major': "Psychology", 'sex': "male"},
                {'name': 'Janny', 'major': "Economics", 'sex': "female"},
                {'name': 'Yuna', 'major': "Economics", 'sex': "female"},
                {'name': 'Jeniffer', 'major': "Computer Science", 'sex': "female"},
                {'name': 'Edward', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Zara', 'major': "Psychology", 'sex': "female"},
                {'name': 'Wendy', 'major': "Economics", 'sex': "female"},
                {'name': 'Sera', 'major': "Psychology", 'sex': "female"},
                {'name': 'John', 'major': "Computer Science", 'sex': "male"},
         ]
df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [54]:
# 0, 11 index가 중복

In [55]:
df.duplicated() # true면 중복

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11     True
dtype: bool

In [56]:
# 중복값 제거(나중의 것 제거)
df.drop_duplicates()

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [58]:
student_list = [{'name': 'John', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Nate', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Abraham', 'major': "Physics", 'sex': "male"},
                {'name': 'Brian', 'major': "Psychology", 'sex': "male"},
                {'name': 'Janny', 'major': "Economics", 'sex': "female"},
                {'name': 'Yuna', 'major': "Economics", 'sex': "female"},
                {'name': 'Jeniffer', 'major': "Computer Science", 'sex': "female"},
                {'name': 'Edward', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Zara', 'major': "Psychology", 'sex': "female"},
                {'name': 'Wendy', 'major': "Economics", 'sex': "female"},
                {'name': 'Nate', 'major': None, 'sex': "male"},
                {'name': 'John', 'major': "Computer Science", 'sex': None},
         ]
df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [60]:
# name이 같을때 중복값 제거하라
df.duplicated('name')

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10     True
11     True
dtype: bool

In [61]:
df.drop_duplicates(['name'], keep = 'first') # 중복 판별 인자, 앞에서부터_디폴트값(last는 뒤에서)

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [62]:
# None값 찾기
school_id_list = [{'name': 'John', 'job': "teacher", 'age': 40},
                {'name': 'Nate', 'job': "teacher", 'age': 35},
                {'name': 'Yuna', 'job': "teacher", 'age': 37},
                {'name': 'Abraham', 'job': "student", 'age': 10},
                {'name': 'Brian', 'job': "student", 'age': 12},
                {'name': 'Janny', 'job': "student", 'age': 11},
                {'name': 'Nate', 'job': "teacher", 'age': None},
                {'name': 'John', 'job': "student", 'age': None}
         ]
df = pd.DataFrame(school_id_list, columns = ['name', 'job', 'age'])
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,
7,John,student,


In [63]:
df.shape

(8, 3)

In [64]:
df.info() # age2개가 NaN임을 확인할 수 있다.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    8 non-null      object 
 1   job     8 non-null      object 
 2   age     6 non-null      float64
dtypes: float64(1), object(2)
memory usage: 320.0+ bytes


In [65]:
df.isna() # NaN값 있으면 True

Unnamed: 0,name,job,age
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,True
7,False,False,True


In [66]:
df.isnull() # isna와 같은 기능

Unnamed: 0,name,job,age
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,True
7,False,False,True


In [67]:
# null 발견하면 0을 집어넣는 경우
df.age = df.age.fillna(0)

In [68]:
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,0.0
7,John,student,0.0


In [69]:
# median값 넣어주기

school_id_list = [{'name': 'John', 'job': "teacher", 'age': 40},
                {'name': 'Nate', 'job': "teacher", 'age': 35},
                {'name': 'Yuna', 'job': "teacher", 'age': 37},
                {'name': 'Abraham', 'job': "student", 'age': 10},
                {'name': 'Brian', 'job': "student", 'age': 12},
                {'name': 'Janny', 'job': "student", 'age': 11},
                {'name': 'Nate', 'job': "teacher", 'age': None},
                {'name': 'John', 'job': "student", 'age': None}
         ]
df = pd.DataFrame(school_id_list, columns = ['name', 'job', 'age'])
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,
7,John,student,


In [71]:
df['age'].fillna(df.groupby('job')['age'].transform('median'), inplace = True)
# job을 기준으로 그룹을 나누고, 그룹들의 나이 median값을 null에 집어넣기. inplace = True는 바로 반영
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,37.0
7,John,student,11.0


In [72]:
# apply 함수 활용
date_list = [{'yyyy-mm-dd': '2000-06-27'},
         {'yyyy-mm-dd': '2002-09-24'},
         {'yyyy-mm-dd': '2005-12-20'}]
df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])
df

Unnamed: 0,yyyy-mm-dd
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [73]:
# column year 추가
def extract_year(column):
    return column.split("-")[0]

In [74]:
df['year'] = df['yyyy-mm-dd'].apply(extract_year)

In [75]:
df

Unnamed: 0,yyyy-mm-dd,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [76]:
# age column 추가_파라미터 받을때
def get_age(year, current_year) : #파라미터값 현재날짜 받음
    return current_year - int(year)

In [79]:
df['age'] = df['year'].apply(get_age, current_year=2022)
df

Unnamed: 0,yyyy-mm-dd,year,age
0,2000-06-27,2000,22
1,2002-09-24,2002,20
2,2005-12-20,2005,17


In [80]:
# 파라미터 여러개
def get_introduce(age, prefix, suffix) :
    return prefix + str(age) + suffix

In [82]:
df['introduce'] = df['age'].apply(get_introduce, prefix='I am ', suffix=' years old')
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2000-06-27,2000,22,I am 22 years old
1,2002-09-24,2002,20,I am 20 years old
2,2005-12-20,2005,17,I am 17 years old


In [86]:
# column여러개로 apply 사용
def get_introduce_2(row) :
    return "I was born in " + str(row.year) + " my age is " + str(row.age)

In [87]:
df.introduce = df.apply(get_introduce_2, axis =1) # df.apply로.

In [88]:
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2000-06-27,2000,22,I was born in 2000 my age is 22
1,2002-09-24,2002,20,I was born in 2002 my age is 20
2,2005-12-20,2005,17,I was born in 2005 my age is 17


In [89]:
# 팬더스 map, applymap

In [93]:
date_list = [{'date': '2000-06-27'},
         {'date': '2002-09-24'},
         {'date': '2005-12-20'}]
df = pd.DataFrame(date_list, columns = ['date'])
df

Unnamed: 0,date
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [94]:
def extract_year(date) :
    return date.split('-')[0]

In [95]:
df['year'] = df['date'].map(extract_year)
# 이 경우 apply와 사용방법 똑같음. map은 다른 방법으로도 사용할 수 있음.

In [96]:
df

Unnamed: 0,date,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [None]:
# map의 다른 활용

In [97]:
job_list = [{'age': 20, 'job': 'student'},
         {'age': 30, 'job': 'developer'},
         {'age': 30, 'job': 'teacher'}]
df = pd.DataFrame(job_list)
df

Unnamed: 0,age,job
0,20,student
1,30,developer
2,30,teacher


In [98]:
# text를 숫자로
df.job = df.job.map({'student' : 1, 'developer' : 2, 'teacher':3})
# dictionary로 map에 전달해주어서 값 변경해줄 수 있다.
df

Unnamed: 0,age,job
0,20,1
1,30,2
2,30,3


In [99]:
# applymap
x_y = [{'x': 5.5, 'y': -5.6},
         {'x': -5.2, 'y': 5.5},
         {'x': -1.6, 'y': -4.5}]
df = pd.DataFrame(x_y)
df

Unnamed: 0,x,y
0,5.5,-5.6
1,-5.2,5.5
2,-1.6,-4.5


In [100]:
# 모든 column에 적용하고싶을 때
import numpy as np

In [101]:
df = df.applymap(np.around) # 모든 값 변경시킬때(반올림으로) applymap

In [102]:
df

Unnamed: 0,x,y
0,6.0,-6.0
1,-5.0,6.0
2,-2.0,-4.0


In [103]:
# map은 특정 column값 변경하고싶을때 활용.

In [107]:
# unique value만 뽑을 때

In [104]:
job_list = [{'name': 'John', 'job': "teacher"},
                {'name': 'Nate', 'job': "teacher"},
                {'name': 'Fred', 'job': "teacher"},
                {'name': 'Abraham', 'job': "student"},
                {'name': 'Brian', 'job': "student"},
                {'name': 'Janny', 'job': "developer"},
                {'name': 'Nate', 'job': "teacher"},
                {'name': 'Obrian', 'job': "dentist"},
                {'name': 'Yuna', 'job': "teacher"},
                {'name': 'Rob', 'job': "lawyer"},
                {'name': 'Brian', 'job': "student"},
                {'name': 'Matt', 'job': "student"},
                {'name': 'Wendy', 'job': "banker"},
                {'name': 'Edward', 'job': "teacher"},
                {'name': 'Ian', 'job': "teacher"},
                {'name': 'Chris', 'job': "banker"},
                {'name': 'Philip', 'job': "lawyer"},
                {'name': 'Janny', 'job': "basketball player"},
                {'name': 'Gwen', 'job': "teacher"},
                {'name': 'Jessy', 'job': "student"}
         ]
df = pd.DataFrame(job_list, columns = ['name', 'job'])

In [105]:
df

Unnamed: 0,name,job
0,John,teacher
1,Nate,teacher
2,Fred,teacher
3,Abraham,student
4,Brian,student
5,Janny,developer
6,Nate,teacher
7,Obrian,dentist
8,Yuna,teacher
9,Rob,lawyer


In [108]:
df.job.unique() # class값 하나씩만 보여줌

array(['teacher', 'student', 'developer', 'dentist', 'lawyer', 'banker',
       'basketball player'], dtype=object)

In [109]:
# 각 직업별로 몇 개의 데이터가 있는지 보고싶을때
df.job.value_counts()

teacher              8
student              5
lawyer               2
banker               2
developer            1
dentist              1
basketball player    1
Name: job, dtype: int64

In [110]:
# 데이터프레임 합치기

In [111]:
l1 = [{'name': 'John', 'job': "teacher"},
      {'name': 'Nate', 'job': "student"},
      {'name': 'Fred', 'job': "developer"}]

l2 = [{'name': 'Ed', 'job': "dentist"},
      {'name': 'Jack', 'job': "farmer"},
      {'name': 'Ted', 'job': "designer"}]
         
df1 = pd.DataFrame(l1, columns = ['name', 'job'])
df2 = pd.DataFrame(l2, columns = ['name', 'job'])

In [112]:
df1

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer


In [113]:
df2

Unnamed: 0,name,job
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [116]:
# 1,2 합치기
#1) concat 사용
result = pd.concat([df1, df2], ignore_index=True)

In [117]:
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


In [120]:
#2) append사용
result2 = df1.append(df2, ignore_index=True)

  result2 = df1.append(df2, ignore_index=True)


In [121]:
result2

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


In [122]:
# 열로 합치는 예제

In [129]:
l3 = [{'name': 'John', 'job': "teacher"},
      {'name': 'Nate', 'job': "student"},
      {'name': 'Jack', 'job': "developer"}]

l4 = [{'age': 25, 'country': "U.S"},
      {'age': 30, 'country': "U.K"},
      {'age': 45, 'country': "Korea"}]
         
df1 = pd.DataFrame(l3, columns = ['name', 'job'])
df2 = pd.DataFrame(l4, columns = ['age', 'country'])

In [130]:
df1

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Jack,developer


In [131]:
df2

Unnamed: 0,age,country
0,25,U.S
1,30,U.K
2,45,Korea


In [132]:
# concat 사용
result = pd.concat([df1, df2], axis=1, ignore_index=True) # axis=1으로 열합치기

In [128]:
result

Unnamed: 0,0,1,2,3
0,John,teacher,25,U.S
1,Nate,student,30,U.K
2,Jack,developer,45,Korea


In [133]:
# list합치는 경우

In [134]:
# 실제값, 예측값 비교시
label = [1,2,3,4,5]
prediction = [1,2,2,4,4]


In [137]:
# 어떤 value가 제대로 예측되지않는지 분간이 힘들때가 있음
# 데이터프레임으로 만들어주면 보기 편함
comparison = pd.DataFrame({'label' : label, 'prediction' : prediction})

In [138]:
comparison

Unnamed: 0,label,prediction
0,1,1
1,2,2
2,3,2
3,4,4
4,5,4
