<a href="https://colab.research.google.com/github/hyeji-K/AI_example/blob/main/Pandas_%EA%B8%B0%EC%B4%88.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **1. 팬더스, 데이터프레임, 시리즈 알아보기**
- 팬더스가 엑셀보다 빠름
    - 엑셀은 프로그램을 만들 수 없음
    - 팬더스는 numpy 사용 - 숫자 계산에 강력한 퍼포머스
- 각 컬럼들을 시리즈라고 하며 데이터 프레임은 시리즈의 결합체?
- 시리즈는 파이썬의 리스트를 이용하여 만들 수 있음

In [2]:
import pandas as pd

In [None]:
data_frame = pd.read_csv('data/friend_list.csv') # 데이터 불러오기

In [2]:
s1 = pd.core.series.Series([1,2,3])
s2 = pd.core.series.Series(['one','two','three'])

In [3]:
pd.DataFrame(data=dict(num=s1, word=s2))

Unnamed: 0,num,word
0,1,one
1,2,two
2,3,three


### **2. 파일에서 데이터 불러오기**


In [None]:
df = pd.read_csv('data/friend_list.csv')

In [None]:
df.head() # 처음부터 다섯개를 불러옴
df.tail() # 뒤에서부터 불러옴

In [None]:
# 쉼표가 아닌 탭으로 컬럼들이 구분되어 있는 데이터일때
df = pd.read_csv('data/friend_list_tab.txt', delimiter= '\t')

In [None]:
# 컬럼에 대한 이름이 없을 경우
df = pd.read_csv('data/friend_list_no_head.csv', header= None)
df.columns = ['name', 'age', 'job']

In [None]:
df = pd.read_csv('data/friend_list_no_head.csv', header= None, names=['name', 'age', 'job'])

### **3. 데이터프레임 생성하기**

In [6]:
friend_dict_list = [
    {'name': 'John', 'age': 25, 'job': 'student'},
    {'name': 'Nate', 'age': 30, 'job': 'teacher'}
]

In [15]:
df = pd.DataFrame(friend_dict_list)

In [17]:
df.head()
# 키의 값에 대한 순서 ????

Unnamed: 0,name,age,job
0,John,25,student
1,Nate,30,teacher


In [19]:
from collections import OrderedDict
# 키의 순서를 보장

In [20]:
friend_ordered_dict = OrderedDict(
    [
        ('name', ['John', 'Nate']),
        ('age', [25, 30]),
        ('job', ['student', 'teacher'])
    ]
)

In [21]:
df = pd.DataFrame.from_dict(friend_ordered_dict)

In [22]:
df.head()

Unnamed: 0,name,age,job
0,John,25,student
1,Nate,30,teacher


In [23]:
# 파이썬의 리스트를 사용해서 만드는 방법
friend_list = [
    ['John', 20, 'student'],
    ['Nate', 30, 'teacher']
]

In [24]:
column_name = ['name', 'age', 'job']

In [26]:
df = pd.DataFrame.from_records(friend_list, columns = column_name)

In [27]:
df.head()

Unnamed: 0,name,age,job
0,John,20,student
1,Nate,30,teacher


In [45]:
friend_list = [
        ['name', ['John', 'Nate']],
        ['age', [25, 30]],
        ['job', ['student', 'teacher']]
]

In [None]:
df = pd.DataFrame.from_items(friend_list)

최신 버전에서는 from_items() 메소드를 지원하지 않는다. 따라서 list를 dict로 캐스팅 해 준 후 from_dict() 메소드를 사용하면 데이터프레임으로 생성할 수 있다.

In [46]:
df = pd.DataFrame.from_dict(dict(friend_list))

In [47]:
df

Unnamed: 0,name,age,job
0,John,25,student
1,Nate,30,teacher


### **4. 데이터프레임 파일로 저장하기(to_csv)**

In [None]:
df.to_csv('friends.csv', index=True, header=True) # 디폴트

In [None]:
# None 값은 기본적으로 빈칸으로 설정이 됨
df.to_csv('friends.csv', index=True, header=True, na_rep = '-') # None -> -로 빈칸을 채움

### **5. 데이터프레임 행, 열(row, column) 선택 및 필터하기**

In [31]:
df[1:3] # 처음은 포함하고 마지막은 포함하지 않음

Unnamed: 0,name,age,job
1,Nate,30,teacher


In [None]:
df = df[1:3] # 기존 데이터프레임에 적용

In [None]:
# 불연속적인 데이터를 가져올때
df.loc[[0,2]]

#### by column condition

In [None]:
df[df.age > 25]
df.query('age>25')

In [None]:
# 이름이 nate이고 나이가 25살 이상인 사람을 불러오기
df[(df.age > 25) & (df.name == 'Nate')]

#### Filter Column

In [32]:
# by index
friend_list = [
    ['John', 20, 'student'],
    ['Jenny', 30, 'developer'],
    ['Nate', 30, 'teacher']
]
df = pd.DataFrame.from_records(friend_list)

In [33]:
# 컬럼 index를 이용해서 필터하는 방법
df.iloc[:, 0:2] # 앞부분은 행(row), 뒷부분은 열(column)

Unnamed: 0,0,1
0,John,20
1,Jenny,30
2,Nate,30


In [38]:
# 컬럼의 이름을 사용해서 필터하는 방법
df = pd.read_csv('data/friend_list_no_head.csv', header= None, names=['name', 'age', 'job'])

In [39]:
df_filtered = df[['name', 'age']]

In [40]:
df_filtered

Unnamed: 0,name,age
0,John,20
1,Jenny,30
2,Nate,30
3,Julia,40
4,Brian,45
5,Chris,25


In [41]:
df.filter(items=['age', 'job'])

Unnamed: 0,age,job
0,20,student
1,30,developer
2,30,teacher
3,40,dentist
4,45,manager
5,25,intern


In [42]:
df.filter(like='a', axis=1) # 컬럼 이름에 'a'가 들어간 것만 필터링

Unnamed: 0,name,age
0,John,20
1,Jenny,30
2,Nate,30
3,Julia,40
4,Brian,45
5,Chris,25


In [43]:
df.filter(regex='b$', axis=1) # 컬럼 이름이 'b'로 끝나는 것만 필터링

Unnamed: 0,job
0,student
1,developer
2,teacher
3,dentist
4,manager
5,intern


In [36]:
%cd /content/drive/MyDrive/생성AI_1회차/AI_example/

/content/drive/MyDrive/생성AI_1회차/AI_example


In [37]:
pwd

'/content/drive/MyDrive/생성AI_1회차/AI_example'

### **6. 데이터프레임 행, 열(drop row, column) 삭제하기**

In [3]:
friend_dict_list = [{'age': 20, 'job': 'student'},
         {'age': 30, 'job': 'developer'},
         {'age': 30, 'job': 'teacher'}]
df = pd.DataFrame(friend_dict_list, index = ['John', 'Jenny', 'Nate'])

In [4]:
df.drop(['John', 'Nate'])

Unnamed: 0,age,job
Jenny,30,developer


In [None]:
# 드랍한 row를 데이터 프레임에 반영
df.drop(['John', 'Nate'], inplace = True)

In [7]:
friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},
         {'name': 'Jenny', 'age': 30, 'job': 'developer'},
         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]
df = pd.DataFrame(friend_dict_list)
df

Unnamed: 0,name,age,job
0,Jone,20,student
1,Jenny,30,developer
2,Nate,30,teacher


In [6]:
# 인덱스로 삭제
df.drop(df.index[[0,2]])

Unnamed: 0,name,age,job
1,Jenny,30,developer


In [8]:
# 조건에 맞는 것 삭제
df[df.age > 20]

Unnamed: 0,name,age,job
1,Jenny,30,developer
2,Nate,30,teacher


In [9]:
# 컬럼 drop
df.drop('age', axis=1)

Unnamed: 0,name,job
0,Jone,student
1,Jenny,developer
2,Nate,teacher


### **7. 행, 열 생성 및 수정하기**

In [10]:
friend_dict_list = [{'name': 'Jone', 'age': 20, 'job': 'student'},
         {'name': 'Jenny', 'age': 30, 'job': 'developer'},
         {'name': 'Nate', 'age': 30, 'job': 'teacher'}]
df = pd.DataFrame(friend_dict_list)

In [11]:
df['salary'] = 0

In [12]:
df.head()

Unnamed: 0,name,age,job,salary
0,Jone,20,student,0
1,Jenny,30,developer,0
2,Nate,30,teacher,0


In [25]:
import numpy as np

In [14]:
df['salary'] = np.where(df['job'] != 'student', 'yes', 'no')

In [15]:
df.head()

Unnamed: 0,name,age,job,salary
0,Jone,20,student,no
1,Jenny,30,developer,yes
2,Nate,30,teacher,yes


In [16]:
friend_dict_list = [{'name': 'John', 'midterm': 95, 'final': 85},
         {'name': 'Jenny', 'midterm': 85, 'final': 80},
         {'name': 'Nate', 'midterm': 10, 'final': 30}]
df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])

In [17]:
# 총점 추가
df['total'] = df['midterm'] + df['final']

In [18]:
df

Unnamed: 0,name,midterm,final,total
0,John,95,85,180
1,Jenny,85,80,165
2,Nate,10,30,40


In [19]:
# 평균 점수 추가
df['average'] = df['total'] / 2

In [11]:
df.head()

Unnamed: 0,name,midterm,final,total,average
0,John,95,85,180,90.0
1,Jenny,85,80,165,82.5
2,Nate,10,30,40,20.0


In [12]:
grades = []

for row in df['average']:
    if row >= 90:
        grades.append('A')
    elif row >= 80:
        grades.append('B')
    elif row >= 70:
        grades.append('C')
    else:
        grades.append('F')

df['grade'] = grades

In [13]:
df.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,A
1,Jenny,85,80,165,82.5,B
2,Nate,10,30,40,20.0,F


In [17]:
def pass_or_fail(row):
    if row != "F":
        return 'Pass'
    else:
        return 'Fail'

In [18]:
df.grade = df.grade.apply(pass_or_fail)

In [16]:
df.head()

Unnamed: 0,name,midterm,final,total,average,grade
0,John,95,85,180,90.0,Pass
1,Jenny,85,80,165,82.5,Pass
2,Nate,10,30,40,20.0,Fail


In [20]:
date_list = [{'yyyy-mm-dd': '2000-06-27'},
         {'yyyy-mm-dd': '2002-09-24'},
         {'yyyy-mm-dd': '2005-12-20'}]
df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])

In [21]:
df.head()

Unnamed: 0,yyyy-mm-dd
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [6]:
def extract_year(row):
    return row.split('-')[0]

In [7]:
df['year'] = df['yyyy-mm-dd'].apply(extract_year)

In [24]:
df

Unnamed: 0,yyyy-mm-dd,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [25]:
friend_dict_list = [{'name': 'John', 'midterm': 95, 'final': 85},
         {'name': 'Jenny', 'midterm': 85, 'final': 80},
         {'name': 'Nate', 'midterm': 10, 'final': 30}]
df = pd.DataFrame(friend_dict_list, columns = ['name', 'midterm', 'final'])

In [26]:
df2 = pd.DataFrame([['Ben', 50,50]], columns = ['name', 'midterm', 'final'])

In [None]:
df.append(df2, ignore_index=True) # DataFrame.append 메소드는 Pandas 2.0.0 이상 버전에서 사용되지 않는 것이 맞습니다.

In [28]:
new_df = pd.concat([df, df2])

In [29]:
new_df

Unnamed: 0,name,midterm,final
0,John,95,85
1,Jenny,85,80
2,Nate,10,30
0,Ben,50,50


### **8. 데이터 그룹 만들기 (group by)**

In [30]:
student_list = [{'name': 'John', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Nate', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Abraham', 'major': "Physics", 'sex': "male"},
                {'name': 'Brian', 'major': "Psychology", 'sex': "male"},
                {'name': 'Janny', 'major': "Economics", 'sex': "female"},
                {'name': 'Yuna', 'major': "Economics", 'sex': "female"},
                {'name': 'Jeniffer', 'major': "Computer Science", 'sex': "female"},
                {'name': 'Edward', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Zara', 'major': "Psychology", 'sex': "female"},
                {'name': 'Wendy', 'major': "Economics", 'sex': "female"},
                {'name': 'Sera', 'major': "Psychology", 'sex': "female"}
         ]
df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [31]:
groupby_major = df.groupby('major')

In [32]:
groupby_major.groups

{'Computer Science': [0, 1, 6, 7], 'Economics': [4, 5, 9], 'Physics': [2], 'Psychology': [3, 8, 10]}

In [33]:
for name, group in groupby_major:
    print(name + ": " + str(len(group)))
    print(group)
    print()

Computer Science: 4
       name             major     sex
0      John  Computer Science    male
1      Nate  Computer Science    male
6  Jeniffer  Computer Science  female
7    Edward  Computer Science    male

Economics: 3
    name      major     sex
4  Janny  Economics  female
5   Yuna  Economics  female
9  Wendy  Economics  female

Physics: 1
      name    major   sex
2  Abraham  Physics  male

Psychology: 3
     name       major     sex
3   Brian  Psychology    male
8    Zara  Psychology  female
10   Sera  Psychology  female



In [34]:
df_major_cnt = pd.DataFrame({'count' : groupby_major.size()}).reset_index()
df_major_cnt

Unnamed: 0,major,count
0,Computer Science,4
1,Economics,3
2,Physics,1
3,Psychology,3


### **9. 중복 데이터 삭제하기**

In [35]:
student_list = [{'name': 'John', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Nate', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Abraham', 'major': "Physics", 'sex': "male"},
                {'name': 'Brian', 'major': "Psychology", 'sex': "male"},
                {'name': 'Janny', 'major': "Economics", 'sex': "female"},
                {'name': 'Yuna', 'major': "Economics", 'sex': "female"},
                {'name': 'Jeniffer', 'major': "Computer Science", 'sex': "female"},
                {'name': 'Edward', 'major': "Computer Science", 'sex': "male"},
                {'name': 'Zara', 'major': "Psychology", 'sex': "female"},
                {'name': 'Wendy', 'major': "Economics", 'sex': "female"},
                {'name': 'Sera', 'major': "Psychology", 'sex': "female"},
                {'name': 'John', 'major': "Computer Science", 'sex': "male"},
         ]
df = pd.DataFrame(student_list, columns = ['name', 'major', 'sex'])
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [36]:
df.duplicated()

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
5,False
6,False
7,False
8,False
9,False


In [37]:
df = df.drop_duplicates()

In [38]:
df

Unnamed: 0,name,major,sex
0,John,Computer Science,male
1,Nate,Computer Science,male
2,Abraham,Physics,male
3,Brian,Psychology,male
4,Janny,Economics,female
5,Yuna,Economics,female
6,Jeniffer,Computer Science,female
7,Edward,Computer Science,male
8,Zara,Psychology,female
9,Wendy,Economics,female


In [None]:
# 이름이 같은 값인 행을 제거
df.duplicated(['name'])
df.drop_duplicates(['name'], keep='last')

### **10. NaN(None) 찾아서 다른 값으로 변경하기 (fillna)**

In [39]:
school_id_list = [{'name': 'John', 'job': "teacher", 'age': 40},
                {'name': 'Nate', 'job': "teacher", 'age': 35},
                {'name': 'Yuna', 'job': "teacher", 'age': 37},
                {'name': 'Abraham', 'job': "student", 'age': 10},
                {'name': 'Brian', 'job': "student", 'age': 12},
                {'name': 'Janny', 'job': "student", 'age': 11},
                {'name': 'Nate', 'job': "teacher", 'age': None},
                {'name': 'John', 'job': "student", 'age': None}
         ]
df = pd.DataFrame(school_id_list, columns = ['name', 'job', 'age'])
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,
7,John,student,


In [41]:
df.shape

(8, 3)

In [40]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    8 non-null      object 
 1   job     8 non-null      object 
 2   age     6 non-null      float64
dtypes: float64(1), object(2)
memory usage: 320.0+ bytes


In [42]:
df.isna()

Unnamed: 0,name,job,age
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,True
7,False,False,True


In [43]:
df.isnull()

Unnamed: 0,name,job,age
0,False,False,False
1,False,False,False
2,False,False,False
3,False,False,False
4,False,False,False
5,False,False,False
6,False,False,True
7,False,False,True


In [44]:
df.age = df.age.fillna(0)

In [45]:
df

Unnamed: 0,name,job,age
0,John,teacher,40.0
1,Nate,teacher,35.0
2,Yuna,teacher,37.0
3,Abraham,student,10.0
4,Brian,student,12.0
5,Janny,student,11.0
6,Nate,teacher,0.0
7,John,student,0.0


In [None]:
# fill missing age with median age for each group (teacher, student)
df["age"].fillna(df.groupby('job')['age'].transform('median'), inplace=True)

### **11. apply 함수, 다양한 예제로 활용해보기**

In [3]:
date_list = [{'yyyy-mm-dd': '2000-06-27'},
         {'yyyy-mm-dd': '2002-09-24'},
         {'yyyy-mm-dd': '2005-12-20'}]
df = pd.DataFrame(date_list, columns = ['yyyy-mm-dd'])

In [4]:
def get_age(year, current_year):
    return current_year - int(year)

In [8]:
df['age'] = df['year'].apply(get_age, current_year=2018)

In [9]:
df

Unnamed: 0,yyyy-mm-dd,year,age
0,2000-06-27,2000,18
1,2002-09-24,2002,16
2,2005-12-20,2005,13


In [10]:
def get_introduce(age, prefix, suffix):
    return prefix + str(age) + suffix

In [11]:
df['introduce'] = df['age'].apply(get_introduce, prefix='I am ', suffix=' years old')

In [12]:
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce
0,2000-06-27,2000,18,I am 18 years old
1,2002-09-24,2002,16,I am 16 years old
2,2005-12-20,2005,13,I am 13 years old


In [13]:
# 여러개의 컬럼을 apply로 적용
def get_introduce_2(row):
    return "I was born in " + str(row.year) + " my age is " + str(row.age)

In [14]:
df['introduce_2'] = df.apply(get_introduce_2, axis=1)

In [15]:
df

Unnamed: 0,yyyy-mm-dd,year,age,introduce,introduce_2
0,2000-06-27,2000,18,I am 18 years old,I was born in 2000 my age is 18
1,2002-09-24,2002,16,I am 16 years old,I was born in 2002 my age is 16
2,2005-12-20,2005,13,I am 13 years old,I was born in 2005 my age is 13


### **12. map, applymap 함수, 다양항 예제로 활용해보기**

In [16]:
date_list = [{'date': '2000-06-27'},
         {'date': '2002-09-24'},
         {'date': '2005-12-20'}]
df = pd.DataFrame(date_list, columns = ['date'])
df

Unnamed: 0,date
0,2000-06-27
1,2002-09-24
2,2005-12-20


In [19]:
def extract_year(date):
    return date.split('-')[0]

In [20]:
df['year'] = df['date'].map(extract_year)

In [21]:
df

Unnamed: 0,date,year
0,2000-06-27,2000
1,2002-09-24,2002
2,2005-12-20,2005


In [22]:
job_list = [{'age': 20, 'job': 'student'},
         {'age': 30, 'job': 'developer'},
         {'age': 30, 'job': 'teacher'}]
df = pd.DataFrame(job_list)

In [23]:
df.job = df.job.map({"student":1,"developer":2,"teacher":3})
# 딕셔너리 직접 전달 가능

In [24]:
# applymap - 모든 컬럼에 적용
x_y = [{'x': 5.5, 'y': -5.6},
         {'x': -5.2, 'y': 5.5},
         {'x': -1.6, 'y': -4.5}]
df = pd.DataFrame(x_y)

In [26]:
df = df.applymap(np.around)

  df = df.applymap(np.around)


In [27]:
df

Unnamed: 0,age,job
0,20,1
1,30,2
2,30,3


### **13. 컬럼 내 유니크한 값 뽑아내고 갯수 확인하기 (unique, value_count)**

In [28]:
job_list = [{'name': 'John', 'job': "teacher"},
                {'name': 'Nate', 'job': "teacher"},
                {'name': 'Fred', 'job': "teacher"},
                {'name': 'Abraham', 'job': "student"},
                {'name': 'Brian', 'job': "student"},
                {'name': 'Janny', 'job': "developer"},
                {'name': 'Nate', 'job': "teacher"},
                {'name': 'Obrian', 'job': "dentist"},
                {'name': 'Yuna', 'job': "teacher"},
                {'name': 'Rob', 'job': "lawyer"},
                {'name': 'Brian', 'job': "student"},
                {'name': 'Matt', 'job': "student"},
                {'name': 'Wendy', 'job': "banker"},
                {'name': 'Edward', 'job': "teacher"},
                {'name': 'Ian', 'job': "teacher"},
                {'name': 'Chris', 'job': "banker"},
                {'name': 'Philip', 'job': "lawyer"},
                {'name': 'Janny', 'job': "basketball player"},
                {'name': 'Gwen', 'job': "teacher"},
                {'name': 'Jessy', 'job': "student"}
         ]
df = pd.DataFrame(job_list, columns = ['name', 'job'])

In [29]:
df.job.unique()

array(['teacher', 'student', 'developer', 'dentist', 'lawyer', 'banker',
       'basketball player'], dtype=object)

In [31]:
df.job.value_counts()

Unnamed: 0_level_0,count
job,Unnamed: 1_level_1
teacher,8
student,5
lawyer,2
banker,2
developer,1
dentist,1
basketball player,1


### **14. 두개의 데이터프레임 합치기(concat, append)**

In [32]:
l1 = [{'name': 'John', 'job': "teacher"},
      {'name': 'Nate', 'job': "student"},
      {'name': 'Fred', 'job': "developer"}]

l2 = [{'name': 'Ed', 'job': "dentist"},
      {'name': 'Jack', 'job': "farmer"},
      {'name': 'Ted', 'job': "designer"}]

df1 = pd.DataFrame(l1, columns = ['name', 'job'])
df2 = pd.DataFrame(l2, columns = ['name', 'job'])

In [33]:
# 1.
result = pd.concat([df1, df2])

In [34]:
result

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
0,Ed,dentist
1,Jack,farmer
2,Ted,designer


In [36]:
result_2 = pd.concat([df1, df2], ignore_index=True)

In [37]:
result_2

Unnamed: 0,name,job
0,John,teacher
1,Nate,student
2,Fred,developer
3,Ed,dentist
4,Jack,farmer
5,Ted,designer


In [None]:
result = df1.append(df2) # DataFrame.append 메소드는 Pandas 2.0.0 이상 버전에서 사용되지 않는 것이 맞습니다.

In [39]:
l1 = [{'name': 'John', 'job': "teacher"},
      {'name': 'Nate', 'job': "student"},
      {'name': 'Fred', 'job': "developer"}]

l2 = [{'name': 'Ed', 'job': "dentist"},
      {'name': 'Jack', 'job': "farmer"},
      {'name': 'Ted', 'job': "designer"}]

l3 = [{'name': 'John', 'job': "dentist"},
      {'name': 'Nate', 'job': "farmer"},
      {'name': 'Jack', 'job': "designer"}]

l4 = [{'age': 25, 'country': "U.S"},
      {'age': 30, 'country': "U.K"},
      {'age': 45, 'country': "Korea"}]

In [42]:
df1 = pd.DataFrame(l3, columns = ['name', 'job'])
df2 = pd.DataFrame(l4, columns = ['age', 'country'])

In [43]:
df1

Unnamed: 0,name,job
0,John,dentist
1,Nate,farmer
2,Jack,designer


In [44]:
df2

Unnamed: 0,age,country
0,25,U.S
1,30,U.K
2,45,Korea


In [45]:
result = pd.concat([df1, df2], axis=1, ignore_index=True) # 열로 합치기

In [46]:
result

Unnamed: 0,0,1,2,3
0,John,dentist,25,U.S
1,Nate,farmer,30,U.K
2,Jack,designer,45,Korea


In [47]:
# 리스트로 합치기
label = [1,2,3,4,5]
prediction = [1,2,2,4,4]

In [48]:
comparision = pd.DataFrame({'label': label, 'prediction': prediction})

In [49]:
comparision

Unnamed: 0,label,prediction
0,1,1
1,2,2
2,3,2
3,4,4
4,5,4
