In [1]:
import numpy as np
import pandas as pd

## 0. 데이터 준비하기

In [5]:
# Seed the legacy global NumPy RNG so every run generates the same sample.
np.random.seed(0)

# Candidate values for the categorical columns; None stands for a missing name.
name_pool = ["Alice", "Bob", "Charlie", "David", "Eve", None]
city_pool = ['Seoul', 'New York', 'Paris', 'London', 'Berlin', 'Tokyo']

# Columns are drawn in a fixed order (Name, Age, City, Salary); changing the
# order would consume the seeded random stream differently.
data = {}
data["Name"] = np.random.choice(name_pool, size=1000)
data["Age"] = np.random.randint(20, 60, size=1000)
data["City"] = np.random.choice(city_pool, size=1000)
data["Salary"] = np.random.randint(50000, 120000, size=1000)

# Main table: 1000 rows, written without the index column.
df = pd.DataFrame(data)
df.to_csv('sample_data.csv', index=False)

# Lookup table: one randomly chosen department per distinct non-missing name.
departments = ['HR', 'Marketing', 'Sales', 'IT', 'Finance']
known_names = df['Name'].dropna().unique()
df2 = pd.DataFrame({
    "Name": known_names,
    "Departments": np.random.choice(departments, size=len(known_names)),
})
df2.to_csv('sample_data2.csv', index=False)

## 1. 불러오기/저장하기

In [6]:
# Reload the sample table from the CSV file; `df` now holds the on-disk copy.
df = pd.read_csv(filepath_or_buffer='sample_data.csv')
df

Unnamed: 0,Name,Age,City,Salary
0,Eve,35,Seoul,50889
1,,40,Berlin,110224
2,Alice,29,Tokyo,80141
3,David,24,Paris,59740
4,David,44,Paris,62998
...,...,...,...,...
995,Eve,24,New York,77062
996,,30,Tokyo,76061
997,,26,Tokyo,55645
998,David,28,Seoul,110482


In [13]:
# 1-5. WRITE: save as a multi-line JSON file (one JSON record per line),
# then read the file back to check the round trip.
df.to_json(path_or_buf='new_data_multilines.json', lines=True, orient='records')

new_df = pd.read_json(path_or_buf="new_data_multilines.json", lines=True)
new_df

Unnamed: 0,Name,Age,City,Salary
0,Eve,35,Seoul,50889
1,,40,Berlin,110224
2,Alice,29,Tokyo,80141
3,David,24,Paris,59740
4,David,44,Paris,62998
...,...,...,...,...
995,Eve,24,New York,77062
996,,30,Tokyo,76061
997,,26,Tokyo,55645
998,David,28,Seoul,110482


## 2. 데이터 정제

In [9]:
# Keep only the rows whose Name contains a lowercase 'a'.
# The match is case-sensitive, and rows with a missing Name count as
# "no match" (na=False) so they are dropped rather than propagated.
df_with_a = df.loc[lambda frame: frame['Name'].str.contains('a', na=False)]
df_with_a

Unnamed: 0,Name,Age,City,Salary
3,David,24,Paris,59740
4,David,44,Paris,62998
5,David,50,Tokyo,104176
7,David,39,New York,117249
9,Charlie,33,Berlin,50075
...,...,...,...,...
985,Charlie,35,Berlin,65870
987,Charlie,35,Paris,116099
989,Charlie,37,New York,116816
994,David,29,Tokyo,82691


## 3. 데이터 변환

In [14]:
# 3-1. Convert every name to upper case.
# The vectorized .str accessor skips missing values on its own, so the
# per-element lambda with a manual pd.notna guard is unnecessary. Missing
# names stay missing; a non-string entry would become NaN instead of raising.
df["Name"] = df["Name"].str.upper()
df

Unnamed: 0,Name,Age,City,Salary
0,EVE,35,Seoul,50889
1,,40,Berlin,110224
2,ALICE,29,Tokyo,80141
3,DAVID,24,Paris,59740
4,DAVID,44,Paris,62998
...,...,...,...,...
995,EVE,24,New York,77062
996,,30,Tokyo,76061
997,,26,Tokyo,55645
998,DAVID,28,Seoul,110482


In [None]:
# 3-2 