# Pandas 라이브러리를 활용한 데이터 수집 및 저장

# 3. 데이터베이스 데이터 수집 및 저장 (MongoDB)

### 라이브러리 선언하기

In [20]:
# Install the pymongo driver. The %pip magic installs into the environment of
# the running kernel; a plain `!pip` shell escape may target a different Python.
%pip install pymongo



In [21]:
# Pandas 패키지 불러오기
import pandas as pd
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

### 1. 파일 데이터 불러오기

In [22]:
# Load the CSV file into a DataFrame.
# Local alternative: selloutLocalUrl = "../dataset/kopo_product_volume.csv"
selloutGitUrl = "https://raw.githubusercontent.com/hyokwan/python-lecture/master/dataset/kopo_product_volume.csv"
selloutData = pd.read_csv(filepath_or_buffer=selloutGitUrl)

# Preview the first five rows (five is also the default of head()).
selloutData.head(n=5)

Unnamed: 0,REGIONID,PRODUCTGROUP,YEARWEEK,VOLUME
0,A01,ST0001,201415,810144
1,A01,ST0002,201415,128999
2,A01,ST0001,201418,671464
3,A01,ST0002,201418,134467
4,A01,ST0001,201413,470040


In [23]:
# Show the type of the object that pd.read_csv returned in the previous cell.
type(selloutData)

### 2. MongoDB 서버 접속

In [34]:
# The except clause below names ConnectionFailure, which the notebook never
# imports; without this import a failed connection would surface as NameError
# instead of the intended message.
from pymongo.errors import ConnectionFailure

# MongoDB connection settings (local server example)
# user = "hkcode"
# password = "1234"
# host = '127.0.0.1'
# port = 27017
# database = 'hdb'

# Connect with MongoClient to a local server
# uri = f"mongodb://{user}:{password}@{host}:{port}/{database}?authSource=admin"
# client = MongoClient(uri)

# uri = "<cloud connection string>"
# NOTE(review): `uri` is only assigned in the commented-out lines above, so it
# must be defined (uncomment one option and fill in a real connection string)
# before this cell is run.

# Connect to the cloud-hosted MongoDB
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

try:
    # Verify the connection by requesting the server information
    server_info = client.server_info()
    print("Connected to MongoDB server:", server_info)
except ConnectionFailure as e:
    # Report the failure instead of letting the cell abort with a traceback
    print("Could not connect to MongoDB server:", e)

Connected to MongoDB server: {'version': '7.0.12', 'gitVersion': 'b6513ce0781db6818e24619e8a461eae90bc94fc', 'modules': ['enterprise'], 'allocator': 'tcmalloc', 'javascriptEngine': 'mozjs', 'sysInfo': 'deprecated', 'versionArray': [7, 0, 12, 0], 'bits': 64, 'debug': False, 'maxBsonObjectSize': 16777216, 'storageEngines': ['devnull', 'inMemory', 'queryable_wt', 'wiredTiger'], 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1723537542, 5), 'signature': {'hash': b'\xedJs\xfc\x83\xcd\xaa\x19\xd0;\xcf\xfe\xa6\xd98\xb9X=\xb44', 'keyId': 7351290462197514241}}, 'operationTime': Timestamp(1723537542, 5)}


### 3. MongoDB 데이터프레임 저장

In [None]:
# Name of the database; later cells use it as the key in client[database].
database = "hkcodedb2"

In [25]:
# Name of the collection that will hold the DataFrame rows.
collection_name = "kopo_product_collect"

# Obtain handles to the database and to the collection inside it.
db = client.get_database(database)
collection = db.get_collection(collection_name)


In [26]:
# 데이터프레임을 MongoDB에 저장
records = selloutData.to_dict(orient='records')
records

[{'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0001',
  'YEARWEEK': 201415,
  'VOLUME': 810144},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0002',
  'YEARWEEK': 201415,
  'VOLUME': 128999},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0001',
  'YEARWEEK': 201418,
  'VOLUME': 671464},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0002',
  'YEARWEEK': 201418,
  'VOLUME': 134467},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0001',
  'YEARWEEK': 201413,
  'VOLUME': 470040},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0002',
  'YEARWEEK': 201413,
  'VOLUME': 140297},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0001',
  'YEARWEEK': 201411,
  'VOLUME': 524787},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0002',
  'YEARWEEK': 201411,
  'VOLUME': 145509},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0001',
  'YEARWEEK': 201416,
  'VOLUME': 764927},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0002',
  'YEARWEEK': 201416,
  'VOLUME': 140313},
 {'REGIONID': 'A01',
  'PRODUCTGROUP': 'ST0001',
  'YEARWEEK': 201419,
  'VOLUME

In [27]:
# Insert all converted rows into the collection with a single bulk call.
# NOTE: insert_many() appends documents; nothing here clears the collection
# first, so running this cell again stores the same rows a second time.
# insert_many() rejects an empty list, so only call it when there are rows.
if records:
    collection.insert_many(records)
    print("Data inserted successfully into MongoDB collection:", collection_name)
else:
    print("No records to insert into MongoDB collection:", collection_name)

Data inserted successfully into MongoDB collection: kopo_product_collect


### 4. 데이터 조회

In [29]:
# Re-acquire the database and collection handles for this cell.
db = client.get_database(database)
collection = db.get_collection(collection_name)

# Query every document in the collection and load the result into a DataFrame.
inDf = pd.DataFrame(data=collection.find())
inDf

Unnamed: 0,_id,REGIONID,PRODUCTGROUP,YEARWEEK,VOLUME
0,66bb03a18b9d758c0c2bf53c,A01,ST0001,201415,810144
1,66bb03a18b9d758c0c2bf53d,A01,ST0002,201415,128999
2,66bb03a18b9d758c0c2bf53e,A01,ST0001,201418,671464
3,66bb03a18b9d758c0c2bf53f,A01,ST0002,201418,134467
4,66bb03a18b9d758c0c2bf540,A01,ST0001,201413,470040
...,...,...,...,...,...
525,66bb071f8b9d758c0c2bf74c,A01,ST0002,201614,148835
526,66bb071f8b9d758c0c2bf74d,A01,ST0001,201641,746061
527,66bb071f8b9d758c0c2bf74e,A01,ST0002,201412,151750
528,66bb071f8b9d758c0c2bf74f,A01,ST0001,201420,645626


In [32]:
# Re-acquire the database and collection handles for this cell.
db = client.get_database(database)
collection = db.get_collection(collection_name)

# Open a cursor over every document in the collection.
cursor = collection.find()

# Materialize the cursor into a list and build a DataFrame from it,
# then report its (rows, columns) shape.
selloutDf = pd.DataFrame(data=list(cursor))
selloutDf.shape

(530, 5)