In [29]:
import warnings

# openpyxl 관련 경고 무시
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")


In [37]:
import requests
import io, time
import pandas as pd
from google.cloud import bigquery
from datetime import datetime, timedelta

# API 호출 함수 정의
def call_api(dclPrductSeCd, start_date, end_date):
    base_url = f"https://impfood.mfds.go.kr/CFCCC01F01/getExcelFile?totalCnt=3215&page=1&limit=10&dclPrductSeCd={dclPrductSeCd}&prductNm=&rpsntItmNm=&bsshNm=&srchNtncd=&ovsmnfstNm=&returnChk=&sameSearch=&srchStrtDt={start_date}&srchEndDt={end_date}&expirdeBeginDtm=&expirdeEndDtm=&srchHistNo=&rpsntItmCd=&oemFoodYn="
    response = requests.get(base_url)
    response.raise_for_status()
    if response.status_code == 200:
        xlsx_file = io.BytesIO(response.content)

        # Pandas를 사용하여 XLSX 파일 읽기
        df = pd.read_excel(xlsx_file)
        return df
    else:
        raise Exception(f"[{response.status_code}] {response.text}")


def modify_df(df):
    df.rename(columns={
        'NO': 'number',
        '구분': 'category',
        '수입업체': 'importer',
        '제품명(한글)': 'product_name_kr',
        '제품명(영문)': 'product_name_en',
        '품목(유형)': 'item_type',
        '해외제조업소': 'manufacturer_foreign',
        '처리일자': 'processing_date',
        '소비기한': 'expiration_date',
        '제조국': 'country_of_manufacture',
        '수출국': 'country_of_export',
        '냉동전환번호': 'frozen_conversion_number',
        '이력추적번호': 'traceability_number',
        "원재료": "raw_material"
    }, inplace=True)

    # 변경된 컬럼명으로 데이터프레임의 처음 몇 줄 확인
    df.drop(columns=['number'], inplace=True)
    df_converted = df.astype(str).replace('nan', pd.NA)
    df_converted['processing_date'] = pd.to_datetime(df_converted['processing_date'], errors='coerce').dt.date

    return df_converted


# BigQuery 클라이언트 초기화
client = bigquery.Client()
def upload_bigquery(df):
    # BigQuery 테이블에 데이터 집계
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND

    job = client.load_table_from_dataframe(
        df, "grainscanner.external_ingestion.raw_impfood_imported_foods_info", job_config=job_config
    )
    # 작업 완료 대기
    job.result()


# 시작 연도 및 월별로 반복
start_year = 2020
current_year = datetime.now().year
current_month = datetime.now().month

fail_list = []
for year in range(start_year, current_year + 1):
    for month in range(1, 13):
        # 현재 년도 이후 월은 처리하지 않음
        if year == current_year and month > current_month:
            break
        # 월별 시작일과 종료일 계산
        start_date = datetime(year, month, 1).strftime('%Y-%m-%d')
        last_day = (datetime(year, month + 1, 1) - timedelta(days=1)).day if month < 12 else 31
        end_date = datetime(year, month, last_day).strftime('%Y-%m-%d')
        
        # dclPrductSeCd 값에 대해 반복
        for dclPrductSeCd in [1, 4, 5]:
            df = None
            try:
                time.sleep(0.3)
                df = call_api(dclPrductSeCd, start_date, end_date)
            except Exception as e:
                fail_list.append({
                    "start_date": start_date,
                    "end_date": end_date,
                    "product_code": dclPrductSeCd
                })
                print(f"** fail - {start_date} ~ {end_date} : {dclPrductSeCd}")
                continue
        
            if df is not None and len(df) > 0:
                modified_df = modify_df(df)
                upload_bigquery(modified_df)
            else:
                print(f"** empty - {start_date} ~ {end_date} : {dclPrductSeCd}")


** fail - 2020-03-01 ~ 2020-03-31 : 1


In [32]:
import requests
import io
import pandas as pd
from google.cloud import bigquery

# API 호출하여 XLSX 파일 다운로드
api_url = "https://impfood.mfds.go.kr/CFCCC01F01/getExcelFile?totalCnt=0&page=1&limit=10&dclPrductSeCd=4&prductNm=&rpsntItmNm=&bsshNm=&srchNtncd=&ovsmnfstNm=&returnChk=&sameSearch=&srchStrtDt=2018-01-01&srchEndDt=2018-01-31&expirdeBeginDtm=&expirdeEndDtm=&srchHistNo=&rpsntItmCd=&oemFoodYn="
api_url = "https://impfood.mfds.go.kr/CFCCC01F01/getExcelFile?totalCnt=0&page=1&limit=10&dclPrductSeCd=4&prductNm=&rpsntItmNm=&bsshNm=&srchNtncd=&ovsmnfstNm=&returnChk=&sameSearch=&srchStrtDt=2018-01-01&srchEndDt=2018-01-31&expirdeBeginDtm=&expirdeEndDtm=&srchHistNo=&rpsntItmCd=&oemFoodYn="
response = requests.get(api_url)
xlsx_file = io.BytesIO(response.content)

# Pandas를 사용하여 XLSX 파일 읽기
df = pd.read_excel(xlsx_file)
df

Unnamed: 0,NO,구분,수입업체,제품명(한글),제품명(영문),품목(유형),해외제조업소,처리일자,소비기한,제조국,수출국,냉동전환번호,이력추적번호,원재료
0,1,가공식품,(주)비티알커머스,드리 폰타이넌 오드 괴즈,OUDE GEUZE (5.3%),맥주,3 FONTEINEN,2018-01-30,2016-12-05 ~ 2036-10-26,벨기에,벨기에,,,"정제수,맥아,밀,호프"
1,2,가공식품,주식회사 쏠트코리아,암염,ROCK SALT,천일염,UBAIDIA MINERALS,2018-01-30,2017-12-10 ~ 2027-12-09,파키스탄,파키스탄,,,암염
2,3,가공식품,(주)비티알커머스,드리 폰타이넌 오드 크릭,OUDE KRIEK (5.6%),기타주류,BROUWERIJ DRIE(3) FONTEINEN,2018-01-30,2017-04-20 ~ 2037-10-26,벨기에,벨기에,,,"정제수,버찌,맥아,밀,호프"
3,4,가공식품,(주)비티알커머스,드리 폰타이넌 오마쥬,HOMMAGE (4.5%),기타주류,3 FONTEINEN,2018-01-30,2016-12-20 ~ 2036-10-26,벨기에,벨기에,,,"정제수,산딸기,맥아,밀,버찌,호프"
4,5,가공식품,송다,우림고수보이차/120g(60gX2ea),YULIN PUER TEA,침출차,"MENGHAI YU LIN TEA, CO., LTD.",2018-01-30,2016-01-16 ~ 2046-01-15,중국,중국,,,차나무잎
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,90,가공식품,케이앤씨원,리파인트쏠트,REFINED SALT,정제소금,"JIANGSU SUYAN JINGSHEN CO.,LTD",2018-01-03,2017-11-11 ~ 2027-11-10,중국,중국,,,"식염,페로시안화칼륨"
90,91,가공식품,쏠트플러스(주),리파인드 솔트/1000KG,REFINED SALT,정제소금,"JIANGSU SUYAN JINGSHEN CO.,LTD",2018-01-03,2017-11-11 ~ 2027-11-10,중국,중국,,,"식염,페로시안화칼륨"
91,92,가공식품,헬씨티,보이차,PU ER TEA,침출차,"MENGHAI YEZHUAG SHUAGLI (TAO RAN TANG TEA,YUNNAN)",2018-01-03,2017-09-18 ~ 2027-09-17,중국,중국,,,보이차
92,93,가공식품,쏠트플러스(주),리파인드 솔트/1000KG,REFINED SALT,정제소금,"JIANGSU SUYAN JINGSHEN CO.,LTD",2018-01-02,2017-11-11 ~ 2027-11-10,중국,중국,,,"식염,페로시안화칼륨"


In [None]:
# 데이터프레임 컬럼명 변경
df.rename(columns={
    'NO': 'number',
    '구분': 'category',
    '수입업체': 'importer',
    '제품명(한글)': 'product_name_kr',
    '제품명(영문)': 'product_name_en',
    '품목(유형)': 'item_type',
    '해외제조업소': 'manufacturer_foreign',
    '처리일자': 'processing_date',
    '소비기한': 'expiration_date',
    '제조국': 'country_of_manufacture',
    '수출국': 'country_of_export',
    '냉동전환번호': 'frozen_conversion_number',
    '이력추적번호': 'traceability_number'
}, inplace=True)

# 변경된 컬럼명으로 데이터프레임의 처음 몇 줄 확인

df.drop(columns=['number'], inplace=True)



In [6]:
df_renamed.drop(columns=['number'], inplace=True)
df_renamed.head()

Unnamed: 0,category,importer,product_name_kr,product_name_en,item_type,manufacturer_foreign,processing_date,expiration_date,country_of_manufacture,country_of_export,frozen_conversion_number,traceability_number
0,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)","LANNA HARVEST CO.,LTD",2024-03-03,-,태국,태국,,
1,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)",FOOD NAMU LIMITED PARTNERSHIP,2024-03-03,-,태국,태국,,
2,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)","LANNA HARVEST CO.,LTD",2024-03-03,-,태국,태국,,
3,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)",FOOD NAMU LIMITED PARTNERSHIP,2024-03-03,-,태국,태국,,
4,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)",FOOD NAMU LIMITED PARTNERSHIP,2024-03-03,-,태국,태국,,


In [17]:
df_converted = df_renamed.astype(str).replace('nan', pd.NA)
#df_converted = df_converted.drop(columns=['processing_date'])

df_converted['processing_date'] = pd.to_datetime(df_converted['processing_date'], errors='coerce').dt.date

df_converted

Unnamed: 0,category,importer,product_name_kr,product_name_en,item_type,manufacturer_foreign,processing_date,expiration_date,country_of_manufacture,country_of_export,frozen_conversion_number,traceability_number
0,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)","LANNA HARVEST CO.,LTD",2024-03-03,-,태국,태국,,
1,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)",FOOD NAMU LIMITED PARTNERSHIP,2024-03-03,-,태국,태국,,
2,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)","LANNA HARVEST CO.,LTD",2024-03-03,-,태국,태국,,
3,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)",FOOD NAMU LIMITED PARTNERSHIP,2024-03-03,-,태국,태국,,
4,농.임산물,주식회사 에이통상,망고,FRESH MANGO,"망고(열매,신선)",FOOD NAMU LIMITED PARTNERSHIP,2024-03-03,-,태국,태국,,
...,...,...,...,...,...,...,...,...,...,...,...,...
920,농.임산물,경원무역,깐도라지,FRESH ROOT OF BALLOON FLOWER PEELING,"도라지/길경(뿌리,신선)","GALAXY INT'L TRADE CO.,LTD",2024-02-27,2024-02-23 ~,중국,중국,,
921,농.임산물,(주)진원무역,바나나,BANANA,"바나나(열매,신선)",HUNG SON HIGH TECHNOLOGY AGRICULTURE JOINT STO...,2024-02-27,-,베트남,베트남,,
922,농.임산물,우화,신선양배추(15KG),FRESH CABBAGE,"양배추(잎,신선)","ANQIU BODONG INTERNATIONAL TRADE.CO.,LTD",2024-02-27,-,중국,중국,,
923,농.임산물,(주)진원무역,포도,FRESH GRAPES,"포도(열매,신선)",AGRICOLA DON RICARDO SAC,2024-02-27,-,페루,페루,,


In [18]:

# BigQuery 클라이언트 초기화
client = bigquery.Client()

# BigQuery 테이블에 데이터 집계
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_APPEND

job = client.load_table_from_dataframe(
    df_converted, "grainscanner.external_ingestion.raw_impfood_imported_foods_info", job_config=job_config
)

# 작업 완료 대기
job.result()

print("데이터가 BigQuery 테이블에 성공적으로 적재되었습니다.")

데이터가 BigQuery 테이블에 성공적으로 적재되었습니다.
