In [None]:
import os
import json

import httpx
import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process
# environment; the os.getenv(...) lookups in later cells read the API and AWS
# credentials from there.
load_dotenv()

In [None]:
# HTTP headers sent with the Naver Open API request. The client id/secret are
# read from the environment; os.getenv yields None when a variable is unset.
headers = {"Content-Type": "application/json"}
headers["X-Naver-Client-Id"] = os.getenv("X_NAVER_CLIENT_ID")
headers["X-Naver-Client-Secret"] = os.getenv("X_NAVER_CLIENT_SECRET")

In [None]:
# Naver DataLab search endpoint queried by this notebook.
url = "https://openapi.naver.com/v1/datalab/search"

# JSON request body: one keyword group containing a single keyword, over a
# fixed (hard-coded) date range with timeUnit "date".
params = {
    "startDate": "2016-01-01",
    "endDate": "2025-04-28",
    "timeUnit": "date",
    "keywordGroups": [
        {
            "groupName": "효성에프엠에스",
            "keywords": [
                "효성에프엠에스"
            ]
        }
    ]
}


resp = httpx.post(url=url, json=params, headers=headers)

# Stop here on a 4xx/5xx reply (bad credentials, quota, malformed body) instead
# of letting a later cell fail with a KeyError on the missing "results" key.
resp.raise_for_status()

In [None]:
# Display the decoded JSON body of the response to inspect its structure
# before building a DataFrame from it.
resp.json()

In [None]:
# Take the first entry of "results" from the response and turn its "data"
# records into a DataFrame.
first_result = resp.json()["results"][0]
df = pd.DataFrame(data=first_result["data"])
df

In [None]:
import io
import boto3

# Destination object for the exported data.
bucket_name = "datalake-common-s3"
s3_key = "naver/keyword-search/2025-04-30.parquet"

# Serialize the DataFrame as snappy-compressed Parquet into an in-memory
# buffer, then rewind it so the upload reads from the first byte.
buffer = io.BytesIO()
df.to_parquet(buffer, compression="snappy", index=False)
buffer.seek(0)

# S3 client for region ap-northeast-2; the access key pair is read from the
# environment.
s3 = boto3.client(
    "s3",
    region_name="ap-northeast-2",
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
)

# Stream the buffer to the bucket and report the resulting object URI.
s3.upload_fileobj(Fileobj=buffer, Bucket=bucket_name, Key=s3_key)
print(f"File uploaded to s3://{bucket_name}/{s3_key}")

File uploaded to s3://datalake-common-s3/naver/keyword-search/2025-04-30.parquet


In [20]:
import duckdb

# Create a DuckDB connection (no database path is given).
con = duckdb.connect()

# Read the AWS credentials up front. A missing variable raises KeyError here
# instead of being formatted into the statement as the literal text 'None',
# which would only show up later as an opaque S3 authentication failure.
# Single quotes are doubled so a credential value cannot terminate the SQL
# string literal it is embedded in.
aws_key_id = os.environ["AWS_ACCESS_KEY_ID"].replace("'", "''")
aws_secret = os.environ["AWS_SECRET_ACCESS_KEY"].replace("'", "''")

# NOTE: this string contains the secret key in clear text; do not display it.
query_create_secret = f"""
    CREATE SECRET (
        TYPE s3,
        KEY_ID '{aws_key_id}',
        SECRET '{aws_secret}',
        REGION 'ap-northeast-2'
    )
"""

con.execute(query_create_secret)


# Read the Parquet file stored on S3 through DuckDB.
query = f"""
    SELECT 
        * 
    FROM 
        read_parquet('s3://{bucket_name}/{s3_key}')
"""

# Run the query and fetch the result as a pandas DataFrame.
result = con.execute(query).fetchdf()

# Display the result.
result

Unnamed: 0,period,ratio
0,2016-01-01,0.38819
1,2016-01-02,0.69875
2,2016-01-03,0.38819
3,2016-01-04,8.92857
4,2016-01-05,7.22049
...,...,...
3356,2025-04-24,16.07142
3357,2025-04-25,31.75465
3358,2025-04-26,7.45341
3359,2025-04-27,3.95962


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# Rebuild the frame from the API response: "period" goes on the x axis and
# "ratio" on the y axis.
df = pd.DataFrame(data=resp.json()["results"][0]["data"])

fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=df, x="period", y="ratio", ax=ax)

# Keep roughly 20 x ticks. The step is clamped to at least 1 because
# len(df) // 20 is 0 for frames shorter than 20 rows, and range() rejects a
# zero step with ValueError.
tick_step = max(1, len(df) // 20)
ax.set_xticks(range(0, len(df), tick_step))

# Set the rotation on the axis rather than on the current label objects, so it
# applies to the ticks chosen above.
ax.tick_params(axis="x", labelrotation=45)

plt.show()