In [51]:
import pandas as pd
import json
import requests
import time
from tqdm import tqdm

import mariadb
from sqlalchemy import create_engine

def make_binary(x):
    if x!=0:
        return 1
    return 0


In [94]:
def make_jongna_data(query,sortStartDate,sortEndDate):
    print(sortStartDate,sortEndDate)
    url = f'https://search-api.joongna.com/v25/search/product'
    param = {
        "filter":{
            "dateFilterParameter":{
                "sortEndDate":f"{sortEndDate}",
                "sortStartDate":f"{sortStartDate}"
            },
            "flawedYn":0,
            "fullPackageYn":0,
            "limitedEditionYn":0,
    #         "maxPrice":2000000000,
    #         "minPrice":2,
            "tradeType":0
        },
        "productFilter":"ALL",
        "productStates":[
            0,
            1,
            99
        ],
        "searchQuantity":10000,
        "osType":2,
        "searchWord":f'{query}',
        "sort":"RECENT_SORT",
    }
    response= requests.post(url,json=(param))
    data=response.json()['data']
    col=['articleUrl',
    'articleSeq',
    'articleRegDate',
    'price',
    'title',
    'state'] # 0 1 3]
    # 데이터프레임 화 , 중복 제거
    df=pd.DataFrame(data['items']).drop_duplicates('title')[col]
    # nan 제거
    df=df.dropna()
    # is_sold 처리
    df.rename(columns  = {'state': 'is_sold'}, inplace = True)
    df['is_sold'] = df['is_sold'].map(lambda x : make_binary(x))
    # seq int화
    df['articleSeq']=df['articleSeq'].astype('int')
    # 매입 게시물 제거
    buy = df[df['title'].str.contains('삽니|매입|사요', na=False)]
    only_sale=pd.merge(df, buy, how='outer', indicator = True).query('_merge == "left_only"').drop(columns=['_merge'])
    # 가격 25%~75% 사이만 사용
    min_price=only_sale['price'].describe().loc['25%']
    max_price=only_sale['price'].describe().loc['75%']
    q = "(price >= @min_price) and (price <= @max_price)"
    final = only_sale.query(q)
    #is_mint
    mint_list=list(final[final['title'].str.contains("s급|S급|미개봉|민트급|새상품")].index)
    final['is_mint']=0
    final.loc[mint_list,'is_mint']=1
    return final

In [108]:
#2022년 1월~5월 데이터
# query="맥북 m1 프로 16인치"
query="아이폰 13"
final= make_jongna_data(query,"2022-05-14" ,"2022-05-20")
day_list=[(5,13),(5,7),(4,30),(4,23),(4,16),(4,9),(3,31),(3,24),(3,17),(3,10),(2,28),(2,21)
,(2,14),(2,7),(1,31),(1,24),(1,17),(1,10)]
for mon,end_d in (day_list):
    day=end_d-6
    if day<10:
        day=f"0{day}"
    if end_d <10:
        end_d=f"0{end_d}"
    final=pd.concat([final,make_jongna_data(query,f"2022-0{mon}-{day}" ,f"2022-0{mon}-{end_d}")])

2022-05-14 2022-05-20


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-05-07 2022-05-13


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-05-01 2022-05-07


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-04-24 2022-04-30


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-04-17 2022-04-23


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-04-10 2022-04-16


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-04-03 2022-04-09


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-03-25 2022-03-31


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-03-18 2022-03-24


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-03-11 2022-03-17


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-03-04 2022-03-10


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-02-22 2022-02-28


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-02-15 2022-02-21


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-02-08 2022-02-14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-02-01 2022-02-07


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-01-25 2022-01-31


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-01-18 2022-01-24


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-01-11 2022-01-17


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


2022-01-04 2022-01-10


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final['is_mint']=0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [109]:
# 지역, 이미지 처리
detail_list=[]
for seq in tqdm(final[final['is_sold']==0]['articleSeq']):
    t=time.time()
    detail_url=f'https://apis.naver.com/cafe-web/cafe-articleapi/v2/cafes/10050146/articles/{seq}'
    response=requests.get(detail_url)

    if response.status_code!=200:
        continue
    detail=response.json()
    city=detail['result']['articleRegion']['regionName1']
    state=detail['result']['articleRegion']['regionName2']
    img=detail['result']['saleInfo']['image']['url']
    detail_list.append([seq,city,state,img])

detail_df=pd.DataFrame(detail_list,columns=["articleSeq",'city','state','product_image'])

total=pd.merge(final,detail_df, how='outer',on='articleSeq')
total=total.fillna("")
total

100%|██████████████████████████████████████████████████████████████████████████████| 4539/4539 [08:36<00:00,  8.79it/s]


Unnamed: 0,articleUrl,articleSeq,articleRegDate,price,title,sortDate,is_sold,is_mint,city,state,product_image
0,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,918062287,2022-05-20 04:06:43,1120000,[112만원] 아이폰13프로 블루 128GB 풀박스(부천/부천역),2022-05-20 04:06:43,0,0,,,https://cafeptthumb-phinf.pstatic.net/MjAyMjA1...
1,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,918059836,2022-05-20 03:08:16,790000,아이폰13 스타라이트 256용량 배터리100% 상태좋은폰 79만 판매합니다,2022-05-20 03:08:16,0,0,서울특별시,구로구,https://cafeptthumb-phinf.pstatic.net/MjAyMjA1...
2,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,918058590,2022-05-20 02:41:57,1100000,[판매] ﻿아이폰13프로 그래파이트 128GB 외관 S급 공기계 [110만],2022-05-20 02:41:57,0,1,,,https://cafeptthumb-phinf.pstatic.net/MjAyMjA1...
3,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,918058474,2022-05-20 02:39:38,940000,아이폰13 핑크 256기가 배터리100%,2022-05-20 02:39:38,0,0,경기도,군포시,https://cafeptthumb-phinf.pstatic.net/MjAyMjA1...
4,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,918057665,2022-05-20 02:24:00,940000,아이폰13 SSS급 256기가 배터리100% 94만,2022-05-20 02:24:00,0,1,,,https://cafeptthumb-phinf.pstatic.net/MjAyMjA1...
...,...,...,...,...,...,...,...,...,...,...,...
7993,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,889824822,2022-01-04 08:15:33,950000,아이폰 13 미니 256gb 스타라이트,2022-01-04 08:15:33,1,0,,,
7994,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,889798768,2022-01-04 01:17:52,950000,아이폰13프로 씨에라블루 128기가 최저가판매,2022-01-04 01:17:52,1,0,,,
7995,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,889793550,2022-01-04 00:29:30,750000,"s21바이올렛 ss급으로 z플립3, 아이폰13,프로,맥스추금교환",2022-01-04 00:29:30,1,1,,,
7996,https://m.cafe.naver.com/ArticleRead.nhn?clubi...,889791793,2022-01-04 00:16:57,900000,아이폰13미니 256gb 미드나이트 일주일 사용,2022-01-04 00:16:57,1,0,,,


In [110]:
# 스프링서버로부터 post의 크기 받아왔다고 가정
size=804
item_id=2
# post size에 기반한 id열 생성
total['id']=range(size+2,size+2+len(total))

#post 데이터 만들기

# item_id ,location_id , market_price_id, platform_id 바꾸는 로직 추가 필요
post_columns=['id','articleSeq','articleUrl','is_sold','is_mint','sortDate','title','product_image','city','state'] #platform추가
post=total[post_columns].copy()
post['platform']='Jongo Nara'
post.rename(columns  = {
    'id' : 'post_id',
    'articleSeq' : 'pid',
    'articleUrl':'url',
    'sortDate':'upload_date'
}, inplace = True)
post['item_id']=item_id
# post['location_id']=15 # 지역 데이터 앞에 꺼만 뺴오기 수원시 권선구 -> 수원시
post['platform_id']=11 #중고나라가 11이었음


post.drop(['city','state','platform'],axis=1,inplace=True)



price_columns=['id','price']
price=total[price_columns].copy()
price.rename(columns={
    'id':"market_price_id"
},inplace=True)
price['post_id']=price['market_price_id']
price['item_id']=item_id


In [111]:
user='developer', 
password='1234', 
database='dev', 
host='3.36.254.182',
port=13306

conn  = mariadb.connect(
    user='developer', 
    password='1234', 
    database='dev', 
    host='3.36.254.182',
    port=13306
)

# 위 커넥션 정보와 동일하게 입력
engine = create_engine(f"mysql+pymysql://developer:1234@3.36.254.182:13306/dev")
data=(item_id,None,query)

sql = """
INSERT INTO item (
    item_id,
    latest_search_time,
    name
) VALUES (
    ?, ?, ?
)
"""
cs = conn.cursor()
cs.execute(sql,data)
conn.commit()

In [112]:
t=time.time()
post.to_sql('post',engine,if_exists='append',index=False,method='multi')
price.to_sql('market_price',engine,if_exists='append',index=False,method='multi')
print(time.time()-t)

1.9360778331756592


0.26453208923339844
