Skip to content

Commit

Permalink
[#23][#26] ParsingService: 사이트별 날짜 파싱 메소드 수정
Browse files Browse the repository at this point in the history
1.CrawlingRepository: findDateTimeByLabel 메소드 추가
2.ParsingService: @RequiredArgsConstructor 추가
3.CrawlingService: @Async -> @Transactional 수정
  • Loading branch information
chaiminwoo0223 committed Jun 9, 2024
1 parent 95e1b37 commit 84f2d36
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import skhu.jijijig.domain.Crawling;

import java.util.List;
import java.util.Optional;

public interface CrawlingRepositoryCustom {
Page<Crawling> searchAllByKeyword(String keyword, Pageable pageable);
Expand All @@ -14,4 +15,6 @@ public interface CrawlingRepositoryCustom {
Page<Crawling> findAllSortedByDateTime(Pageable pageable);

Page<Crawling> findAllSortedByDateTimeByLabel(String label, Pageable pageable);

/**
 * Looks up a date-time string associated with the given label.
 *
 * @param label the label to look up
 * @return the date-time string if one is found, otherwise an empty {@code Optional}
 */
// NOTE(review): when several entries share a label, which entry's value is
// returned is decided by the implementation — confirm that is intended.
Optional<String> findDateTimeByLabel(String label);
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import skhu.jijijig.domain.QCrawling;

import java.util.List;
import java.util.Optional;

@Repository
@RequiredArgsConstructor
Expand Down Expand Up @@ -83,6 +84,18 @@ public Page<Crawling> findAllSortedByDateTimeByLabel(String label, Pageable page
return PageableExecutionUtils.getPage(result, pageable, countQuery::fetchOne);
}

/**
 * Fetches the {@code dateTime} column of the first {@code Crawling} row whose
 * label equals {@code label}.
 *
 * <p>The query has no {@code orderBy}, so when several rows share the label the
 * row that comes back first is whatever the database returns first.
 *
 * @param label the label to match exactly
 * @return the stored date-time string, or an empty {@code Optional} when the
 *         query yields {@code null}
 */
@Override
public Optional<String> findDateTimeByLabel(String label) {
    final QCrawling c = QCrawling.crawling;

    // fetchFirst() yields null when nothing matches; ofNullable maps that to empty.
    return Optional.ofNullable(
            queryFactory
                    .select(c.dateTime)
                    .from(c)
                    .where(c.label.eq(label))
                    .fetchFirst());
}

private BooleanExpression keywordContainsInLabelOrTitle(String keyword) {
return keyword != null ? QCrawling.crawling.label.containsIgnoreCase(keyword)
.or(QCrawling.crawling.title.containsIgnoreCase(keyword)) : null;
Expand Down
3 changes: 1 addition & 2 deletions src/main/java/skhu/jijijig/service/CrawlingService.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
Expand Down Expand Up @@ -46,7 +45,7 @@ public void scheduleCrawlingTasks() {
crawlingService.performCrawling("https://quasarzone.com/bbs/qb_saleinfo", "퀘사이존", "div.market-info-list");
}

@Async
@Transactional
public void performCrawling(String url, String label, String rowsCssSelector) {
System.out.println(label);
crawlWebsite(url, label, rowsCssSelector);
Expand Down
37 changes: 21 additions & 16 deletions src/main/java/skhu/jijijig/service/ParsingService.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package skhu.jijijig.service;

import lombok.RequiredArgsConstructor;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.springframework.stereotype.Service;
import skhu.jijijig.repository.crawling.CrawlingRepository;

import java.time.LocalDate;
import java.time.LocalDateTime;
Expand All @@ -12,7 +14,10 @@
import java.util.regex.Pattern;

@Service
@RequiredArgsConstructor
public class ParsingService {
private final CrawlingRepository crawlingRepository;

/**
 * Reports whether {@code row} contains no element matching the given CSS selector.
 *
 * @param row  the element to search within
 * @param OPEN the CSS selector to look for
 * @return {@code true} when no matching element is found, {@code false} otherwise
 */
public boolean parseOpen(WebElement row, String OPEN) {
    final By selector = By.cssSelector(OPEN);
    // findElements returns an empty list (rather than throwing) when nothing matches.
    return row.findElements(selector).isEmpty();
}
Expand Down Expand Up @@ -52,15 +57,15 @@ public String parseDateTime(WebElement row, String label, String DATETIME) {
DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("uuuu-MM-dd");
DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("uuuu-MM-dd HH:mm:ss");
if (label.startsWith("뽐뿌")) {
return parsePpomppuDateTime(dateTime, today, dateFormatter);
return parsePpomppuDateTime(label, dateTime, today, dateFormatter);
} else if (label.startsWith("루리웹")) {
return parseRuliwebDateTime(dateTime, today, dateFormatter);
return parseRuliwebDateTime(label, dateTime, today, dateFormatter);
} else if (label.startsWith("어미새")) {
return parseEomisaeDateTime(dateTime, today, now, dateFormatter, timeFormatter);
return parseEomisaeDateTime(label, dateTime, today, now, dateFormatter, timeFormatter);
} else if (label.startsWith("쿨엔조이")) {
return parseCoolenjoyDateTime(dateTime, today, dateFormatter);
return parseCoolenjoyDateTime(label, dateTime, today, dateFormatter);
} else if (label.startsWith("퀘사이존")) {
return parseQuasarzoneDateTime(dateTime, today, dateFormatter);
return parseQuasarzoneDateTime(label, dateTime, today, dateFormatter);
}
return today.format(timeFormatter);
}
Expand Down Expand Up @@ -160,42 +165,42 @@ private int parseInteger(String text) {
}
}

/**
 * Builds a date-time string from the raw date/time text of a Ppomppu row.
 *
 * <ul>
 *   <li>Text containing ":" is treated as a time of day: the result is today's date
 *       (formatted with {@code dateFormatter}), a space, and the text unchanged.</li>
 *   <li>Text containing "/" is split into three numbers read as two-digit year
 *       (2000 is added), month and day.</li>
 *   <li>Anything else yields today's date followed by " 00:00:00".</li>
 * </ul>
 *
 * @param label         site label passed to the repository lookup (newer signature only)
 * @param dateTime      raw date/time text taken from the row
 * @param today         the current date
 * @param dateFormatter formatter applied to the date part
 * @return the assembled date-time string
 */
// NOTE(review): this listing appears to be a rendered diff without +/- markers;
// the two adjacent signatures and the two adjacent returns below are presumably
// the removed and added versions of the same line — confirm against the repository.
private String parsePpomppuDateTime(String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
private String parsePpomppuDateTime(String label, String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
if (dateTime.contains(":")) {
// Time-only text is appended as-is; no seconds suffix is added here.
return today.format(dateFormatter) + " " + dateTime;
} else if (dateTime.contains("/")) {
String[] parts = dateTime.split("/");
// parts[0] is a two-digit year, hence the 2000 offset.
LocalDate date = LocalDate.of(2000 + Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Integer.parseInt(parts[2]));
return date.format(dateFormatter) + " 00:00:00";
// The repository lookup receives only the label; the parsed date is used solely as the fallback value.
return crawlingRepository.findDateTimeByLabel(label).orElse(date.format(dateFormatter) + " 00:00:00");
}
return today.format(dateFormatter) + " 00:00:00";
}

/**
 * Builds a date-time string from the raw date/time text of a Ruliweb row.
 *
 * <ul>
 *   <li>Text containing ":" is treated as a time of day: the result is today's date
 *       (formatted with {@code dateFormatter}), a space, the text, and ":00".</li>
 *   <li>Text containing "." is split into three numbers read as year, month and day
 *       (the year is used as given, with no offset).</li>
 *   <li>Anything else yields today's date followed by " 00:00:00".</li>
 * </ul>
 *
 * @param label         site label passed to the repository lookup (newer signature only)
 * @param dateTime      raw date/time text taken from the row
 * @param today         the current date
 * @param dateFormatter formatter applied to the date part
 * @return the assembled date-time string
 */
// NOTE(review): this listing appears to be a rendered diff without +/- markers;
// the two adjacent signatures and the two adjacent returns below are presumably
// the removed and added versions of the same line — confirm against the repository.
private String parseRuliwebDateTime(String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
private String parseRuliwebDateTime(String label, String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
if (dateTime.contains(":")) {
// ":00" is appended, so the incoming text presumably lacks seconds — TODO confirm.
return today.format(dateFormatter) + " " + dateTime + ":00";
} else if (dateTime.contains(".")) {
// split takes a regex, so the dot must be escaped.
String[] parts = dateTime.split("\\.");
LocalDate date = LocalDate.of(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Integer.parseInt(parts[2]));
return date.format(dateFormatter) + " 00:00:00";
// The repository lookup receives only the label; the parsed date is used solely as the fallback value.
return crawlingRepository.findDateTimeByLabel(label).orElse(date.format(dateFormatter) + " 00:00:00");
}
return today.format(dateFormatter) + " 00:00:00";
}

/**
 * Builds a date-time string from the raw date text of an Eomisae row.
 *
 * <ul>
 *   <li>Text containing "." is split into three numbers read as two-digit year
 *       (2000 is added), month and day. If that date equals {@code today}, the
 *       result is {@code now} formatted with {@code timeFormatter}.</li>
 *   <li>Text without "." yields today's date (formatted with {@code dateFormatter})
 *       followed by " 00:00:00".</li>
 * </ul>
 *
 * @param label         site label passed to the repository lookup (newer signature only)
 * @param dateTime      raw date text taken from the row
 * @param today         the current date
 * @param now           the current date and time
 * @param dateFormatter formatter applied to the date part
 * @param timeFormatter formatter applied to {@code now}
 * @return the assembled date-time string
 */
// NOTE(review): this listing appears to be a rendered diff without +/- markers;
// the two adjacent signatures and the two adjacent returns below are presumably
// the removed and added versions of the same line — confirm against the repository.
private String parseEomisaeDateTime(String dateTime, LocalDate today, LocalDateTime now, DateTimeFormatter dateFormatter, DateTimeFormatter timeFormatter) {
private String parseEomisaeDateTime(String label, String dateTime, LocalDate today, LocalDateTime now, DateTimeFormatter dateFormatter, DateTimeFormatter timeFormatter) {
if (dateTime.contains(".")) {
// split takes a regex, so the dot must be escaped.
String[] parts = dateTime.split("\\.");
// parts[0] is a two-digit year, hence the 2000 offset.
LocalDate date = LocalDate.of(2000 + Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Integer.parseInt(parts[2]));
if (date.isEqual(today)) {
// The row carries no time of day, so the current time stands in for it.
return now.format(timeFormatter);
} else {
return date.format(dateFormatter) + " 00:00:00";
// The repository lookup receives only the label; the parsed date is used solely as the fallback value.
return crawlingRepository.findDateTimeByLabel(label).orElse(date.format(dateFormatter) + " 00:00:00");
}
}
return today.format(dateFormatter) + " 00:00:00";
}

private String parseCoolenjoyDateTime(String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
private String parseCoolenjoyDateTime(String label, String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
dateTime = dateTime.replaceAll("등록일\\s+", "");
if (dateTime.contains(":")) {
return today.format(dateFormatter) + " " + dateTime + ":00";
Expand All @@ -211,12 +216,12 @@ private String parseCoolenjoyDateTime(String dateTime, LocalDate today, DateTime
day = Integer.parseInt(parts[2]);
}
LocalDate date = LocalDate.of(year, month, day);
return date.format(dateFormatter) + " 00:00:00";
return crawlingRepository.findDateTimeByLabel(label).orElse(date.format(dateFormatter) + " 00:00:00");
}
return today.format(dateFormatter) + " 00:00:00";
}

private String parseQuasarzoneDateTime(String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
private String parseQuasarzoneDateTime(String label, String dateTime, LocalDate today, DateTimeFormatter dateFormatter) {
if (dateTime.contains(":")) {
return today.format(dateFormatter) + " " + dateTime + ":00";
}
Expand All @@ -226,10 +231,10 @@ private String parseQuasarzoneDateTime(String dateTime, LocalDate today, DateTim
int month = Integer.parseInt(parts[0]);
int day = Integer.parseInt(parts[1]);
if (month == 12 && today.getMonthValue() == 1) {
year -= 1; // 전년도
year -= 1;
}
LocalDate date = LocalDate.of(year, month, day);
return date.format(dateFormatter) + " 00:00:00";
return crawlingRepository.findDateTimeByLabel(label).orElse(date.format(dateFormatter) + " 00:00:00");
}
return today.format(dateFormatter) + " 00:00:00";
}
Expand Down

0 comments on commit 84f2d36

Please sign in to comment.