Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce code smells for the framework quality improvement #1006

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public Json getJson() {
* @deprecated since 0.4.0
* The html is parsed lazily on the first call to {@link #getHtml()}, so use {@link #setRawText(String)} instead.
*/
@Deprecated
public void setHtml(Html html) {
this.html = html;
}
Expand Down
17 changes: 9 additions & 8 deletions webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ public class Spider implements Runnable, Task {

protected boolean exitWhenComplete = true;

protected final static int STAT_INIT = 0;
protected static final int STAT_INIT = 0;

protected final static int STAT_RUNNING = 1;
protected static final int STAT_RUNNING = 1;

protected final static int STAT_STOPPED = 2;
protected static final int STAT_STOPPED = 2;

protected boolean spawnUrl = true;

Expand Down Expand Up @@ -171,6 +171,7 @@ public Spider setUUID(String uuid) {
* set scheduler for Spider
*
* @param scheduler scheduler
* @deprecated since 0.4.0
* @return this
* @see #setScheduler(us.codecraft.webmagic.scheduler.Scheduler)
*/
Expand Down Expand Up @@ -543,9 +544,9 @@ protected CollectorPipeline getCollectorPipeline() {

public <T> T get(String url) {
List<String> urls = WMCollections.newArrayList(url);
List<T> resultItemses = getAll(urls);
if (resultItemses != null && resultItemses.size() > 0) {
return resultItemses.get(0);
List<T> singleResultItems = getAll(urls);
if (singleResultItems != null && !singleResultItems.isEmpty()) {
return singleResultItems.get(0);
} else {
return null;
}
Expand Down Expand Up @@ -677,7 +678,7 @@ public Status getStatus() {


public enum Status {
Init(0), Running(1), Stopped(2);
INIT(0), RUNNING(1), STOPPED(2);

private Status(int value) {
this.value = value;
Expand All @@ -696,7 +697,7 @@ public static Status fromValue(int value) {
}
}
//default value
return Init;
return INIT;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,7 @@ private SSLConnectionSocketFactory buildSSLConnectionSocketFactory() {
return new SSLConnectionSocketFactory(sslContext, supportedProtocols,
null,
new DefaultHostnameVerifier()); // 优先绕过安全证书
} catch (KeyManagementException e) {
logger.error("ssl connection fail", e);
} catch (NoSuchAlgorithmException e) {
} catch (KeyManagementException | NoSuchAlgorithmException e) {
logger.error("ssl connection fail", e);
}
return SSLConnectionSocketFactory.getSocketFactory();
Expand All @@ -91,7 +89,7 @@ public void checkServerTrusted(X509Certificate[] chain, String authType) throws

@Override
public X509Certificate[] getAcceptedIssuers() {
return null;
return new X509Certificate[0];
}

};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
public class HttpRequestBody implements Serializable {

private static final long serialVersionUID = 5659170945717023595L;

private static final String ENCODING_ERROR ="illegal encoding ";

public static abstract class ContentType {
public abstract static class ContentType {

public static final String JSON = "application/json";

Expand Down Expand Up @@ -68,15 +70,15 @@ public static HttpRequestBody json(String json, String encoding) {
try {
return new HttpRequestBody(json.getBytes(encoding), ContentType.JSON, encoding);
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException("illegal encoding " + encoding, e);
throw new IllegalArgumentException(ENCODING_ERROR+ encoding, e);
}
}

public static HttpRequestBody xml(String xml, String encoding) {
try {
return new HttpRequestBody(xml.getBytes(encoding), ContentType.XML, encoding);
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException("illegal encoding " + encoding, e);
throw new IllegalArgumentException(ENCODING_ERROR + encoding, e);
}
}

Expand All @@ -92,7 +94,7 @@ public static HttpRequestBody form(Map<String,Object> params, String encoding){
try {
return new HttpRequestBody(URLEncodedUtils.format(nameValuePairs, encoding).getBytes(encoding), ContentType.FORM, encoding);
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException("illegal encoding " + encoding, e);
throw new IllegalArgumentException(ENCODING_ERROR + encoding, e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ public static void main(String[] args) {
list.add(String.format(urlTemplate,"太阳能"));
list.add(String.format(urlTemplate,"地热发电"));
list.add(String.format(urlTemplate,"地热发电"));
List<ResultItems> resultItemses = spider.<ResultItems>getAll(list);
for (ResultItems resultItemse : resultItemses) {
System.out.println(resultItemse.getAll());
List<ResultItems> multiResultItems = spider.<ResultItems>getAll(list);
for (ResultItems singleResultItems : multiResultItems) {
System.out.println(singleResultItems.getAll());
}
spider.close();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import us.codecraft.webmagic.Task;

/**
* Remove duplicate requests.
* Remove duplicate requests.<br>
* @author code4crafer@gmail.com
* @since 0.5.1
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public Selectable replace(String regex, String replacement) {
}

public String getFirstSourceText() {
if (getSourceTexts() != null && getSourceTexts().size() > 0) {
if (getSourceTexts() != null && !getSourceTexts().isEmpty()) {
return getSourceTexts().get(0);
}
return null;
Expand All @@ -104,6 +104,6 @@ public String toString() {

@Override
public boolean match() {
return getSourceTexts() != null && getSourceTexts().size() > 0;
return getSourceTexts() != null && !getSourceTexts().isEmpty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public List<String> selectList(String text) {
resultsTemp.addAll(selector.selectList(result));
}
results = resultsTemp;
if (results == null || results.size() == 0) {
if (results.size() == 0) {
return results;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import java.util.List;

/**
* Base selector for html elements.
*
* @author code4crafter@gmail.com
* @since 0.3.0
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public String select(String text) {
}
if (object instanceof List) {
List list = (List) object;
if (list != null && list.size() > 0) {
if (list.size() > 0) {
return toString(list.iterator().next());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,21 @@ public String select(String html) {
text.setLength(0);

for (int i = 0; i < indexDistribution.size() - 1; i++) {
if (indexDistribution.get(i) > threshold && ! boolstart) {
if (indexDistribution.get(i+1).intValue() != 0
|| indexDistribution.get(i+2).intValue() != 0
|| indexDistribution.get(i+3).intValue() != 0) {
if (indexDistribution.get(i) > threshold && ! boolstart
&& !isAnyIndexDistributionZero(indexDistribution,i+1,i+2,i+3)){
boolstart = true;
start = i;
continue;
}
}
if (boolstart) {
if (indexDistribution.get(i).intValue() == 0
|| indexDistribution.get(i+1).intValue() == 0) {

if (boolstart && isAnyIndexDistributionZero (indexDistribution,i,i+1,0)) {

end = i;
boolend = true;
}

}


StringBuilder tmp = new StringBuilder();
if (boolend) {
//System.out.println(start+1 + "\t\t" + end+1);
Expand All @@ -83,9 +82,25 @@ public String select(String html) {
}
return text.toString();
}



@Override
public List<String> selectList(String text) {
throw new UnsupportedOperationException();
}

/**
 * Tests entries of {@code indexDistribution} for zero, in one of two modes selected by
 * {@code afterSuccessorIndex}.
 *
 * <ul>
 *   <li>{@code afterSuccessorIndex != 0}: returns {@code true} only when the entries at
 *       {@code index}, {@code successorIndex} and {@code afterSuccessorIndex} are ALL zero.</li>
 *   <li>{@code afterSuccessorIndex == 0}: the third index is ignored and the method returns
 *       {@code true} when the entry at {@code index} OR at {@code successorIndex} is zero.</li>
 * </ul>
 *
 * Entries are read left to right and reading stops as soon as the result is known, so a
 * later index is never dereferenced once an earlier entry has decided the outcome.
 *
 * @param indexDistribution   list of counts to inspect
 * @param index               first position to test
 * @param successorIndex      second position to test
 * @param afterSuccessorIndex third position to test, or {@code 0} to select the two-entry mode
 * @return the result of the zero test described above
 */
private static boolean isAnyIndexDistributionZero(ArrayList<Integer> indexDistribution, int index, int successorIndex, int afterSuccessorIndex) {
    final boolean firstIsZero = indexDistribution.get(index).intValue() == 0;

    if (afterSuccessorIndex == 0) {
        // Two-entry mode: one zero among the pair is enough.
        if (firstIsZero) {
            return true;
        }
        return indexDistribution.get(successorIndex).intValue() == 0;
    }

    // Three-entry mode: every inspected entry has to be zero.
    if (!firstIsZero) {
        return false;
    }
    if (indexDistribution.get(successorIndex).intValue() != 0) {
        return false;
    }
    return indexDistribution.get(afterSuccessorIndex).intValue() == 0;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/
public abstract class HttpConstant {

public static abstract class Method {
public abstract static class Method {

public static final String GET = "GET";

Expand All @@ -25,13 +25,13 @@ public static abstract class Method {

}

public static abstract class StatusCode {
public abstract static class StatusCode {

public static final int CODE_200 = 200;

}

public static abstract class Header {
public abstract static class Header {

public static final String REFERER = "Referer";

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package us.codecraft.webmagic.utils;

/**
* Number comparison utility for schedule priority.
*
* @author yihua.huang@dianping.com
*/
public abstract class NumberUtils {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,12 @@ public static boolean validateProxy(Proxy p) {
logger.warn("FAILRE - CAN not connect! remote: " + p);
return false;
} finally {
if (socket != null) {
try {
try {
socket.close();
} catch (IOException e) {
logger.warn("Error occurred while closing socket of validating proxy", e);
}
}

}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import java.util.Set;

/**
* WebMagic collections builders
*
* @author code4crafter@gmail.com
* Date: 16/12/18
* Time: 上午10:16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class PhantomJSDownloader extends AbstractDownloader {
private static Logger logger = LoggerFactory.getLogger(PhantomJSDownloader.class);
private static String crawlJsPath;
private static String phantomJsCommand = "phantomjs"; // default
private static final String HTTP_REQUEST_ERROR = "HTTP request failed";

private int retryNum;
private int threadNum;
Expand Down Expand Up @@ -91,14 +92,14 @@ public Page download(Request request, Task task) {
logger.info("downloading page: " + request.getUrl());
}
String content = getPage(request);
if (content.contains("HTTP request failed")) {
if (content.contains(HTTP_REQUEST_ERROR)) {
for (int i = 1; i <= getRetryNum(); i++) {
content = getPage(request);
if (!content.contains("HTTP request failed")) {
if (!content.contains(HTTP_REQUEST_ERROR)) {
break;
}
}
if (content.contains("HTTP request failed")) {
if (content.contains(HTTP_REQUEST_ERROR)) {
//when failed
Page page = new Page();
page.setRequest(request);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ public static void main(String[] args) {
list.add(String.format(urlTemplate,"太阳能"));
list.add(String.format(urlTemplate,"地热发电"));
list.add(String.format(urlTemplate,"地热发电"));
List<BaiduBaike> resultItemses = ooSpider.<BaiduBaike>getAll(list);
for (BaiduBaike resultItemse : resultItemses) {
System.out.println(resultItemse);
List<BaiduBaike> multiResultItems = ooSpider.<BaiduBaike>getAll(list);
for (BaiduBaike singleResultItems : multiResultItems) {
System.out.println(singleResultItems);
}
ooSpider.close();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public abstract class PatternProcessor extends PatternRequestMatcher implements
/**
* @param pattern url pattern to handle
*/
public PatternProcessor(String pattern) {
protected PatternProcessor(String pattern) {
super(pattern);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public abstract class PatternRequestMatcher implements RequestMatcher {
/**
* @param pattern url pattern to handle
*/
public PatternRequestMatcher(String pattern) {
protected PatternRequestMatcher(String pattern) {
this.pattern = pattern;
this.patternCompiled = Pattern.compile(pattern);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@ public FieldExtractor(Field field, Selector selector, Source source, boolean not
Field getField() {
return field;
}


@Override
Selector getSelector() {
return selector;
}


@Override
Source getSource() {
return source;
}
Expand All @@ -44,6 +46,7 @@ Method getSetterMethod() {
return setterMethod;
}

@Override
boolean isNotNull() {
return notNull;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ public abstract class MultiKeyMapBase {
@SuppressWarnings("rawtypes")
private Class<? extends Map> protoMapClass = DEFAULT_CLAZZ;

public MultiKeyMapBase() {
protected MultiKeyMapBase() {
}

@SuppressWarnings("rawtypes")
public MultiKeyMapBase(Class<? extends Map> protoMapClass) {
protected MultiKeyMapBase(Class<? extends Map> protoMapClass) {
this.protoMapClass = protoMapClass;
}

Expand Down