Skip to content

Commit

Permalink
Fixed a bug and added changes regarding updating cookie store in Simpl…
Browse files Browse the repository at this point in the history
…eHttpFetcher
  • Loading branch information
anudeepti2004 committed Aug 21, 2017
1 parent c59d95d commit 434f25e
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 22 deletions.
20 changes: 18 additions & 2 deletions src/main/java/focusedCrawler/crawler/async/AsyncCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import focusedCrawler.config.Configuration;
import focusedCrawler.crawler.async.HttpDownloader.Callback;
import focusedCrawler.crawler.async.cookieHandler.Cookie;
import focusedCrawler.crawler.async.cookieHandler.CookieUtils;
import focusedCrawler.crawler.async.cookieHandler.OkHttpCookieJar;
import focusedCrawler.crawler.async.fetcher.OkHttpFetcher;
import focusedCrawler.crawler.crawlercommons.fetcher.BaseFetcher;
Expand Down Expand Up @@ -167,16 +168,31 @@ public void addSeeds(List<String> seeds) {
}
}

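/**
* Adds the given cookies to the cookie store of the fetcher currently used by the downloader,
* converting each cookie to that fetcher's own cookie type first.
* @param cookies lists of cookies keyed by a string (presumably the domain name); must not be null
*/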
public void addCookies(HashMap<String, List<Cookie>> cookies) {
if(cookies == null) {
throw new NullPointerException("Cookies argument is null");
}
BaseFetcher baseFecther = downloader.getFetcher();
if(baseFecther instanceof SimpleHttpFetcher) {

HashMap<String, List<org.apache.http.cookie.Cookie>> tempCookies = new HashMap<>();
for(String key: cookies.keySet()) {
List<org.apache.http.cookie.Cookie> newCookieArrayList = new ArrayList<>();
for(Cookie c: cookies.get(key)) {
newCookieArrayList.add(CookieUtils.getApacheCookie(c));
}
tempCookies.put(key, newCookieArrayList);
}
SimpleHttpFetcher.updateCookieStore(tempCookies);
}else {
HashMap<String, List<okhttp3.Cookie>> tempCookies = new HashMap<>();
for(String key: cookies.keySet()) {
List<okhttp3.Cookie> newCookieArrayList = new ArrayList<>();
for(Cookie c: cookies.get(key)) {
newCookieArrayList.add(c.parse());
newCookieArrayList.add(CookieUtils.getOkkHttpCookie(c));
}
tempCookies.put(key, newCookieArrayList);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package focusedCrawler.crawler.async.cookieHandler;

import java.io.Serializable;
import java.util.Date;

import org.apache.http.impl.cookie.BasicClientCookie;

import okhttp3.Cookie.Builder;

Expand All @@ -10,7 +13,7 @@ public class Cookie implements Serializable {
*
*/
private static final long serialVersionUID = -8502369293267383776L;

private String name;
private String value;
private long expiresAt;
Expand All @@ -22,6 +25,7 @@ public class Cookie implements Serializable {
private boolean persistent;
private boolean hostOnly;

/**
 * No-argument constructor. Per the original author's note it was added for
 * serialization; it performs no initialisation of its own.
 */
public Cookie() {
}
Expand Down Expand Up @@ -102,22 +106,9 @@ public boolean isHostOnly() {
/**
 * Sets the host-only flag of this cookie.
 *
 * @param hostOnly the new value of the flag
 */
public void setHostOnly(final boolean hostOnly) {
    this.hostOnly = hostOnly;
}


public okhttp3.Cookie parse(){
Builder builder = new Builder();
builder.name(name);
builder.value(value);
builder.expiresAt(expiresAt);
builder.domain(domain);
builder.path(path);
if(secure) {
builder.secure();
}
if(httpOnly) {
builder.httpOnly();
}
okhttp3.Cookie cookie = builder.build();
return cookie;

/**
 * Returns a short, human-readable summary of this cookie made of its name,
 * value, domain and path. The remaining fields are not included.
 *
 * <p>Note that the cookie value is part of the returned text, so the result
 * may contain sensitive data.
 *
 * @return the summary string
 */
@Override
public String toString() {
    return "Name: " + name + ", Value: " + value + ", Domain: " + domain + ", Path: " + path;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package focusedCrawler.crawler.async.cookieHandler;

import java.util.Date;

import org.apache.http.impl.cookie.BasicClientCookie;

import okhttp3.Cookie.Builder;

/**
 * Static helpers that convert the crawler's own {@link Cookie} into the cookie
 * types of the two HTTP client libraries referenced here: OkHttp
 * ({@code okhttp3.Cookie}) and Apache HttpClient
 * ({@code org.apache.http.cookie.Cookie}).
 */
public class CookieUtils {

    /**
     * Builds an OkHttp cookie from the given crawler cookie.
     *
     * <p>Name, value, expiry time, domain and path are always copied; the
     * {@code secure} and {@code httpOnly} flags are applied only when set.
     * A host-only cookie is created with {@code hostOnlyDomain(...)} so that it
     * is not sent to sub-domains; otherwise {@code domain(...)} is used.
     *
     * @param cookie the crawler cookie to convert; must not be null
     * @return the equivalent OkHttp cookie
     * @throws NullPointerException if {@code cookie} is null
     */
    public static okhttp3.Cookie getOkkHttpCookie(Cookie cookie) {
        if (cookie == null) {
            throw new NullPointerException("Cookie argument is null");
        }
        Builder builder = new Builder();
        builder.name(cookie.getName());
        builder.value(cookie.getValue());
        builder.expiresAt(cookie.getExpiresAt());
        // Preserve the host-only flag: domain(...) alone would also match sub-domains.
        if (cookie.isHostOnly()) {
            builder.hostOnlyDomain(cookie.getDomain());
        } else {
            builder.domain(cookie.getDomain());
        }
        builder.path(cookie.getPath());
        if (cookie.isSecure()) {
            builder.secure();
        }
        if (cookie.isHttpOnly()) {
            builder.httpOnly();
        }
        return builder.build();
    }

    /**
     * Builds an Apache HttpClient cookie from the given crawler cookie.
     *
     * <p>Name, value, domain, expiry date, path and the {@code secure} flag are
     * copied. The {@code httpOnly} flag is not carried over. For a cookie that
     * is not host-only the {@code domain} attribute is also recorded, which is
     * what Apache's domain matching consults to let the cookie apply to
     * sub-domains (mirroring the OkHttp conversion above).
     *
     * @param cookie the crawler cookie to convert; must not be null
     * @return the equivalent Apache HttpClient cookie
     * @throws NullPointerException if {@code cookie} is null
     */
    public static org.apache.http.cookie.Cookie getApacheCookie(Cookie cookie) {
        if (cookie == null) {
            throw new NullPointerException("Cookie argument is null");
        }
        BasicClientCookie apacheCookie = new BasicClientCookie(cookie.getName(), cookie.getValue());
        String domain = cookie.getDomain();
        apacheCookie.setDomain(domain);
        // Without the explicit attribute the cookie only matches the exact host.
        if (!cookie.isHostOnly() && domain != null) {
            apacheCookie.setAttribute(org.apache.http.cookie.ClientCookie.DOMAIN_ATTR, domain);
        }
        apacheCookie.setExpiryDate(new Date(cookie.getExpiresAt()));
        apacheCookie.setPath(cookie.getPath());
        apacheCookie.setSecure(cookie.isSecure());
        return apacheCookie;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

Expand Down Expand Up @@ -75,6 +78,7 @@
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
Expand All @@ -90,7 +94,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import focusedCrawler.crawler.async.fetcher.GlobalCookieStore;
import focusedCrawler.crawler.crawlercommons.fetcher.AbortedFetchException;
import focusedCrawler.crawler.crawlercommons.fetcher.AbortedFetchReason;
import focusedCrawler.crawler.crawlercommons.fetcher.BadProtocolFetchException;
Expand Down Expand Up @@ -151,7 +154,7 @@ public class SimpleHttpFetcher extends BaseHttpFetcher {
private IdleConnectionMonitorThread monitor;

//Store cookies loaded from configuration file
private CookieStore globalCookieStore = null;
private static CookieStore globalCookieStore = null;


private static final String SSL_CONTEXT_NAMES[] = { "TLS", "Default", "SSL", };
Expand Down Expand Up @@ -1092,4 +1095,34 @@ protected void finalize() {
/**
 * Replaces the user-agent string stored by this fetcher.
 *
 * @param userAgentString the new user-agent string
 */
public void setUserAgentString(String userAgentString) {
    _userAgentString = userAgentString;
}

/**
 * Adds every cookie contained in the given map to the shared cookie store.
 * Only the map's values are read; the keys are described by the original
 * author as domain names, each mapped to the cookies of that domain.
 *
 * <p>NOTE(review): the static store field is declared as {@code null}; confirm
 * it is initialised before this method can be called.
 *
 * @param cookies lists of cookies to add; must not be null
 * @throws NullPointerException if the cookies argument is null
 */
public static void updateCookieStore(HashMap<String, List<Cookie>> cookies) {
    if (cookies == null) {
        throw new NullPointerException("Cookies argument cannot be null");
    }
    for (final List<Cookie> domainCookies : cookies.values()) {
        for (final Cookie domainCookie : domainCookies) {
            globalCookieStore.addCookie(domainCookie);
        }
    }
}

/**
 * Adds a single cookie to the shared cookie store.
 *
 * <p>NOTE(review): the static store field is declared as {@code null}; confirm
 * it is initialised before this method can be called.
 *
 * @param cookie the cookie to add; must not be null
 * @throws NullPointerException if the cookie argument is null
 */
public static void updateCookieStore(final Cookie cookie) {
    if (cookie == null) {
        throw new NullPointerException("Argument cookie is null.");
    }

    globalCookieStore.addCookie(cookie);
}
}

0 comments on commit 434f25e

Please sign in to comment.