Skip to content

Commit

Permalink
Merge f01a93a into 0d214bb
Browse files Browse the repository at this point in the history
  • Loading branch information
mkroetzsch committed Apr 5, 2014
2 parents 0d214bb + f01a93a commit 4787930
Show file tree
Hide file tree
Showing 42 changed files with 1,289 additions and 597 deletions.
4 changes: 3 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ Version 0.2.0
-------------

New features:
* Support for resolving site links, based on information from the sites table dump
(as demonstrated in a new example program)
* Support for SnakGroups (data model updated to group Snaks by property in all lists)

Bug fixes:
* Support SomeValueSnak and NoValueSnak in references (Issue #44)

* Use correct site links when importing data from dumps (Issue #37)


Version 0.1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ public StatementGroup getStatementGroup(List<Statement> statements) {
}

@Override
public SiteLink getSiteLink(String title, String siteKey, String baseIri,
public SiteLink getSiteLink(String title, String siteKey,
List<String> badges) {
return new SiteLinkImpl(title, siteKey, baseIri, badges);
return new SiteLinkImpl(title, siteKey, badges);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
* #L%
*/

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.List;

Expand All @@ -42,32 +40,31 @@ public class SiteLinkImpl implements SiteLink {

final String title;
final String siteKey;
final String baseIri;
final List<String> badges;

/**
* Constructor.
*
* @param title
* the title string of the linked page, including namespace
* prefixes if any
* @param siteKey
* @param baseIri
* the string key of the site of the linked article
* @param badges
* the list of badges of the linked article
*/
SiteLinkImpl(String title, String siteKey, String baseIri,
List<String> badges) {
SiteLinkImpl(String title, String siteKey, List<String> badges) {
Validate.notNull(title, "title cannot be null");
Validate.notNull(siteKey, "siteKey cannot be null");
Validate.notNull(baseIri, "base IRI cannot be null");
Validate.notNull(badges, "list of badges cannot be null");

this.title = title;
this.siteKey = siteKey;
this.baseIri = baseIri;
this.badges = badges;
}

@Override
public String getArticleTitle() {
public String getPageTitle() {
return title;
}

Expand All @@ -76,16 +73,6 @@ public String getSiteKey() {
return siteKey;
}

@Override
public String getUrl() {
try {
return baseIri.concat(URLEncoder.encode(title, "utf-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(
"Your JRE does not support UTF-8 encoding. Srsly?!", e);
}
}

@Override
public List<String> getBadges() {
return Collections.unmodifiableList(badges);
Expand All @@ -100,7 +87,6 @@ public List<String> getBadges() {
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + baseIri.hashCode();
result = prime * result + badges.hashCode();
result = prime * result + siteKey.hashCode();
result = prime * result + title.hashCode();
Expand All @@ -124,14 +110,13 @@ public boolean equals(Object obj) {
return false;
}
SiteLinkImpl other = (SiteLinkImpl) obj;
return baseIri.equals(other.baseIri) && badges.equals(other.badges)
&& siteKey.equals(other.siteKey) && title.equals(other.title);
return badges.equals(other.badges) && siteKey.equals(other.siteKey)
&& title.equals(other.title);
}

@Override
public String toString(){
return "SiteLink {title = " + this.baseIri + "/" + this.title
+ ", siteKey = " + siteKey
public String toString() {
return "SiteLink {title = " + this.title + ", siteKey = " + siteKey
+ ", badges = " + this.badges + "}";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package org.wikidata.wdtk.datamodel.implementation;

/*
* #%L
* Wikidata Toolkit Data Model
* %%
* Copyright (C) 2014 Wikidata Toolkit Developers
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.HashMap;

import org.apache.commons.lang3.Validate;
import org.wikidata.wdtk.datamodel.interfaces.SiteLink;
import org.wikidata.wdtk.datamodel.interfaces.Sites;

/**
* Implementation of the {@link Sites} interface that allows sites to be
* registered. Objects of this type are not immutable, since they are not data
* objects, but the {@link Sites} interface only supports read access.
*
* @author Markus Kroetzsch
*
*/
public class SitesImpl implements Sites {

	/**
	 * Simple record for holding information about a site. The file and page
	 * path templates are split once, at construction time, into the text
	 * before and after the placeholder so that building a URL later is a plain
	 * string concatenation.
	 * <p>
	 * The class is static because it never accesses the enclosing
	 * {@link SitesImpl} instance.
	 *
	 * @author Markus Kroetzsch
	 *
	 */
	static class SiteInformation {

		/**
		 * Placeholder that marks the position of the file name or page title
		 * in a path template.
		 */
		static final String PLACEHOLDER = "$1";

		final String siteKey;
		final String group;
		final String languageCode;
		final String siteType;
		final String filePathPre;
		final String filePathPost;
		final String pagePathPre;
		final String pagePathPost;

		/**
		 * Constructor.
		 *
		 * @param siteKey
		 *            the global site key
		 * @param group
		 *            the site group
		 * @param languageCode
		 *            the site language code
		 * @param siteType
		 *            the site type
		 * @param filePath
		 *            the file path with $1 as a placeholder for the file name
		 * @param pagePath
		 *            the page path with $1 as a placeholder for the page title
		 * @throws NullPointerException
		 *             if any of the arguments is null
		 * @throws IllegalArgumentException
		 *             if filePath or pagePath does not contain the placeholder
		 */
		SiteInformation(String siteKey, String group, String languageCode,
				String siteType, String filePath, String pagePath) {
			// Null might be acceptable for some of the following; but this
			// should only be changed when we have a case where this is correct.
			Validate.notNull(siteKey, "Site key must not be null.");
			Validate.notNull(group, "Group must not be null.");
			Validate.notNull(languageCode, "Language code must not be null.");
			Validate.notNull(siteType, "Site type must not be null.");
			Validate.notNull(filePath, "File path must not be null.");
			Validate.notNull(pagePath, "Page path must not be null.");

			this.siteKey = siteKey;
			this.group = group;
			this.languageCode = languageCode;
			this.siteType = siteType;

			int iFileName = findPlaceholder(filePath, "File path");
			this.filePathPre = filePath.substring(0, iFileName);
			this.filePathPost = filePath.substring(iFileName
					+ PLACEHOLDER.length());

			int iPageName = findPlaceholder(pagePath, "Page path");
			this.pagePathPre = pagePath.substring(0, iPageName);
			this.pagePathPost = pagePath.substring(iPageName
					+ PLACEHOLDER.length());
		}

		/**
		 * Returns the index of the first occurrence of the placeholder in the
		 * given path template. A missing placeholder is reported explicitly
		 * here; otherwise the index -1 would reach substring() and fail with
		 * an error that does not mention the offending path.
		 *
		 * @param path
		 *            the path template to search
		 * @param description
		 *            name of the template, used in the error message
		 * @return the index of the placeholder within the path
		 * @throws IllegalArgumentException
		 *             if the path does not contain the placeholder
		 */
		private static int findPlaceholder(String path, String description) {
			int index = path.indexOf(PLACEHOLDER);
			if (index < 0) {
				throw new IllegalArgumentException(description
						+ " must contain the placeholder \"" + PLACEHOLDER
						+ "\", but was: \"" + path + "\"");
			}
			return index;
		}

		/**
		 * Returns the file URL. The file name is inserted as given, without
		 * any escaping.
		 *
		 * @see Sites#getFileUrl(String, String)
		 * @param fileName
		 *            the file name
		 * @return the file URL
		 */
		String getFileUrl(String fileName) {
			return this.filePathPre + fileName + this.filePathPost;
		}

		/**
		 * Returns the page URL. The method replaces spaces by underscores in
		 * page titles on MediaWiki sites, since this is how MediaWiki page URLs
		 * are constructed. For other sites, this might not be the case and
		 * spaces will just be escaped in the standard way using "+".
		 *
		 * @see Sites#getPageUrl(String, String)
		 * @param pageTitle
		 *            the page title, not escaped
		 * @return the page URL
		 */
		String getPageUrl(String pageTitle) {
			try {
				String encodedTitle;
				if ("mediawiki".equals(this.siteType)) {
					encodedTitle = URLEncoder.encode(
							pageTitle.replace(" ", "_"), "utf-8");
					// Keep special title symbols unescaped:
					encodedTitle = encodedTitle.replace("%3A", ":").replace(
							"%2F", "/");
				} else {
					encodedTitle = URLEncoder.encode(pageTitle, "utf-8");
				}
				return this.pagePathPre + encodedTitle + this.pagePathPost;
			} catch (UnsupportedEncodingException e) {
				throw new RuntimeException(
						"Your JRE does not support UTF-8 encoding. Srsly?!", e);
			}
		}
	}

	/**
	 * Registered sites, indexed by their global site key.
	 */
	final HashMap<String, SiteInformation> sites = new HashMap<String, SiteInformation>();

	/**
	 * Sets the stored information for the site of the given key to the given
	 * values. Information that was stored for the same key before is
	 * replaced.
	 *
	 * @param siteKey
	 *            the global site key
	 * @param group
	 *            the site group
	 * @param languageCode
	 *            the site MediaWiki language code
	 * @param siteType
	 *            the site type
	 * @param filePath
	 *            the file path with $1 as a placeholder for the file name
	 * @param pagePath
	 *            the page path with $1 as a placeholder for the page title
	 * @throws NullPointerException
	 *             if any of the arguments is null
	 * @throws IllegalArgumentException
	 *             if filePath or pagePath does not contain the placeholder $1
	 */
	public void setSiteInformation(String siteKey, String group,
			String languageCode, String siteType, String filePath,
			String pagePath) {
		this.sites.put(siteKey, new SiteInformation(siteKey, group,
				languageCode, siteType, filePath, pagePath));
	}

	/**
	 * Returns the language code stored for the given site.
	 *
	 * @param siteKey
	 *            the global site key
	 * @return the language code, or null if no site is registered for the key
	 */
	@Override
	public String getLanguageCode(String siteKey) {
		SiteInformation site = this.sites.get(siteKey);
		return (site == null) ? null : site.languageCode;
	}

	/**
	 * Returns the group stored for the given site.
	 *
	 * @param siteKey
	 *            the global site key
	 * @return the group, or null if no site is registered for the key
	 */
	@Override
	public String getGroup(String siteKey) {
		SiteInformation site = this.sites.get(siteKey);
		return (site == null) ? null : site.group;
	}

	/**
	 * Returns the URL of the page with the given title on the given site.
	 *
	 * @param siteKey
	 *            the global site key
	 * @param pageTitle
	 *            the page title, not escaped
	 * @return the page URL, or null if no site is registered for the key
	 */
	@Override
	public String getPageUrl(String siteKey, String pageTitle) {
		SiteInformation site = this.sites.get(siteKey);
		return (site == null) ? null : site.getPageUrl(pageTitle);
	}

	/**
	 * Returns the URL that the given site link points to, computed from its
	 * site key and page title.
	 *
	 * @param siteLink
	 *            the site link to resolve
	 * @return the page URL, or null if the site of the link is not registered
	 */
	@Override
	public String getSiteLinkUrl(SiteLink siteLink) {
		return this.getPageUrl(siteLink.getSiteKey(), siteLink.getPageTitle());
	}

	/**
	 * Returns the URL of the file with the given name on the given site.
	 *
	 * @param siteKey
	 *            the global site key
	 * @param fileName
	 *            the file name
	 * @return the file URL, or null if no site is registered for the key
	 */
	@Override
	public String getFileUrl(String siteKey, String fileName) {
		SiteInformation site = this.sites.get(siteKey);
		return (site == null) ? null : site.getFileUrl(fileName);
	}

	/**
	 * Returns the site type stored for the given site.
	 *
	 * @param siteKey
	 *            the global site key
	 * @return the site type, or null if no site is registered for the key
	 */
	@Override
	public String getSiteType(String siteKey) {
		SiteInformation site = this.sites.get(siteKey);
		return (site == null) ? null : site.siteType;
	}

}
Original file line number Diff line number Diff line change
Expand Up @@ -248,18 +248,15 @@ Statement getStatement(Claim claim, List<? extends Reference> references,
* Creates a {@link SiteLink}.
*
* @param title
* the title string of the linked article
* the title string of the linked page, including namespace
* prefixes if any
* @param siteKey
* the string key of the site of the linked article
* @param baseIri
* the string key of the site of the linked article; this might
* be computed from the site key in the future
* @param badges
* the list of badges of the linked article
* @return a {@link SiteLink} corresponding to the input
*/
SiteLink getSiteLink(String title, String siteKey, String baseIri,
List<String> badges);
SiteLink getSiteLink(String title, String siteKey, List<String> badges);

/**
* Creates a {@link PropertyDocument}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,23 @@
* site, and a list of "badges" that this article holds. Badges are specific
* tags used on Wikimedia project sites for some articles, most prominently for
* "featured articles".
* <p>
* In spite of its name, the site link does not specify a full URL that it links
* to. It only provides a page title and a site key that may be used to find a
* URL. To do this, the site links need to be resolved using a {@link Sites}
* object.
*
* @author Markus Kroetzsch
*
*/
public interface SiteLink {

/**
* Get the string title of the linked article.
* Get the string title of the linked page.
*
* @return
*/
String getArticleTitle();
String getPageTitle();

/**
* Get the string key of the linked site.
Expand All @@ -47,13 +52,6 @@ public interface SiteLink {
*/
String getSiteKey();

/**
* Get the full IRI (URL) of the linked article.
*
* @return
*/
String getUrl();

/**
* Get the list of badges of the linked article.
*
Expand Down
Loading

0 comments on commit 4787930

Please sign in to comment.