Skip to content

Commit

Permalink
Extract languages from wiki page #3
Browse files Browse the repository at this point in the history
  • Loading branch information
baudoliver7 committed Dec 9, 2021
1 parent fd36aa2 commit 822b15f
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 12 deletions.
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -26,12 +26,12 @@ You can run it by these commands :

### From Maven
``` cmd
mvn clean pre-integration-test -Pstart-app
mvn clean integration-test -Pstart-app
```

### From JAR file
``` cmd
java -Dfile.encoding=utf-8 -cp ool-survey-X.X.X-jar-with-dependencies.jar com.ool.survey.Main
java -jar ool-survey-X.X.X-jar-with-dependencies.jar
```

## How to contribute
Expand Down
21 changes: 15 additions & 6 deletions pom.xml
Expand Up @@ -84,24 +84,33 @@ SOFTWARE.
<artifactId>hamcrest-core</artifactId>
<version>2.2</version>
</dependency>
<dependency>
<groupId>com.jcabi</groupId>
<artifactId>jcabi-xml</artifactId>
<version>0.23.1</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<archive>
<manifest>
<mainClass>com.ool.survey.Main</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
Expand Down
17 changes: 13 additions & 4 deletions src/main/java/com/ool/survey/Main.java
Expand Up @@ -25,17 +25,17 @@

import com.jcabi.http.Request;
import com.jcabi.http.request.JdkRequest;
import com.jcabi.xml.XML;
import com.jcabi.xml.XMLDocument;
import java.io.IOException;
import java.util.List;
import javax.ws.rs.core.HttpHeaders;
import javax.ws.rs.core.MediaType;

/**
* Application entry point class.
*
* @since 0.1
* @todo #1:30min Extract languages from loaded wiki page.
* We have loaded the entire page of wiki. Now, we want to extract
* all languages present in its content.
* @checkstyle HideUtilityClassConstructorCheck (100 lines)
*/
@SuppressWarnings({"PMD.SystemPrintln", "PMD.UseUtilityClass"})
Expand All @@ -47,6 +47,12 @@ public final class Main {
private static final String WIKI_PAGE =
"https://en.wikipedia.org/wiki/List_of_programming_languages";

/**
* XPath query to retrieve languages from the Wiki page.
*/
private static final String QUERY_WIKI_PAGE =
"//div[@id='mw-content-text']/div/div/ul/li/a/text()";

/**
* Application entry point.
* @param args Arguments
Expand All @@ -59,6 +65,9 @@ public static void main(final String... args) throws IOException {
.header(HttpHeaders.ACCEPT, MediaType.TEXT_HTML)
.fetch()
.body();
System.out.println(html);
final XML xml = new XMLDocument(html);
final List<String> languages = xml.xpath(Main.QUERY_WIKI_PAGE);
System.out.println(languages);
System.out.println(languages.size());
}
}

0 comments on commit 822b15f

Please sign in to comment.