Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Works?

  • Loading branch information...
commit 8ae17ae33dc7354460513afa6b193ec3a781f645 1 parent 86bfb42
@ambs authored
Showing with 93 additions and 2 deletions.
  1. +51 −1 CLD.xs
  2. +42 −1 t/00-load.t
View
52 CLD.xs
@@ -1,3 +1,5 @@
+/* -*- c -*- */
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -11,6 +13,7 @@ extern "C" {
#endif
#include <stdio.h>
+#include <string.h>
#include "ppport.h"
@@ -25,6 +28,53 @@ void
identify(src)
const char* src
CODE:
/* Runs CompactLangDet::DetectLanguage on src and traces every input and
   the resulting language id to stderr. In the lines shown here nothing is
   handed back to the Perl caller. */
- fprintf(stderr, "Hello %s\n\n\n\n", src);
/* NOTE(review): strlen() yields size_t; the value is narrowed to int. */
+ int src_length = strlen(src);
/* Detection switches, all hard-coded for now. */
+ bool is_plain_text = true; /* make this an option */
+ bool allow_extended_languages = true;
+ bool pick_summary_language = false;
+ bool remove_weak_matches = false;
/* Hints are set to NULL / UNKNOWN_ENCODING / UNKNOWN_LANGUAGE. */
+ const char* tld_hint = NULL;
+ int encoding_hint = UNKNOWN_ENCODING;
+ Language language_hint = UNKNOWN_LANGUAGE;
+
/* Three-entry arrays passed to DetectLanguage below; presumably filled
   with the top three candidates, their percentages and scores --
   TODO confirm against the CLD headers. Not read again in this hunk. */
+ double normalized_score3[3];
+ Language language3[3];
+ int percent3[3];
+
/* Passed by address to DetectLanguage; not read again in this hunk. */
+ int text_bytes;
+ bool is_reliable;
+
/* Debug trace of every argument before the call. */
+ fprintf(stderr, "Text is >%s<\n", src);
+ fprintf(stderr, "Text length is >%d<\n", src_length);
+ fprintf(stderr, "is_plain_text is >%d<\n", is_plain_text);
+ fprintf(stderr, "allow_extended_languages is >%d<\n", allow_extended_languages);
+ fprintf(stderr, "pick_summary_language is >%d<\n", pick_summary_language);
+ fprintf(stderr, "remove_weak_matches is >%d<\n", remove_weak_matches);
/* tld_hint is NULL above, so guard before formatting it with %s. */
+ if (!tld_hint)
+ fprintf(stderr, "tld_hint is null\n");
+ else
+ fprintf(stderr, "tld_hint is >%s<\n", tld_hint);
+ fprintf(stderr, "encoding_hint is >%d<\n", encoding_hint);
+ fprintf(stderr, "language_hint is >%d<\n", language_hint);
+
/* NOTE(review): the meaning of the leading 0 argument is not visible
   here -- confirm against the DetectLanguage declaration. */
+ Language lang = CompactLangDet::DetectLanguage(0,
+ src,
+ src_length,
+ is_plain_text,
+ allow_extended_languages,
+ pick_summary_language,
+ remove_weak_matches,
+ tld_hint,
+ encoding_hint,
+ language_hint,
+ language3,
+ percent3,
+ normalized_score3,
+ &text_bytes,
+ &is_reliable);
+
/* The detected language is reported only as its integer value. */
+ fprintf(stderr, "identified as >%d<\n", lang);
+ fprintf(stderr, "\n\n\n");
+
View
43 t/00-load.t
@@ -1,11 +1,52 @@
#!perl -T
# Load test for Lingua::Identify::CLD: verifies the module loads, then
# calls identify() on two sample texts without checking the results.
# Declares this source file as UTF-8; the second sample below is non-ASCII.
+use utf8;
+
# The plan of 1 covers only the use_ok check; the identify() calls at the
# end are not assertions.
use Test::More tests => 1;
BEGIN {
use_ok( 'Lingua::Identify::CLD' ) || print "Bail out!\n";
}
# English sample text handed to identify() below.
+my $eng = <<EOE;
+ confiscation of goods is assigned as the penalty part most of the courts
+ consist of members and when it is necessary to bring public cases before a
+ jury of members two courts combine for the purpose the most important cases
+ of all are brought jurors or
+EOE
+
# Devanagari-script sample handed to identify() below.
# NOTE(review): the variable is named $hindi; whether the text is actually
# Hindi cannot be verified from this file -- confirm the sample's language.
+my $hindi = <<EOI;
+ नेपाल एसिया
+ मंज अख मुलुक
+ राजधानी काठ
+ माडौं नेपाल
+ अधिराज्य पेर
+ ेग्वाय
+ दक्षिण अमेरि
+ का महाद्वीपे
+ मध् यक्षेत्
+ रे एक देश अस
+ ् ति फणीश्वर
+ नाथ रेणु
+ फिजी छु दक्ष
+ िण प्रशान् त
+ महासागर मंज
+ अख देश बहाम
+ ास छु केरेबि
+ यन मंज
+ अख मुलुख राज
+ धानी नसौ सम्
+ बद्घ विषय ब
+ ुरुंडी अफ्री
+ का महाद्वीपे
+ मध्
+ यक्षेत्रे दे
+ श अस् ति सम्
+ बद्घ विषय
+EOI
+
+
# Reports the module version, Perl version and interpreter path.
diag( "Testing Lingua::Identify::CLD $Lingua::Identify::CLD::VERSION, Perl $], $^X" );
# Return values are ignored here, so these calls only exercise identify().
-Lingua::Identify::CLD::identify("OLA");
+Lingua::Identify::CLD::identify($eng);
+Lingua::Identify::CLD::identify($hindi);
Please sign in to comment.
Something went wrong with that request. Please try again.