Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[dotnet] port the first 200 lines of Cursor.pir to P6Objects.pm (Regex::Cursor in nqp)

add a few supporting operations: string index & split & is_cclass
  • Loading branch information
diakopter committed Nov 19, 2010
1 parent 0b1a12d commit b930a23
Show file tree
Hide file tree
Showing 2 changed files with 268 additions and 11 deletions.
178 changes: 169 additions & 9 deletions common/NQP/P6Objects.pm
Expand Up @@ -16,25 +16,185 @@ class Mu {
# ACCEPTS candidate whose invocant is declared as Mu:U: returns the result of
# nqp::type_check for $topic against the invocant's WHAT.
multi method ACCEPTS(Mu:U $self: $topic) {
nqp::type_check($topic, self.WHAT)
}

# Returns whatever nqp::repr_defined reports for the invocant.
method defined() {
nqp::repr_defined(self)
}

# Delegates the type-membership question to the invocant's meta-object (HOW),
# passing the invocant and the candidate $type.
method isa($type) {
return self.HOW.isa(self, $type);
}
}

# Capture holds a positional store ($!list) and a named store ($!hash) and
# forwards element access to them.
class Capture {
    has $.list is rw;
    has $.hash is rw;

    # Binds a fresh NQPList and NQPHash to the invocant's attributes and
    # returns the invocant.
    # NOTE(review): this initialises and returns `self` rather than allocating
    # a separate instance -- confirm that is what callers such as Match.new()
    # rely on.
    method new() {
        $!list := NQPList.new();
        $!hash := NQPHash.new();
        self
    }

    # Positional lookup, forwarded to the list store.
    method at_pos($pos) {
        $!list.at_pos($pos)
    }

    # Positional bind, forwarded to the list store.
    method bind_pos($pos, $val) {
        $!list.bind_pos($pos, $val)
    }

    # Named lookup, forwarded to the hash store.
    method at_key($key) {
        $!hash.at_key($key)
    }

    # Named bind, forwarded to the hash store.
    method bind_key($key, $val) {
        $!hash.bind_key($key, $val)
    }

    # Integer value of the capture: whatever the list store's Int returns.
    method Int() {
        $!list.Int
    }

    # Numeric value is defined as the Int value.
    method Numeric() {
        self.Int
    }
}

# Match is a Capture that also records the matched target, the start ($!from)
# and end ($!to) offsets, an attached AST object and the cursor it came from.
class Match is Capture {
    has $.target is rw;
    has $.from is rw;
    has $.to is rw;
    has $.ast is rw;
    has $.cursor is rw;

    # Length of the match: end offset minus start offset.
    method chars() {
        $!to - $!from
    }

    # The cursor stored on this match.
    method CURSOR() {
        $!cursor;
    }

    # The matched text: the slice of the target's Str starting at $!from and
    # spanning ($!to - $!from) characters.
    method Str() {
        substr($!target.Str, $!from, $!to - $!from)
    }

    # True when the end offset is not before the start offset.
    method Bool() {
        $!to >= $!from
    }

    # Same value as chars(): end offset minus start offset.
    method Int() {
        $!to - $!from
    }

    # Stores $obj as this match's AST and returns the bound value.
    method make($obj) {
        $!ast := $obj
    }
}

# Regex::Cursor is used for managing regular expression control flow
# and is also a base class for grammars.
class Regex::Cursor {
    has $.target is rw;
    has $.from is rw;
    has $.pos is rw;
    has $.match is rw;
    has $.names is rw;
    has $.debug is rw;
    has @.bstack is rw;
    has @.cstack is rw;
    has @.caparray is rw;
    has $.regex is rw;

    my $generation := 0;
    my $FALSE := 0;
    my $TRUE := 1;

    # Factory for the Match object that MATCH() populates.
    method new_match() {
        Match.new()
    }

    # Factory for the lists that hold arrayed subcaptures.
    method new_array() {
        NQPList.new()
    }

    # Return this cursor's current Match object, generating a new one
    # for the Cursor if one hasn't been created yet.
    method MATCH() {
        my $match := $!match;
        if !nqp::repr_defined($match) || !$match {
            if !nqp::repr_defined($match) {
                # First, create a Match object and bind it.
                $match := self.new_match();
                self.match($match);
                $match.cursor(self);
                $match.target($!target);
                # The cursor tracks its end offset in $!pos (it has no $!to
                # attribute); that value becomes the match's "to".
                $match.to($!pos);
                $match.from($!from);

                # Create any arrayed subcaptures.  %caphash remembers which
                # names were given an array so the subcursor loop below can
                # tell "push onto array" from "bind directly".
                my @caparray := @!caparray;
                my %caphash := NQPHash.new();
                my @arr;
                my $keyint;
                if nqp::repr_defined(@caparray) {
                    for @caparray {
                        @arr := self.new_array();
                        %caphash{$_} := @arr;
                        if nqp::is_cclass_str_index("Numeric", $_, 0) {
                            $match.bind_pos($_, @arr);
                        } else {
                            $match.bind_key($_, @arr);
                        }
                    }
                }

                # If it's not a successful match, or if there are
                # no saved subcursors, we're done.  Otherwise walk the
                # subcursors; this must happen whether or not there were
                # arrayed captures.
                my @cstack := @!cstack;
                if $!pos >= $!from && nqp::repr_defined(@cstack) && @cstack {
                    my $subcur;
                    my $submatch;
                    my $names;
                    my @namelist;
                    for @cstack {
                        if $_.isa(Regex::Cursor) {
                            $subcur := $_;
                            $names := $subcur.names;
                            if nqp::repr_defined($names) {
                                $submatch := $subcur.MATCH();
                                # A subcursor may carry several names joined by "=".
                                if nqp::index_str($names, "=") >= 0 {
                                    @namelist := nqp::split_str($names, "=")
                                } else {
                                    @namelist := ();
                                    @namelist.push($names)
                                }
                                for @namelist {
                                    # Class name first, target second -- the same
                                    # argument order as is_cclass_str_index above.
                                    $keyint := nqp::is_cclass_str("Numeric", $_);
                                    if nqp::repr_defined(@caparray) && nqp::repr_defined(%caphash{$_}) {
                                        # Arrayed capture: append to the list bound
                                        # earlier (numeric names are positional).
                                        if $keyint {
                                            $match.at_pos($_).push($submatch);
                                        } else {
                                            $match.at_key($_).push($submatch);
                                        }
                                    } else {
                                        # Plain capture: bind the submatch directly.
                                        if $keyint {
                                            $match.bind_pos($_, $submatch);
                                        } else {
                                            $match.bind_key($_, $submatch);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        $match
    }
}













101 changes: 99 additions & 2 deletions dotnet/runtime/Runtime/Ops/Primitive.cs
@@ -1,7 +1,8 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text;
using System.Text.RegularExpressions;
using Rakudo.Metamodel;
using Rakudo.Metamodel.Representations;

Expand Down Expand Up @@ -236,12 +237,108 @@ public static RakudoObject substr(ThreadContext TC, RakudoObject x, RakudoObject
/// </summary>
/// <param name="x"></param>
/// <param name="y"></param>
/// <param name="z"></param>
/// <param name="ResultType"></param>
/// <returns></returns>
public static RakudoObject substr(ThreadContext TC, RakudoObject x, RakudoObject y)
{
    // Unbox the source string and the start offset, take everything from
    // the offset to the end, and box the result as the default string type.
    string source = Ops.unbox_str(TC, x);
    int start = Ops.unbox_int(TC, y);
    string tail = source.Substring(start);
    return Ops.box_str(TC, tail, TC.DefaultStrBoxType);
}

/// <summary>
/// Searches for the first occurrence of a substring within a string and
/// returns its zero-based index, or -1 if the substring is not found.
/// </summary>
/// <param name="TC">The current thread context.</param>
/// <param name="x">Boxed string to search in.</param>
/// <param name="y">Boxed substring to search for.</param>
/// <returns>The index boxed as TC.DefaultIntBoxType.</returns>
public static RakudoObject index_str(ThreadContext TC, RakudoObject x, RakudoObject y)
{
    string haystack = Ops.unbox_str(TC, x);
    string needle = Ops.unbox_str(TC, y);
    // Ordinal comparison: the single-argument IndexOf(string) overload is
    // culture-sensitive, so its answer could vary with the current culture.
    int found = haystack.IndexOf(needle, StringComparison.Ordinal);
    return Ops.box_int(TC, found, TC.DefaultIntBoxType);
}

/// <summary>
/// Searches for the first occurrence of a substring within a string,
/// starting at a specified index, and returns its zero-based index, or -1
/// if the substring is not found.
/// </summary>
/// <param name="TC">The current thread context.</param>
/// <param name="x">Boxed string to search in.</param>
/// <param name="y">Boxed substring to search for.</param>
/// <param name="z">Boxed integer index at which the search starts.</param>
/// <returns>The index boxed as TC.DefaultIntBoxType.</returns>
public static RakudoObject index_str_index(ThreadContext TC, RakudoObject x, RakudoObject y, RakudoObject z)
{
    string haystack = Ops.unbox_str(TC, x);
    string needle = Ops.unbox_str(TC, y);
    int start = Ops.unbox_int(TC, z);
    // Ordinal comparison: the IndexOf(string, int) overload is
    // culture-sensitive, so its answer could vary with the current culture.
    int found = haystack.IndexOf(needle, start, StringComparison.Ordinal);
    return Ops.box_int(TC, found, TC.DefaultIntBoxType);
}

/// <summary>
/// Splits a string around every occurrence of a delimiter string and
/// returns the pieces as a new NQPList of boxed strings. An empty delimiter
/// splits the string into its individual characters.
/// </summary>
/// <param name="TC">The current thread context.</param>
/// <param name="x">Boxed string to split.</param>
/// <param name="y">Boxed delimiter string.</param>
/// <returns>A new NQPList instance holding the boxed pieces in order.</returns>
public static RakudoObject split_str(ThreadContext TC, RakudoObject x, RakudoObject y)
{
    RakudoObject list = Ops.instance_of(TC, Ops.get_lex(TC, "NQPList"));
    var store = ((P6list.Instance)list).Storage;
    string target = Ops.unbox_str(TC, x);
    string delimiter = Ops.unbox_str(TC, y);

    if (delimiter.Length == 0)
    {
        // Indexing delimiter[0] would throw here; split per character instead.
        foreach (char ch in target)
            store.Add(Ops.box_str(TC, ch.ToString(), TC.DefaultStrBoxType));
        return list;
    }

    // Split on the whole delimiter, not just its first character. Empty
    // pieces are kept, as they were with the single-character split.
    string[] pieces = target.Split(new string[] { delimiter }, StringSplitOptions.None);
    foreach (string piece in pieces)
        store.Add(Ops.box_str(TC, piece, TC.DefaultStrBoxType));
    return list;
}

/// <summary>
/// Tests whether the character at a given index of a string belongs to
/// the named character class.
/// </summary>
/// <param name="TC">The current thread context.</param>
/// <param name="x">Boxed name of a CCLASS member.</param>
/// <param name="y">Boxed string to inspect.</param>
/// <param name="z">Boxed integer index into the string.</param>
/// <returns>1 or 0, boxed as TC.DefaultIntBoxType.</returns>
public static RakudoObject is_cclass_str_index(ThreadContext TC, RakudoObject x, RakudoObject y, RakudoObject z)
{
    string className = Ops.unbox_str(TC, x);
    CCLASS cclass = (CCLASS)Enum.Parse(typeof(CCLASS), className);
    string target = Ops.unbox_str(TC, y);
    int index = Ops.unbox_int(TC, z);

    int flag;
    if (is_cclass(target, index, cclass))
        flag = 1;
    else
        flag = 0;
    return Ops.box_int(TC, flag, TC.DefaultIntBoxType);
}

/// <summary>
/// Tests whether the character at index 0 of a string belongs to the
/// named character class.
/// </summary>
/// <param name="TC">The current thread context.</param>
/// <param name="x">Boxed name of a CCLASS member.</param>
/// <param name="y">Boxed string to inspect.</param>
/// <returns>1 or 0, boxed as TC.DefaultIntBoxType.</returns>
public static RakudoObject is_cclass_str(ThreadContext TC, RakudoObject x, RakudoObject y)
{
    string className = Ops.unbox_str(TC, x);
    CCLASS cclass = (CCLASS)Enum.Parse(typeof(CCLASS), className);
    string target = Ops.unbox_str(TC, y);

    int flag;
    if (is_cclass(target, 0, cclass))
        flag = 1;
    else
        flag = 0;
    return Ops.box_int(TC, flag, TC.DefaultIntBoxType);
}

// see http://msdn.microsoft.com/en-us/library/20bw873z.aspx
// more precisely http://msdn.microsoft.com/en-us/library/20bw873z.aspx#SupportedUnicodeGeneralCategories
// to add more categories/patterns.
enum CCLASS
{
    Numeric
}

// \G pins the match to the start position passed to IsMatch, so exactly the
// character at the requested index is tested. The earlier "^...$" form only
// succeeded when the whole string was one numeric character at index 0.
static Regex NumericCompare = new Regex(@"\G\p{N}", RegexOptions.Compiled);

/// <summary>
/// Tests whether the character at <paramref name="index"/> in
/// <paramref name="target"/> belongs to the given character class.
/// </summary>
/// <param name="target">String to inspect.</param>
/// <param name="index">Zero-based position of the character to test.</param>
/// <param name="cclass">Character class to test against.</param>
/// <returns>
/// True if the character is in the class; false if it is not, or if there
/// is no character at that position (null/empty string, index out of range).
/// </returns>
/// <exception cref="NotImplementedException">
/// The character class has no implementation yet.
/// </exception>
static bool is_cclass(string target, int index, CCLASS cclass)
{
    bool hasChar = target != null && index >= 0 && index < target.Length;
    switch (cclass)
    {
        case CCLASS.Numeric:
            return hasChar && NumericCompare.IsMatch(target, index);
        default:
            throw new NotImplementedException("The character class " + cclass + " is not yet implemented");
    }
}
}
}

0 comments on commit b930a23

Please sign in to comment.