In [169]:
#r "nuget: Microsoft.ML"
#r "nuget: Microsoft.ML.OnnxConverter"

In [170]:
#r "nuget: Jieba.Net.Core"

In [171]:
using System;
using System.IO;
using System.Linq;

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;

using JiebaNet;

In [172]:
    /// <summary>
    /// One input row for the classifier: an integer class label and the raw question text.
    /// The <c>LoadColumn</c> indices bind each property to a column position of the loaded text file.
    /// </summary>
    public class Question
    {
        /// <summary>Class label, read from column 0.</summary>
        [LoadColumn(0)]
        public int Label { get; set; }
        /// <summary>Raw question text, read from column 1.</summary>
        [LoadColumn(1)]
        public string Text { get; set; }
    }

In [173]:
/// <summary>
/// Input side of the Jieba custom mapping: carries the raw text to be segmented.
/// </summary>
public class JiebaLambdaInput
{
    /// <summary>Unsegmented source text.</summary>
    public string Text { get; set; }
}

/// <summary>
/// Output side of the Jieba custom mapping: carries the segmented text.
/// </summary>
public class JiebaLambdaOutput
{
    /// <summary>Segmented text: the tokens produced by the segmenter joined with single spaces.</summary>
    public string JiebaText { get; set; }
}

/// <summary>
/// Holds the row-level mapping action used with <c>CustomMapping</c> to segment
/// text into space-separated words with Jieba.
/// </summary>
public class JiebaLambda
{
        // Number of rows mapped so far (diagnostic only; nothing in this class reads it).
        static int Count = 0;

        /// <summary>
        /// Segments <paramref name="input"/>'s text with Jieba and stores the tokens,
        /// joined by single spaces, in <paramref name="output"/>.
        /// </summary>
        /// <param name="input">Row holding the raw text to segment.</param>
        /// <param name="output">Row that receives the space-separated tokens.</param>
        public static void MyAction(JiebaLambdaInput input, JiebaLambdaOutput output)
        {
            // The mapping action may be invoked from several threads at once, so the
            // shared counter must be updated atomically instead of with a plain "++".
            System.Threading.Interlocked.Increment(ref Count);

            // Nothing to segment: emit an empty string rather than handing null to the segmenter.
            if (string.IsNullOrEmpty(input.Text))
            {
                output.JiebaText = string.Empty;
                return;
            }

            // NOTE(review): a segmenter is created per call, as in the original; sharing one
            // instance would need confirmation that JiebaSegmenter.Cut is thread-safe.
            JiebaNet.Segmenter.JiebaSegmenter jiebaSegmenter = new JiebaNet.Segmenter.JiebaSegmenter();
            output.JiebaText = string.Join(" ", jiebaSegmenter.Cut(input.Text));
        }
}

In [174]:
// ML.NET context shared by the later cells.
var mlContext = new MLContext();
// Load the headerless, comma-separated question file into a data view of Question rows.
IDataView fulldata = mlContext.Data.LoadFromTextFile<Question>(
    path: "./data/qa.csv",
    separatorChar: ',',
    hasHeader: false);

In [175]:
// Hold out 20% of the rows for testing; the remainder is used for training.
DataOperationsCatalog.TrainTestData trainTestData = mlContext.Data.TrainTestSplit(fulldata, testFraction: 0.2);
IDataView trainData = trainTestData.TrainSet;
IDataView testData = trainTestData.TestSet;

In [176]:
// Trailing expression: lets the notebook display the training split for inspection.
trainData

In [177]:

// Step 1: segment the raw Text column into space-separated words (JiebaText).
var jiebaSegmentation = mlContext.Transforms.CustomMapping<JiebaLambdaInput, JiebaLambdaOutput>(
    mapAction: JiebaLambda.MyAction,
    contractName: "JiebaLambda");

// Step 2: turn the segmented text into the numeric Features column.
var textFeaturizer = mlContext.Transforms.Text.FeaturizeText(
    outputColumnName: "Features",
    inputColumnName: "JiebaText");

// Step 3: convert the Label column into the key-typed LabelKey column used by the trainer.
var labelToKey = mlContext.Transforms.Conversion.MapValueToKey(
    outputColumnName: "LabelKey",
    inputColumnName: "Label");

// Step 4: multiclass trainer over LabelKey / Features.
var sdcaTrainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
    labelColumnName: "LabelKey",
    featureColumnName: "Features");

// Step 5: map the predicted key back to the original label value.
var keyToLabel = mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel");

var trainingPipeline = jiebaSegmentation
    .Append(textFeaturizer)
    .Append(labelToKey)
    .Append(sdcaTrainer)
    .Append(keyToLabel);

                

In [178]:
// Fit every stage of the pipeline on the training split; the result is the trained model.
ITransformer trainedModel = trainingPipeline.Fit(trainData);

In [179]:
    /// <summary>
    /// Prediction output row: the original <see cref="Question"/> columns plus the
    /// intermediate and final columns read back from the trained pipeline.
    /// </summary>
    public class PredictionResult : Question
    {
        /// <summary>Segmented text column written by the Jieba custom mapping.</summary>
        public string JiebaText { get; set; }

        /// <summary>Feature vector column named "Features"; may be null if it was never populated.</summary>
        public float[] Features { get; set; }

        /// <summary>Predicted label column named "PredictedLabel".</summary>
        public int PredictedLabel;

        /// <summary>Score column; presumably one value per class (confirm against the trainer's output schema).</summary>
        public float[] Score;

        // NOTE(review): no stage visible in this notebook writes a "Probability" column,
        // so this likely stays at its default value; confirm before relying on it.
        public float Probability;

        /// <summary>
        /// Writes the segmented text, prediction, scores and feature values to the console.
        /// Tolerates <see cref="Features"/> and <see cref="Score"/> being null.
        /// </summary>
        public void PrintToConsole()
        {
            Console.WriteLine($"JiebaText={JiebaText}");

            // Interpolating the array directly would print its type name, so join the values.
            string scoreText = Score != null ? string.Join(",", Score) : string.Empty;
            Console.WriteLine($"PredictedLabel:{PredictedLabel},Score:{scoreText},Probability:{Probability}");

            // Read the length null-safely: the array is only known to be non-null inside the check below.
            Console.WriteLine($"TextFeatures Length:{Features?.Length ?? 0}");
            if (Features != null)
            {
                foreach (var f in Features)
                {
                    Console.Write($"{f},");
                }
                Console.WriteLine();
            }
            Console.WriteLine();
        }
    }

In [180]:
// Single-row prediction engine: maps one Question to one PredictionResult using the trained model.
PredictionEngine<Question, PredictionResult> predEngine =
    mlContext.Model.CreatePredictionEngine<Question, PredictionResult>(trainedModel);

In [181]:

// First sample question: predict its label and print "text:label".
var sampleStatement1 = new Question { Text = "氣溫多少" };
PredictionResult predictionresult1 = predEngine.Predict(sampleStatement1);
Console.WriteLine($"{sampleStatement1.Text}:{predictionresult1.PredictedLabel}");

氣溫多少:1


In [182]:
// Second sample question: predict its label and print "text:label".
var sampleStatement2 = new Question { Text = "什麼是新能源車" };
PredictionResult predictionresult2 = predEngine.Predict(sampleStatement2);
Console.WriteLine($"{sampleStatement2.Text}:{predictionresult2.PredictedLabel}");

什麼是新能源車:0


In [183]:
// Third sample question: predict its label and print "text:label".
// Uses its own variable names so it does not shadow the second sample's variables.
Question sampleStatement3 = new Question { Text = "課程講什麼" };
var predictionresult3 = predEngine.Predict(sampleStatement3);
Console.WriteLine($"{sampleStatement3.Text}:{predictionresult3.PredictedLabel}");

課程講什麼:0
