## Preparing Environment

[![Google Colab](https://badgen.net/badge/Lancer/run%20Google%20Colab/orange?icon=terminal)](https://colab.research.google.com/github/BrikerMan/Kashgari/blob/kashgari2/examples/train_with_generator.ipynb)


In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

# Ask Colab for TensorFlow 2.x. This is best-effort: any exception raised by
# the magic is swallowed on purpose so the cell still runs to the import below.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

# Print the TensorFlow version that was actually imported.
import tensorflow as tf
print(tf.__version__)

In [None]:
!pip install git+https://github.com/BrikerMan/Kashgari.git@kashgari2


## Download Corpus

In [None]:
from tensorflow.keras.utils import get_file

def download_data(duplicate=1000):
    """Fetch the ATIS corpus files and return the results, repeated.

    Every URL is passed to ``get_file`` together with the last path
    component of the URL as the file name. The per-URL results are
    collected in URL order and the whole list is repeated ``duplicate``
    times.

    Args:
        duplicate: How many times the list of results is repeated.

    Returns:
        A list holding the ``get_file`` result for each URL (a local file
        path, per the Keras documentation), repeated ``duplicate`` times.
    """
    sources = (
        'https://raw.githubusercontent.com/BrikerMan/JointSLU/master/data/atis-2.train.w-intent.iob',
        'https://raw.githubusercontent.com/BrikerMan/JointSLU/master/data/atis-2.dev.w-intent.iob',
        'https://raw.githubusercontent.com/BrikerMan/JointSLU/master/data/atis.test.w-intent.iob',
        'https://raw.githubusercontent.com/BrikerMan/JointSLU/master/data/atis.train.w-intent.iob',
    )
    # The file name handed to get_file is whatever follows the final '/'.
    fetched = [get_file(src.rsplit('/', 1)[-1], src) for src in sources]

    return fetched * duplicate

# Fetch the corpus once at notebook level, using the default `duplicate`.
# NOTE(review): the cells visible below call download_data() again instead of
# reusing this variable — confirm whether it is still needed.
corpus_files = download_data()

## Run Classification

In [None]:
from kashgari.generators import ABCGenerator

# Define your classification data generator
class ClassificationGenerator:
    """Stream ``(tokens, label)`` classification samples from corpus files.

    Each non-blank line is expected to contain two tab-separated fields:

    * field 0: a space-separated sentence; its first and last tokens are
      dropped (presumably the BOS/EOS markers of the ATIS files — verify
      against the data).
    * field 1: space-separated tags; only the last token is used, as the
      sample's label.

    Blank (whitespace-only) lines are ignored both when counting and when
    iterating, so ``steps`` always equals the number of yielded samples.

    Args:
        files: Paths of the corpus files. Every file is read once in the
            constructor to count samples and again on each iteration.
    """

    def __init__(self, files):
        self.files = files
        # Count samples up front so `steps` is a cheap attribute read.
        # Each file is opened in a `with` block so its handle is closed
        # deterministically instead of being left to the garbage collector.
        total = 0
        for path in files:
            with open(path, 'r', encoding='utf-8') as f:
                total += sum(1 for line in f if line.strip())
        self._line_count = total

    @property
    def steps(self) -> int:
        """Number of samples produced by one full pass over ``files``."""
        return self._line_count

    def __iter__(self):
        """Yield ``(tokens, label)`` for every non-blank line, file by file.

        Raises:
            IndexError: If a non-blank line has no tab-separated second field.
        """
        for file in self.files:
            with open(file, 'r', encoding='utf-8') as f:
                for line in f:
                    # A blank line (e.g. a trailing newline at end of file)
                    # carries no sample; skip it rather than fail on rows[1].
                    if not line.strip():
                        continue
                    rows = line.split('\t')
                    # Drop the first and last token of the sentence field.
                    x = rows[0].strip().split(' ')[1:-1]
                    # The label is the final token of the tag field.
                    y = rows[1].strip().split(' ')[-1]
                    yield x, y

In [None]:
from kashgari.tasks.classification import BiGRU_Model
# Get the corpus file list and wrap it in the line-by-line sample generator.
files = download_data()
gen = ClassificationGenerator(files)

# Fit a BiGRU classification model with default settings on the generator.
model = BiGRU_Model()
model.fit_generator(gen)

## Run NER

In [None]:
class LabelingGenerator(ABCGenerator):
    """Stream ``(tokens, tags)`` sequence-labeling samples from corpus files.

    Each non-blank line is expected to contain two tab-separated fields,
    both space-separated. The first and last token of each field are
    dropped (presumably the BOS/EOS markers and their tag/intent slots in
    the ATIS files — verify against the data), leaving one tag per token.

    Blank (whitespace-only) lines are ignored both when counting and when
    iterating, so ``steps`` always equals the number of yielded samples.

    Args:
        files: Paths of the corpus files. Every file is read once in the
            constructor to count samples and again on each iteration.
    """

    def __init__(self, files):
        # NOTE(review): ABCGenerator.__init__ is not invoked here, as in the
        # original; confirm the base class needs no initialization.
        self.files = files
        # Count samples up front so `steps` is a cheap attribute read.
        # Each file is opened in a `with` block so its handle is closed
        # deterministically instead of being left to the garbage collector.
        total = 0
        for path in files:
            with open(path, 'r', encoding='utf-8') as f:
                total += sum(1 for line in f if line.strip())
        self._line_count = total

    @property
    def steps(self) -> int:
        """Number of samples produced by one full pass over ``files``."""
        return self._line_count

    def __iter__(self):
        """Yield ``(tokens, tags)`` for every non-blank line, file by file.

        Raises:
            IndexError: If a non-blank line has no tab-separated second field.
        """
        for file in self.files:
            with open(file, 'r', encoding='utf-8') as f:
                for line in f:
                    # A blank line (e.g. a trailing newline at end of file)
                    # carries no sample; skip it rather than fail on rows[1].
                    if not line.strip():
                        continue
                    rows = line.split('\t')
                    # Trim the first and last entry of both fields.
                    x = rows[0].strip().split(' ')[1:-1]
                    y = rows[1].strip().split(' ')[1:-1]
                    yield x, y


In [None]:
# NOTE: this import rebinds the name BiGRU_Model to the labeling model,
# replacing the classification class imported earlier in the notebook.
from kashgari.tasks.labeling import BiGRU_Model
# Get the corpus file list and wrap it in the line-by-line sample generator.
files = download_data()
gen = LabelingGenerator(files)

# Fit a BiGRU labeling model with default settings on the generator.
model = BiGRU_Model()
model.fit_generator(gen)