In [None]:
from collections import Counter

from datasets import load_dataset

In [2]:
# Load the "tuetschek/atis" dataset via Hugging Face `datasets` and show its
# repr, which lists the available splits together with their columns and sizes.
dataset = load_dataset(path="tuetschek/atis")
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'intent', 'text', 'slots'],
        num_rows: 4978
    })
    test: Dataset({
        features: ['id', 'intent', 'text', 'slots'],
        num_rows: 893
    })
})

In [None]:
# Alphabetically sorted list of the distinct raw intent labels found in the
# training split. Combined labels (e.g. "flight+airfare") are kept as-is here.
train_labels = sorted({row["intent"] for row in dataset["train"]})
train_labels

['abbreviation',
 'aircraft',
 'aircraft+flight+flight_no',
 'airfare',
 'airfare+flight_time',
 'airline',
 'airline+flight_no',
 'airport',
 'capacity',
 'cheapest',
 'city',
 'distance',
 'flight',
 'flight+airfare',
 'flight_no',
 'flight_time',
 'ground_fare',
 'ground_service',
 'ground_service+ground_fare',
 'meal',
 'quantity',
 'restriction']

In [8]:
# Number of training examples per raw intent label; a combined label such as
# "flight+airfare" is counted as its own category at this stage.
train_labels_count = Counter(row["intent"] for row in dataset["train"])
train_labels_count

Counter({'flight': 3666,
         'airfare': 423,
         'ground_service': 255,
         'airline': 157,
         'abbreviation': 147,
         'aircraft': 81,
         'flight_time': 54,
         'quantity': 51,
         'flight+airfare': 21,
         'airport': 20,
         'distance': 20,
         'city': 19,
         'ground_fare': 18,
         'capacity': 16,
         'flight_no': 12,
         'meal': 6,
         'restriction': 6,
         'airline+flight_no': 2,
         'ground_service+ground_fare': 1,
         'airfare+flight_time': 1,
         'cheapest': 1,
         'aircraft+flight+flight_no': 1})

In [13]:
# Per-label counts for the training split after breaking combined intents
# apart on "+": each component of "a+b" contributes one count to "a" and one
# to "b". Note that this rebinds `train_labels_count`.
train_labels_count = Counter(
    label
    for row in dataset["train"]
    for label in row["intent"].split("+")
)
train_labels_count

Counter({'flight': 3688,
         'airfare': 445,
         'ground_service': 256,
         'airline': 159,
         'abbreviation': 147,
         'aircraft': 82,
         'flight_time': 55,
         'quantity': 51,
         'airport': 20,
         'distance': 20,
         'ground_fare': 19,
         'city': 19,
         'capacity': 16,
         'flight_no': 15,
         'meal': 6,
         'restriction': 6,
         'cheapest': 1})

In [11]:
# Show every training example whose intent string contains a "+", i.e. the
# examples annotated with more than one intent.
list(filter(lambda row: "+" in row["intent"], dataset["train"]))

[{'id': 570,
  'intent': 'flight+airfare',
  'text': 'give me the flights and fares on december twenty seventh from indianapolis to orlando',
  'slots': 'O O O O O O O B-depart_date.month_name B-depart_date.day_number I-depart_date.day_number O B-fromloc.city_name O B-toloc.city_name'},
 {'id': 602,
  'intent': 'flight+airfare',
  'text': 'all flights and fares from atlanta to dallas round trip after 12 pm less than 1100 dollars',
  'slots': 'O O O O O B-fromloc.city_name O B-toloc.city_name B-round_trip I-round_trip B-depart_time.time_relative B-depart_time.time I-depart_time.time B-cost_relative O B-fare_amount I-fare_amount'},
 {'id': 719,
  'intent': 'flight+airfare',
  'text': 'first flights and fares from pittsburgh to atlanta on a thursday',
  'slots': 'B-flight_mod O O O O B-fromloc.city_name O B-toloc.city_name O O B-depart_date.day_name'},
 {'id': 859,
  'intent': 'flight+airfare',
  'text': 'all flights and fares from atlanta to dallas round trip after 12 pm less than 1100 d

In [12]:
# Show every training example whose intent string contains "flight_no".
# This is a substring test, so combined labels such as "airline+flight_no"
# are included alongside the plain "flight_no" label.
list(filter(lambda row: "flight_no" in row["intent"], dataset["train"]))

[{'id': 330,
  'intent': 'flight_no',
  'text': 'flight numbers from columbus to minneapolis tomorrow',
  'slots': 'O O O B-fromloc.city_name O B-toloc.city_name B-depart_date.today_relative'},
 {'id': 879,
  'intent': 'flight_no',
  'text': "i 'm trying to find the flight number from a flight from orlando to cleveland on us air and it arrives around 10 pm",
  'slots': 'O O O O O O O O O O O O B-fromloc.city_name O B-toloc.city_name O B-airline_name I-airline_name O O O B-arrive_time.time_relative B-arrive_time.time I-arrive_time.time'},
 {'id': 961,
  'intent': 'flight_no',
  'text': 'flight numbers from minneapolis to long beach on june twenty six',
  'slots': 'O O O B-fromloc.city_name O B-toloc.city_name I-toloc.city_name O B-depart_date.month_name B-depart_date.day_number I-depart_date.day_number'},
 {'id': 1255,
  'intent': 'flight_no',
  'text': 'please show me the return flight number from toronto to st. petersburg',
  'slots': 'O O O O O O O O B-fromloc.city_name O B-toloc.cit

In [6]:
# Alphabetically sorted list of the distinct raw intent labels found in the
# test split. Combined labels (e.g. "flight+airline") are kept as-is here.
test_labels = sorted({row["intent"] for row in dataset["test"]})
test_labels

['abbreviation',
 'aircraft',
 'airfare',
 'airfare+flight',
 'airline',
 'airport',
 'capacity',
 'city',
 'day_name',
 'distance',
 'flight',
 'flight+airfare',
 'flight+airline',
 'flight_no',
 'flight_no+airline',
 'flight_time',
 'ground_fare',
 'ground_service',
 'meal',
 'quantity']

In [10]:
# Number of test examples per raw intent label; a combined label such as
# "flight+airfare" is counted as its own category at this stage.
test_labels_count = Counter(row["intent"] for row in dataset["test"])
test_labels_count

Counter({'flight': 632,
         'airfare': 48,
         'airline': 38,
         'ground_service': 36,
         'abbreviation': 33,
         'capacity': 21,
         'airport': 18,
         'flight+airfare': 12,
         'distance': 10,
         'aircraft': 9,
         'flight_no': 8,
         'ground_fare': 7,
         'meal': 6,
         'city': 6,
         'quantity': 3,
         'day_name': 2,
         'flight_time': 1,
         'airfare+flight': 1,
         'flight+airline': 1,
         'flight_no+airline': 1})

In [14]:
# Per-label counts for the test split after breaking combined intents apart
# on "+": each component of "a+b" contributes one count to "a" and one to "b".
# Note that this rebinds `test_labels_count`.
test_labels_count = Counter(
    label
    for row in dataset["test"]
    for label in row["intent"].split("+")
)
test_labels_count

Counter({'flight': 646,
         'airfare': 61,
         'airline': 40,
         'ground_service': 36,
         'abbreviation': 33,
         'capacity': 21,
         'airport': 18,
         'distance': 10,
         'aircraft': 9,
         'flight_no': 9,
         'ground_fare': 7,
         'meal': 6,
         'city': 6,
         'quantity': 3,
         'day_name': 2,
         'flight_time': 1})