Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

deleted stale nltk_data files

  • Loading branch information...
commit f943efedb33813e16337eac55241efe5d58b98a3 1 parent 0bc0f79
@stevenbird stevenbird authored
View
12 nltk_data/__init__.py
@@ -1,12 +0,0 @@
-# Natural Language Toolkit (NLTK) Package for building models
-#
-# Copyright (C) 2001-2011 NLTK Project
-# Authors: Steven Bird <sb@csse.unimelb.edu.au>
-# Edward Loper <edloper@gradient.cis.upenn.edu>
-# URL: <http://www.nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""
-A package for building the models distributed with NLTK.
-"""
-
View
BIN  nltk_data/samples/city.db
Binary file not shown
View
6 nltk_data/samples/polish-lat2.txt
@@ -1,6 +0,0 @@
-Pruska Biblioteka Państwowa. Jej dawne zbiory znane pod nazwą
-"Berlinka" to skarb kultury i sztuki niemieckiej. Przewiezione przez
-Niemców pod koniec II wojny światowej na Dolny Śląsk, zostały
-odnalezione po 1945 r. na terytorium Polski. Trafiły do Biblioteki
-Jagiellońskiej w Krakowie, obejmują ponad 500 tys. zabytkowych
-archiwaliów, m.in. manuskrypty Goethego, Mozarta, Beethovena, Bacha.
View
7 nltk_data/samples/sinorama-gb.xml
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="gb2312" ?>
-
-<sent>
-甚至猫以人贵
-
-In some cases, cats were valued above humans.
-</sent>
View
8 nltk_data/samples/sinorama-utf8.xml
@@ -1,8 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-<doc>
-<sent>
-甚至猫以人贵
-
-In some cases, cats were valued above humans.
-</sent>
-</doc>
View
1,219 nltk_data/tagset_data.py
@@ -1,1219 +0,0 @@
-# Natural Language Toolkit (NLTK) Package for Tagset Tables
-#
-# Copyright (C) 2001-2011 NLTK Project
-# Authors: Steven Bird <sb@csse.unimelb.edu.au>
-# URL: <http://www.nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""
-A package for building the Tagset Tables distributed with NLTK
-"""
-
-
-# http://www.comp.leeds.ac.uk/ccalas/tagsets/brown.html
-brown_tagset = """
- * - (
- - opening parenthesis
- - (
- * - )
- - closing parenthesis
- - )
- * - *
- - negator
- - not n't
- * - ,
- - comma
- - ,
- * - --
- - dash
- - --
- * - .
- - sentence terminator
- - . ? ; ! :
- * - :
- - colon
- - :
- * - ABL
- - determiner/pronoun, pre-qualifier
- - quite such rather
- * - ABN
- - determiner/pronoun, pre-quantifier
- - all half many nary
- * - ABX
- - determiner/pronoun, double conjunction or pre-quantifier
- - both
- * - AP
- - determiner/pronoun, post-determiner
- - many other next more last former little several enough
- most least only very few fewer past same Last latter
- less single plenty 'nough lesser certain various manye
- next-to-last particular final previous present nuf
- * - AP$
- - determiner/pronoun, post-determiner, genitive
- - other's
- * - AP+AP
- - determiner/pronoun, post-determiner, hyphenated pair
- - many-much
- * - AT
- - article
- - the an no a every th' ever' ye
- * - BE
- - verb 'to be', infinitive or imperative
- - be
- * - BED
- - verb 'to be', past tense, 2nd person singular or all persons
- plural
- - were
- * - BED*
- - verb 'to be', past tense, 2nd person singular or all persons
- plural, negated
- - weren't
- * - BEDZ
- - verb 'to be', past tense, 1st and 3rd person singular
- - was
- * - BEDZ*
- - verb 'to be', past tense, 1st and 3rd person singular, negated
- - wasn't
- * - BEG
- - verb 'to be', present participle or gerund
- - being
- * - BEM
- - verb 'to be', present tense, 1st person singular
- - am
- * - BEM*
- - verb 'to be', present tense, 1st person singular, negated
- - ain't
- * - BEN
- - verb 'to be', past participle
- - been
- * - BER
- - verb 'to be', present tense, 2nd person singular or all persons
- plural
- - are art
- * - BER*
- - verb 'to be', present tense, 2nd person singular or all persons
- plural, negated
- - aren't ain't
- * - BEZ
- - verb 'to be', present tense, 3rd person singular
- - is
- * - BEZ*
- - verb 'to be', present tense, 3rd person singular, negated
- - isn't ain't
- * - CC
- - conjunction, coordinating
- - and or but plus & either neither nor yet 'n' and/or minus an'
- * - CD
- - numeral, cardinal
- - two one 1 four 2 1913 71 74 637 1937 8 five three million
- 87-31 29-5 seven 1,119 fifty-three 7.5 billion hundred 125,000
- 1,700 60 100 six ...
- * - CD$
- - numeral, cardinal, genitive
- - 1960's 1961's .404's
- * - CS
- - conjunction, subordinating
- - that as after whether before while like because if since for
- than altho until so unless though providing once lest s'posin'
- till whereas whereupon supposing tho' albeit then so's 'fore
- * - DO
- - verb 'to do', uninflected present tense, infinitive or
- imperative
- - do dost
- * - DO*
- - verb 'to do', uninflected present tense or imperative, negated
- - don't
- * - DO+PPSS
- - verb 'to do', past or present tense + pronoun, personal,
- nominative, not 3rd person singular
- - d'you
- * - DOD
- - verb 'to do', past tense
- - did done
- * - DOD*
- - verb 'to do', past tense, negated
- - didn't
- * - DOZ
- - verb 'to do', present tense, 3rd person singular
- - does
- * - DOZ*
- - verb 'to do', present tense, 3rd person singular, negated
- - doesn't don't
- * - DT
- - determiner/pronoun, singular
- - this each another that 'nother
- * - DT$
- - determiner/pronoun, singular, genitive
- - another's
- * - DT+BEZ
- - determiner/pronoun + verb 'to be', present tense, 3rd person
- singular
- - that's
- * - DT+MD
- - determiner/pronoun + modal auxiliary
- - that'll this'll
- * - DTI
- - determiner/pronoun, singular or plural
- - any some
- * - DTS
- - determiner/pronoun, plural
- - these those them
- * - DTS+BEZ
- - pronoun, plural + verb 'to be', present tense, 3rd person
- singular
- - them's
- * - DTX
- - determiner, pronoun or double conjunction
- - neither either one
- * - EX
- - existential there
- - there
- * - EX+BEZ
- - existential there + verb 'to be', present tense, 3rd person
- singular
- - there's
- * - EX+HVD
- - existential there + verb 'to have', past tense
- - there'd
- * - EX+HVZ
- - existential there + verb 'to have', present tense, 3rd person
- singular
- - there's
- * - EX+MD
- - existential there + modal auxiliary
- - there'll there'd
- * - FW-*
- - foreign word: negator
- - pas non ne
- * - FW-AT
- - foreign word: article
- - la le el un die der ein keine eine das las les Il
- * - FW-AT+NN
- - foreign word: article + noun, singular, common
- - l'orchestre l'identite l'arcade l'ange l'assistance l'activite
- L'Universite l'independance L'Union L'Unita l'osservatore
- * - FW-AT+NP
- - foreign word: article + noun, singular, proper
- - L'Astree L'Imperiale
- * - FW-BE
- - foreign word: verb 'to be', infinitive or imperative
- - sit
- * - FW-BER
- - foreign word: verb 'to be', present tense, 2nd person singular
- or all persons plural
- - sind sunt etes
- * - FW-BEZ
- - foreign word: verb 'to be', present tense, 3rd person singular
- - ist est
- * - FW-CC
- - foreign word: conjunction, coordinating
- - et ma mais und aber och nec y
- * - FW-CD
- - foreign word: numeral, cardinal
- - une cinq deux sieben unam zwei
- * - FW-CS
- - foreign word: conjunction, subordinating
- - bevor quam ma
- * - FW-DT
- - foreign word: determiner/pronoun, singular
- - hoc
- * - FW-DT+BEZ
- - foreign word: determiner + verb 'to be', present tense, 3rd
- person singular
- - c'est
- * - FW-DTS
- - foreign word: determiner/pronoun, plural
- - haec
- * - FW-HV
- - foreign word: verb 'to have', present tense, not 3rd person
- singular
- - habe
- * - FW-IN
- - foreign word: preposition
- - ad de en a par con dans ex von auf super post sine sur sub avec
- per inter sans pour pendant in di
- * - FW-IN+AT
- - foreign word: preposition + article
- - della des du aux zur d'un del dell'
- * - FW-IN+NN
- - foreign word: preposition + noun, singular, common
- - d'etat d'hotel d'argent d'identite d'art
- * - FW-IN+NP
- - foreign word: preposition + noun, singular, proper
- - d'Yquem d'Eiffel
- * - FW-JJ
- - foreign word: adjective
- - avant Espagnol sinfonica Siciliana Philharmonique grand publique
- haute noire bouffe Douce meme humaine bel serieuses royaux
- anticus presto Sovietskaya Bayerische comique schwarzen ...
- * - FW-JJR
- - foreign word: adjective, comparative
- - fortiori
- * - FW-JJT
- - foreign word: adjective, superlative
- - optimo
- * - FW-NN
- - foreign word: noun, singular, common
- - ballet esprit ersatz mano chatte goutte sang Fledermaus oud def
- kolkhoz roi troika canto boite blutwurst carne muzyka bonheur
- monde piece force ...
- * - FW-NN$
- - foreign word: noun, singular, common, genitive
- - corporis intellectus arte's dei aeternitatis senioritatis curiae
- patronne's chambre's
- * - FW-NNS
- - foreign word: noun, plural, common
- - al culpas vopos boites haflis kolkhozes augen tyrannis
- alpha-beta-gammas metis banditos rata phis negociants crus
- Einsatzkommandos kamikaze wohaws sabinas zorrillas palazzi
- engages coureurs corroborees yori Ubermenschen ...
- * - FW-NP
- - foreign word: noun, singular, proper
- - Karshilama Dieu Rundfunk Afrique Espanol Afrika Spagna Gott
- Carthago deus
- * - FW-NPS
- - foreign word: noun, plural, proper
- - Svenskarna Atlantes Dieux
- * - FW-NR
- - foreign word: noun, singular, adverbial
- - heute morgen aujourd'hui hoy
- * - FW-OD
- - foreign word: numeral, ordinal
- - 18e 17e quintus
- * - FW-PN
- - foreign word: pronoun, nominal
- - hoc
- * - FW-PP$
- - foreign word: determiner, possessive
- - mea mon deras vos
- * - FW-PPL
- - foreign word: pronoun, singular, reflexive
- - se
- * - FW-PPL+VBZ
- - foreign word: pronoun, singular, reflexive + verb, present
- tense, 3rd person singular
- - s'excuse s'accuse
- * - FW-PPO
- - foreign word: pronoun, personal, accusative
- - lui me moi mi
- * - FW-PPO+IN
- - foreign word: pronoun, personal, accusative + preposition
- - mecum tecum
- * - FW-PPS
- - foreign word: pronoun, personal, nominative, 3rd person singular
- - il
- * - FW-PPSS
- - foreign word: pronoun, personal, nominative, not 3rd person
- singular
- - ich vous sie je
- * - FW-PPSS+HV
- - foreign word: pronoun, personal, nominative, not 3rd person
- singular + verb 'to have', present tense, not 3rd person
- singular
- - j'ai
- * - FW-QL
- - foreign word: qualifier
- - minus
- * - FW-RB
- - foreign word: adverb
- - bas assai deja um wiederum cito velociter vielleicht
- simpliciter non zu domi nuper sic forsan olim oui semper
- tout despues hors
- * - FW-RB+CC
- - foreign word: adverb + conjunction, coordinating
- - forisque
- * - FW-TO+VB
- - foreign word: infinitival to + verb, infinitive
- - d'entretenir
- * - FW-UH
- - foreign word: interjection
- - sayonara bien adieu arigato bonjour adios bueno tchalo ciao o
- * - FW-VB
- - foreign word: verb, present tense, not 3rd person singular,
- imperative or infinitive
- - nolo contendere vive fermate faciunt esse vade noli tangere
- dites duces meminisse iuvabit gosaimasu voulez habla
- ksu'u'peli'afo lacheln miuchi say allons strafe portant
- * - FW-VBD
- - foreign word: verb, past tense
- - stabat peccavi audivi
- * - FW-VBG
- - foreign word: verb, present participle or gerund
- - nolens volens appellant seq. obliterans servanda dicendi
- delenda
- * - FW-VBN
- - foreign word: verb, past participle
- - vue verstrichen rasa verboten engages
- * - FW-VBZ
- - foreign word: verb, present tense, 3rd person singular
- - gouverne sinkt sigue diapiace
- * - FW-WDT
- - foreign word: WH-determiner
- - quo qua quod que quok
- * - FW-WPO
- - foreign word: WH-pronoun, accusative
- - quibusdam
- * - FW-WPS
- - foreign word: WH-pronoun, nominative
- - qui
- * - HV
- - verb 'to have', uninflected present tense, infinitive or
- imperative
- - have hast
- * - HV*
- - verb 'to have', uninflected present tense or imperative, negated
- - haven't ain't
- * - HV+TO
- - verb 'to have', uninflected present tense + infinitival to
- - hafta
- * - HVD
- - verb 'to have', past tense
- - had
- * - HVD*
- - verb 'to have', past tense, negated
- - hadn't
- * - HVG
- - verb 'to have', present participle or gerund
- - having
- * - HVN
- - verb 'to have', past participle
- - had
- * - HVZ
- - verb 'to have', present tense, 3rd person singular
- - has hath
- * - HVZ*
- - verb 'to have', present tense, 3rd person singular, negated
- - hasn't ain't
- * - IN
- - preposition
- - of in for by considering to on among at through with under
- into regarding than since despite according per before toward
- against as after during including between without except upon
- out over ...
- * - IN+IN
- - preposition, hyphenated pair
- - f'ovuh
- * - IN+PPO
- - preposition + pronoun, personal, accusative
- - t'hi-im
- * - JJ
- - adjective
- - ecent over-all possible hard-fought favorable hard meager fit
- such widespread outmoded inadequate ambiguous grand clerical
- effective orderly federal foster general proportionate ...
- * - JJ$
- - adjective, genitive
- - Great's
- * - JJ+JJ
- - adjective, hyphenated pair
- - big-large long-far
- * - JJR
- - adjective, comparative
- - greater older further earlier later freer franker wider better
- deeper firmer tougher faster higher bigger worse younger
- lighter nicer slower happier frothier Greater newer Elder ...
- * - JJR+CS
- - adjective, comparative + conjunction, subordinating
- - lighter'n
- * - JJS
- - adjective, semantically superlative
- - top chief principal northernmost master key head main
- tops utmost innermost foremost uppermost paramount topmost
- * - JJT
- - adjective, superlative
- - best largest coolest calmest latest greatest earliest simplest
- strongest newest fiercest unhappiest worst youngest worthiest
- fastest hottest fittest lowest finest smallest staunchest ...
- * - MD
- - modal auxiliary
- - should may might will would must can could shall ought need
- wilt
- * - MD*
- - modal auxiliary, negated
- - cannot couldn't wouldn't can't won't shouldn't shan't mustn't
- musn't
- * - MD+HV
- - modal auxiliary + verb 'to have', uninflected form
- - shouldda musta coulda must've woulda could've
- * - MD+PPSS
- - modal auxiliary + pronoun, personal, nominative, not 3rd person
- singular
- - willya
- * - MD+TO
- - modal auxiliary + infinitival to
- - oughta
- * - NN
- - noun, singular, common
- - failure burden court fire appointment awarding compensation
- Mayor interim committee fact effect airport management
- surveillance jail doctor intern extern night weekend duty
- legislation Tax Office ...
- * - NN$
- - noun, singular, common, genitive
- - season's world's player's night's chapter's golf's football's
- baseball's club's U.'s coach's bride's bridegroom's board's
- county's firm's company's superintendent's mob's Navy's ...
- * - NN+BEZ
- - noun, singular, common + verb 'to be', present tense, 3rd
- person singular
- - water's camera's sky's kid's Pa's heat's throat's father's
- money's undersecretary's granite's level's wife's fat's
- Knife's fire's name's hell's leg's sun's roulette's cane's
- guy's kind's baseball's ...
- * - NN+HVD
- - noun, singular, common + verb 'to have', past tense
- - Pa'd
- * - NN+HVZ
- - noun, singular, common + verb 'to have', present tense, 3rd
- person singular
- - guy's Knife's boat's summer's rain's company's
- * - NN+IN
- - noun, singular, common + preposition
- - buncha
- * - NN+MD
- - noun, singular, common + modal auxiliary
- - cowhand'd sun'll
- * - NN+NN
- - noun, singular, common, hyphenated pair
- - stomach-belly
- * - NNS
- - noun, plural, common
- - irregularities presentments thanks reports voters laws
- legislators years areas adjustments chambers $100 bonds
- courts sales details raises sessions members congressmen
- votes polls calls ...
- * - NNS$
- - noun, plural, common, genitive
- - taxpayers' children's members' States' women's cutters'
- motorists' steelmakers' hours' Nations' lawyers' prisoners'
- architects' tourists' Employers' secretaries' Rogues' ...
- * - NNS+MD
- - noun, plural, common + modal auxiliary
- - duds'd oystchers'll
- * - NP
- - noun, singular, proper
- - Fulton Atlanta September-October Durwood Pye Ivan Allen Jr.
- Jan. Alpharetta Grady William B. Hartsfield Pearl Williams
- Aug. Berry J. M. Cheshire Griffin Opelika Ala. E. Pelham
- Snodgrass ...
- * - NP$
- - noun, singular, proper, genitive
- - Green's Landis' Smith's Carreon's Allison's Boston's Spahn's
- Willie's Mickey's Milwaukee's Mays' Howsam's Mantle's Shaw's
- Wagner's Rickey's Shea's Palmer's Arnold's Broglio's ...
- * - NP+BEZ
- - noun, singular, proper + verb 'to be', present tense, 3rd person
- singular
- - W.'s Ike's Mack's Jack's Kate's Katharine's Black's Arthur's
- Seaton's Buckhorn's Breed's Penny's Rob's Kitty's Blackwell's
- Myra's Wally's Lucille's Springfield's Arlene's
- * - NP+HVZ
- - noun, singular, proper + verb 'to have', present tense, 3rd
- person singular
- - Bill's Guardino's Celie's Skolman's Crosson's Tim's Wally's
- * - NP+MD
- - noun, singular, proper + modal auxiliary
- - Gyp'll John'll
- * - NPS
- - noun, plural, proper
- - Chases Aderholds Chapelles Armisteads Lockies Carbones French
- Marskmen Toppers Franciscans Romans Cadillacs Masons Blacks
- Catholics British Dixiecrats Mississippians Congresses ...
- * - NPS$
- - noun, plural, proper, genitive
- - Republicans' Orioles' Birds' Yanks' Redbirds' Bucs' Yankees'
- Stevenses' Geraghtys' Burkes' Wackers' Achaeans' Dresbachs'
- Russians' Democrats' Gershwins' Adventists' Negroes'
- Catholics' ...
- * - NR
- - noun, singular, adverbial
- - Friday home Wednesday Tuesday Monday Sunday Thursday yesterday
- tomorrow tonight West East Saturday west left east downtown
- north northeast southeast northwest North South right ...
- * - NR$
- - noun, singular, adverbial, genitive
- - Saturday's Monday's yesterday's tonight's tomorrow's Sunday's
- Wednesday's Friday's today's Tuesday's West's Today's South's
- * - NR+MD
- - noun, singular, adverbial + modal auxiliary
- - today'll
- * - NRS
- - noun, plural, adverbial
- - Sundays Mondays Saturdays Wednesdays Souths Fridays
- * - OD
- - numeral, ordinal
- - first 13th third nineteenth 2d 61st second sixth eighth ninth
- twenty-first eleventh 50th eighteenth- Thirty-ninth 72nd
- 1/20th twentieth mid-19th thousandth 350th sixteenth 701st ...
- * - PN
- - pronoun, nominal
- - none something everything one anyone nothing nobody everybody
- everyone anybody anything someone no-one nothin
- * - PN$
- - pronoun, nominal, genitive
- - one's someone's anybody's nobody's everybody's anyone's
- everyone's
- * - PN+BEZ
- - pronoun, nominal + verb 'to be', present tense, 3rd person
- singular
- - nothing's everything's somebody's nobody's someone's
- * - PN+HVD
- - pronoun, nominal + verb 'to have', past tense
- - nobody'd
- * - PN+HVZ
- - pronoun, nominal + verb 'to have', present tense, 3rd person
- singular
- - nobody's somebody's one's
- * - PN+MD
- - pronoun, nominal + modal auxiliary
- - someone'll somebody'll anybody'd
- * - PP$
- - determiner, possessive
- - our its his their my your her out thy mine thine
- * - PP$$
- - pronoun, possessive
- - ours mine his hers theirs yours
- * - PPL
- - pronoun, singular, reflexive
- - itself himself myself yourself herself oneself ownself
- * - PPLS
- - pronoun, plural, reflexive
- - themselves ourselves yourselves
- * - PPO
- - pronoun, personal, accusative
- - them it him me us you 'em her thee we'uns
- * - PPS
- - pronoun, personal, nominative, 3rd person singular
- - it he she thee
- * - PPS+BEZ
- - pronoun, personal, nominative, 3rd person singular + verb 'to
- be', present tense, 3rd person singular
- - it's he's she's
- * - PPS+HVD
- - pronoun, personal, nominative, 3rd person singular + verb 'to
- have', past tense
- - she'd he'd it'd
- * - PPS+HVZ
- - pronoun, personal, nominative, 3rd person singular + verb 'to
- have', present tense, 3rd person singular
- - it's he's she's
- * - PPS+MD
- - pronoun, personal, nominative, 3rd person singular + modal
- auxiliary
- - he'll she'll it'll he'd it'd she'd
- * - PPSS
- - pronoun, personal, nominative, not 3rd person singular
- - they we I you ye thou you'uns
- * - PPSS+BEM
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to be', present tense, 1st person singular
- - I'm Ahm
- * - PPSS+BER
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to be', present tense, 2nd person singular or all persons
- plural
- - we're you're they're
- * - PPSS+BEZ
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to be', present tense, 3rd person singular
- - you's
- * - PPSS+BEZ*
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to be', present tense, 3rd person singular, negated
- - 'tain't
- * - PPSS+HV
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to have', uninflected present tense
- - I've we've they've you've
- * - PPSS+HVD
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to have', past tense
- - I'd you'd we'd they'd
- * - PPSS+MD
- - pronoun, personal, nominative, not 3rd person singular + modal
- auxiliary
- - you'll we'll I'll we'd I'd they'll they'd you'd
- * - PPSS+VB
- - pronoun, personal, nominative, not 3rd person singular + verb
- 'to verb', uninflected present tense
- - y'know
- * - QL
- - qualifier, pre
- - well less very most so real as highly fundamentally even how
- much remarkably somewhat more completely too thus ill deeply
- little overly halfway almost impossibly far severly such ...
- * - QLP
- - qualifier, post
- - indeed enough still 'nuff
- * - RB
- - adverb
- - only often generally also nevertheless upon together back
- newly no likely meanwhile near then heavily there apparently
- yet outright fully aside consistently specifically formally
- ever just ...
- * - RB$
- - adverb, genitive
- - else's
- * - RB+BEZ
- - adverb + verb 'to be', present tense, 3rd person singular
- - here's there's
- * - RB+CS
- - adverb + conjunction, subordinating
- - well's soon's
- * - RBR
- - adverb, comparative
- - further earlier better later higher tougher more harder longer
- sooner less faster easier louder farther oftener nearer cheaper
- slower tighter lower worse heavier quicker ...
- * - RBR+CS
- - adverb, comparative + conjunction, subordinating
- - more'n
- * - RBT
- - adverb, superlative
- - most best highest uppermost nearest brightest hardest fastest
- deepest farthest loudest ...
- * - RN
- - adverb, nominal
- - here afar then
- * - RP
- - adverb, particle
- - up out off down over on in about through across after
- * - RP+IN
- - adverb, particle + preposition
- - out'n outta
- * - TO
- - infinitival to
- - to t'
- * - TO+VB
- - infinitival to + verb, infinitive
- - t'jawn t'lah
- * - UH
- - interjection
- - Hurrah bang whee hmpf ah goodbye oops oh-the-pain-of-it ha
- crunch say oh why see well hello lo alas tarantara
- rum-tum-tum gosh hell keerist Jesus Keeeerist boy c'mon 'mon
- goddamn bah hoo-pig damn ...
- * - VB
- - verb, base: uninflected present, imperative or infinitive
- - investigate find act follow inure achieve reduce take remedy
- re-set distribute realize disable feel receive continue place
- protect eliminate elaborate work permit run enter force ...
- * - VB+AT
- - verb, base: uninflected present or infinitive + article
- - wanna
- * - VB+IN
- - verb, base: uninflected present, imperative or infinitive +
- preposition
- - lookit
- * - VB+JJ
- - verb, base: uninflected present, imperative or infinitive +
- adjective
- - die-dead
- * - VB+PPO
- - verb, uninflected present tense + pronoun, personal, accusative
- - let's lemme gimme
- * - VB+RP
- - verb, imperative + adverbial particle
- - g'ahn c'mon
- * - VB+TO
- - verb, base: uninflected present, imperative or infinitive +
- infinitival to
- - wanta wanna
- * - VB+VB
- - verb, base: uninflected present, imperative or infinitive;
- hyphenated pair
- - say-speak
- * - VBD
- - verb, past tense
- - said produced took recommended commented urged found added
- praised charged listed became announced brought attended
- wanted voted defeated received got stood shot scheduled
- feared promised made ...
- * - VBG
- - verb, present participle or gerund
- - modernizing improving purchasing Purchasing lacking enabling
- pricing keeping getting picking entering voting warning making
- strengthening setting neighboring attending participating
- moving ...
- * - VBG+TO
- - verb, present participle + infinitival to
- - gonna
- * - VBN
- - verb, past participle
- - conducted charged won received studied revised operated
- accepted combined experienced recommended effected granted
- seen protected adopted retarded notarized selected composed
- gotten printed ...
- * - VBN+TO
- - verb, past participle + infinitival to
- - gotta
- * - VBZ
- - verb, present tense, 3rd person singular
- - deserves believes receives takes goes expires says opposes
- starts permits expects thinks faces votes teaches holds calls
- fears spends collects backs eliminates sets flies gives seeks
- reads ...
- * - WDT
- - WH-determiner
- - which what whatever whichever whichever-the-hell
- * - WDT+BER
- - WH-determiner + verb 'to be', present tense, 2nd person
- singular or all persons plural
- - what're
- * - WDT+BER+PP
- - WH-determiner + verb 'to be', present, 2nd person singular or
- all persons plural + pronoun, personal, nominative, not 3rd
- person singular
- - whaddya
- * - WDT+BEZ
- - WH-determiner + verb 'to be', present tense, 3rd person
- singular
- - what's
- * - WDT+DO+PPS
- - WH-determiner + verb 'to do', uninflected present tense +
- pronoun, personal, nominative, not 3rd person singular
- - whaddya
- * - WDT+DOD
- - WH-determiner + verb 'to do', past tense
- - what'd
- * - WDT+HVZ
- - WH-determiner + verb 'to have', present tense, 3rd person
- singular
- - what's
- * - WP$
- - WH-pronoun, genitive
- - whose whosever
- * - WPO
- - WH-pronoun, accusative
- - whom that who
- * - WPS
- - WH-pronoun, nominative
- - that who whoever whosoever what whatsoever
- * - WPS+BEZ
- - WH-pronoun, nominative + verb 'to be', present, 3rd person
- singular
- - that's who's
- * - WPS+HVD
- - WH-pronoun, nominative + verb 'to have', past tense
- - who'd
- * - WPS+HVZ
- - WH-pronoun, nominative + verb 'to have', present tense, 3rd
- person singular
- - who's that's
- * - WPS+MD
- - WH-pronoun, nominative + modal auxiliary
- - who'll that'd who'd that'll
- * - WQL
- - WH-qualifier
- - however how
- * - WRB
- - WH-adverb
- - however when where why whereby wherever how whenever
- whereon wherein wherewith wheare wherefore whereof howsabout
- * - WRB+BER
- - WH-adverb + verb 'to be', present, 2nd person singular or all
- persons plural
- - where're
- * - WRB+BEZ
- - WH-adverb + verb 'to be', present, 3rd person singular
- - how's where's
- * - WRB+DO
- - WH-adverb + verb 'to do', present, not 3rd person singular
- - howda
- * - WRB+DOD
- - WH-adverb + verb 'to do', past tense
- - where'd how'd
- * - WRB+DOD*
- - WH-adverb + verb 'to do', past tense, negated
- - whyn't
- * - WRB+DOZ
- - WH-adverb + verb 'to do', present tense, 3rd person singular
- - how's
- * - WRB+IN
- - WH-adverb + preposition
- - why'n
- * - WRB+MD
- - WH-adverb + modal auxiliary
- - where'd
-"""
-
-# http://ucrel.lancs.ac.uk/claws5tags.html
-claws5_tagset = """
- * - AJ0
- - adjective (unmarked)
- - good, old
- * - AJC
- - comparative adjective
- - better, older
- * - AJS
- - superlative adjective
- - best, oldest
- * - AT0
- - article
- - THE, A, AN
- * - AV0
- - adverb (unmarked)
- - often, well, longer, furthest
- * - AVP
- - adverb particle
- - up, off, out
- * - AVQ
- - wh-adverb
- - when, how, why
- * - CJC
- - coordinating conjunction
- - and, or
- * - CJS
- - subordinating conjunction
- - although, when
- * - CJT
- - the conjunction THAT
- - that
- * - CRD
- - cardinal numeral
- - 3, fifty-five, 6609 (excl one)
- * - DPS
- - possessive determiner form
- - your, their
- * - DT0
- - general determiner
- - these, some
- * - DTQ
- - wh-determiner
- - whose, which
- * - EX0
- - existential THERE
- - there
- * - ITJ
- - interjection or other isolate
- - oh, yes, mhm
- * - NN0
- - noun (neutral for number)
- - aircraft, data
- * - NN1
- - singular noun
- - pencil, goose
- * - NN2
- - plural noun
- - pencils, geese
- * - NP0
- - proper noun
- - London, Michael, Mars
- * - NULL
- - the null tag (for items not to be tagged)
- -
- * - ORD
- - ordinal
- - sixth, 77th, last
- * - PNI
- - indefinite pronoun
- - none, everything
- * - PNP
- - personal pronoun
- - you, them, ours
- * - PNQ
- - wh-pronoun
- - who, whoever
- * - PNX
- - reflexive pronoun
- - itself, ourselves
- * - POS
- - the possessive (or genitive morpheme)
- - 's or '
- * - PRF
- - the preposition OF
- - of
- * - PRP
- - preposition (except for OF)
- - for, above, to
- * - PUL
- - punctuation - left bracket
- - ( or [
- * - PUN
- - punctuation - general mark
- - . ! , : ; - ? ...
- * - PUQ
- - punctuation - quotation mark
- - ` ' "
- * - PUR
- - punctuation - right bracket
- - ) or ]
- * - TO0
- - infinitive marker TO
- - to
- * - UNC
- - "unclassified" items which are not words of the English lexicon
- -
- * - VBB
- - the "base forms" of the verb "BE" (except the infinitive)
- - am, are
- * - VBD
- - past form of the verb "BE"
- - was, were
- * - VBG
- - -ing form of the verb "BE"
- - being
- * - VBI
- - infinitive of the verb "BE"
- - be
- * - VBN
- - past participle of the verb "BE"
- - been
- * - VBZ
- - -s form of the verb "BE"
- - is, 's
- * - VDB
- - base form of the verb "DO" (except the infinitive)
- - do
- * - VDD
- - past form of the verb "DO"
- - did
- * - VDG
- - -ing form of the verb "DO"
- - doing
- * - VDI
- - infinitive of the verb "DO"
- - do
- * - VDN
- - past participle of the verb "DO"
- - done
- * - VDZ
- - -s form of the verb "DO"
- - does
- * - VHB
- - base form of the verb "HAVE" (except the infinitive)
- - have
- * - VHD
- - past tense form of the verb "HAVE"
- - had, 'd
- * - VHG
- - -ing form of the verb "HAVE"
- - having
- * - VHI
- - infinitive of the verb "HAVE"
- - have
- * - VHN
- - past participle of the verb "HAVE"
- - had
- * - VHZ
- - -s form of the verb "HAVE"
- - has, 's
- * - VM0
- - modal auxiliary verb
- - can, could, will, 'll
- * - VVB
- - base form of lexical verb (except the infinitive)
- - take, live
- * - VVD
- - past tense form of lexical verb
- - took, lived
- * - VVG
- - -ing form of lexical verb
- - taking, living
- * - VVI
- - infinitive of lexical verb
- - take, live
- * - VVN
- - past participle form of lex. verb
- - taken, lived
- * - VVZ
- - -s form of lexical verb
- - takes, lives
- * - XX0
- - the negative NOT or N'T
- - not
- * - ZZ0
- - alphabetical symbol
- - A, B, c, d
-"""
-
-# http://www.comp.leeds.ac.uk/amalgam/tagsets/upenn.html
-
-upenn_tagset = """
- * - $
- - dollar
- - $ -$ --$ A$ C$ HK$ M$ NZ$ S$ U.S.$ US$
- * - ``
- - opening quotation mark
- - ` ``
- * - ''
- - closing quotation mark
- - ' ''
- * - (
- - opening parenthesis
- - ( [ {
- * - )
- - closing parenthesis
- - ) ] }
- * - ,
- - comma
- - ,
- * - --
- - dash
- - --
- * - .
- - sentence terminator
- - . ! ?
- * - :
- - colon or ellipsis
- - : ; ...
- * - CC
- - conjunction, coordinating
- - & 'n and both but either et for less minus neither nor or
- plus so therefore times v. versus vs. whether yet
- * - CD
- - numeral, cardinal
- - mid-1890 nine-thirty forty-two one-tenth ten million 0.5 one
- forty-seven 1987 twenty '79 zero two 78-degrees eighty-four IX '60s .025
- fifteen 271,124 dozen quintillion DM2,000 ...
- * - DT
- - determiner
- - all an another any both del each either every half la many much
- nary neither no some such that the them these this those
- * - EX
- - existential there
- - there
- * - FW
- - foreign word
- - gemeinschaft hund ich jeux habeas Haementeria Herr K'ang-si
- vous lutihaw alai je jour objets salutaris fille quibusdam pas trop Monte
- terram fiche oui corporis ...
- * - IN
- - preposition or conjunction, subordinating
- - astride among uppon whether out inside pro despite on by throughout
- below within for towards near behind atop around if like until below next
- into if beside ...
- * - JJ
- - adjective or numeral, ordinal
- - third ill-mannered pre-war regrettable oiled calamitous first
- separable ectoplasmic battery-powered participatory fourth still-to-be-named
- multilingual multi-disciplinary ...
- * - JJR
- - adjective, comparative
- - bleaker braver breezier briefer brighter brisker broader bumper
- busier calmer cheaper choosier cleaner clearer closer colder commoner costlier
- cozier creamier crunchier cuter ...
- * - JJS
- - adjective, superlative
- - calmest cheapest choicest classiest cleanest clearest closest
- commonest corniest costliest crassest creepiest crudest cutest darkest
- deadliest dearest deepest densest dinkiest ...
- * - LS
- - list item marker
- - A A. B B. C C. D E F First G H I J K One SP-44001 SP-44002 SP-44005
- SP-44007 Second Third Three Two * a b c d first five four one six three
- two
- * - MD
- - modal auxiliary
- - can cannot could couldn't dare may might must need ought shall
- should shouldn't will would
- * - NN
- - noun, common, singular or mass
- - common-carrier cabbage knuckle-duster Casino afghan shed thermostat
- investment slide humour falloff slick wind hyena override subhumanity machinist
- ...
- * - NNP
- - noun, proper, singular
- - Motown Venneboerger Czestochwa Ranzer Conchita Trumplane Christos
- Oceanside Escobar Kreisler Sawyer Cougar Yvette Ervin ODI Darryl CTCA Shannon
- A.K.C. Meltex Liverpool ...
- * - NNPS
- - noun, proper, plural
- - Americans Americas Amharas Amityvilles Amusements Anarcho-Syndicalists
- Andalusians Andes Andruses Angels Animals Anthony Antilles Antiques Apache
- Apaches Apocrypha ...
- * - NNS
- - noun, common, plural
- - undergraduates scotches bric-a-brac products bodyguards facets
- coasts divestitures storehouses designs clubs fragrances averages subjectivists
- apprehensions muses factory-jobs ...
- * - PDT
- - pre-determiner
- - all both half many quite such sure this
- * - POS
- - genitive marker
- - ' 's
- * - PRP
- - pronoun, personal
- - hers herself him himself hisself it itself me myself one oneself
- ours ourselves ownself self she thee theirs them themselves they thou thy
- us
- * - PRP$
- - pronoun, possessive
- - her his mine my our ours their thy your
- * - RB
- - adverb
- - occasionally unabatingly maddeningly adventurously professedly
- stirringly prominently technologically magisterially predominately swiftly
- fiscally pitilessly ...
- * - RBR
- - adverb, comparative
- - further gloomier grander graver greater grimmer harder harsher
- healthier heavier higher however larger later leaner lengthier less-perfectly
- lesser lonelier longer louder lower more ...
- * - RBS
- - adverb, superlative
- - best biggest bluntest earliest farthest first furthest hardest
- heartiest highest largest least less most nearest second tightest worst
-
- * - RP
- - particle
- - aboard about across along apart around aside at away back before
- behind by crop down ever fast for forth from go high i.e. in into just
- later low more off on open out over per pie raising start teeth that through
- under unto up up-pp upon whole with you
- * - SYM
- - symbol
- - % & ' '' ''. ) ). * + ,. < = > @ A[fj] U.S U.S.S.R * ** ***
- * - TO
- - "to" as preposition or infinitive marker
- - to
- * - UH
- - interjection
- - Goodbye Goody Gosh Wow Jeepers Jee-sus Hubba Hey Kee-reist Oops
- amen huh howdy uh dammit whammo shucks heck anyways whodunnit honey golly
- man baby diddle hush sonuvabitch ...
- * - VB
- - verb, base form
- - ask assemble assess assign assume atone attention avoid bake
- balkanize bank begin behold believe bend benefit bevel beware bless boil
- bomb boost brace break bring broil brush build ...
- * - VBD
- - verb, past tense
- - dipped pleaded swiped regummed soaked tidied convened halted
- registered cushioned exacted snubbed strode aimed adopted belied figgered
- speculated wore appreciated contemplated ...
- * - VBG
- - verb, present participle or gerund
- - telegraphing stirring focusing angering judging stalling lactating
- hankerin' alleging veering capping approaching traveling besieging encrypting
- interrupting erasing wincing ...
- * - VBN
- - verb, past participle
- - multihulled dilapidated aerosolized chaired languished panelized
- used experimented flourished imitated reunifed factored condensed sheared
- unsettled primed dubbed desired ...
- * - VBP
- - verb, present tense, not 3rd person singular
- - predominate wrap resort sue twist spill cure lengthen brush
- terminate appear tend stray glisten obtain comprise detest tease attract
- emphasize mold postpone sever return wag ...
- * - VBZ
- - verb, present tense, 3rd person singular
- - bases reconstructs marks mixes displeases seals carps weaves
- snatches slumps stretches authorizes smolders pictures emerges stockpiles
- seduces fizzes uses bolsters slaps speaks pleads ...
- * - WDT
- - WH-determiner
- - that what whatever which whichever
- * - WP
- - WH-pronoun
- - that what whatever whatsoever which who whom whosoever
- * - WP$
- - WH-pronoun, possessive
- - whose
- * - WRB
- - Wh-adverb
- - how however whence whenever where whereby whereever wherein
- whereof why
-"""
View
46 nltk_data/tagsets.py
@@ -1,46 +0,0 @@
-# Natural Language Toolkit (NLTK) Package for Tagset Tables
-#
-# Copyright (C) 2001-2011 NLTK Project
-# Authors: Steven Bird <sb@csse.unimelb.edu.au>
-# URL: <http://www.nltk.org/>
-# For license information, see LICENSE.TXT
-
-"""
-A package for building the Tagset Tables distributed with NLTK
-"""
-
-import re
-import pickle
-
-from tagset_data import *
-
-RECORD_SEP = " *"
-FIELD_SEP = " - "
-
-TAGSETS = {'upenn_tagset': upenn_tagset,
- 'brown_tagset': brown_tagset,
- 'claws5_tagset': claws5_tagset}
-
-def load_tagset(s):
- tagset = {}
- entries = s.split(RECORD_SEP)
- for entry in entries:
- if FIELD_SEP in entry:
- entry = re.sub(r'(?m)\s+', ' ', entry)
- _, tag, defn, examples = entry.split(FIELD_SEP, 3)
- if tag not in tagset:
- tagset[tag] = (defn, examples)
- else:
- raise ValueError, "Duplicate tag: %s" % tag
- return tagset
-
-def build_tagsets():
- for tagset in TAGSETS:
- print "Building", tagset
- output = open(tagset + ".pickle", "w")
- tagset_dict = load_tagset(TAGSETS[tagset])
- pickle.dump(tagset_dict, output)
- output.close()
-
-if __name__ == '__main__':
- build_tagsets()
Please sign in to comment.
Something went wrong with that request. Please try again.