R package with a collection of small corpora of interesting data, from https://github.com/dariusk/corpora
Switch branches/tags
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Type Name Latest commit message Commit time
Failed to load latest commit information.
R
inst
man
.Rbuildignore
.gitignore
.travis.yml
DESCRIPTION
Makefile
NAMESPACE
NEWS.md
README.Rmd
README.markdown
appveyor.yml

README.markdown

rcorpora

Linux Build Status Windows Build status CRAN version CRAN RStudio mirror downloads

R package that contains all data sets from https://github.com/dariusk/corpora

Installation

install.packages("rcorpora")

Usage

Calling the corpora() function without arguments lists all data sets in the package; calling it with the name of a data set returns the data set itself. For example:

library(rcorpora)
corpora()
#>   [1] "animals/birds_antarctica"                                       
#>   [2] "animals/birds_north_america"                                    
#>   [3] "animals/cats"                                                   
#>   [4] "animals/collateral_adjectives"                                  
#>   [5] "animals/common"                                                 
#>   [6] "animals/dinosaurs"                                              
#>   [7] "animals/dog_names"                                              
#>   [8] "animals/dogs"                                                   
#>   [9] "animals/donkeys"                                                
#>  [10] "animals/horses"                                                 
#>  [11] "animals/ponies"                                                 
#>  [12] "archetypes/artifact"                                            
#>  [13] "archetypes/character"                                           
#>  [14] "archetypes/event"                                               
#>  [15] "archetypes/setting"                                             
#>  [16] "architecture/passages"                                          
#>  [17] "architecture/rooms"                                             
#>  [18] "art/isms"                                                       
#>  [19] "colors/crayola"                                                 
#>  [20] "colors/dulux"                                                   
#>  [21] "colors/google_material_colors"                                  
#>  [22] "colors/paints"                                                  
#>  [23] "colors/palettes"                                                
#>  [24] "colors/web_colors"                                              
#>  [25] "colors/xkcd"                                                    
#>  [26] "corporations/cars"                                              
#>  [27] "corporations/djia"                                              
#>  [28] "corporations/fortune500"                                        
#>  [29] "corporations/industries"                                        
#>  [30] "corporations/nasdaq"                                            
#>  [31] "corporations/newspapers"                                        
#>  [32] "divination/tarot_interpretations"                               
#>  [33] "divination/zodiac"                                              
#>  [34] "film-tv/game-of-thrones-houses"                                 
#>  [35] "film-tv/iab_categories"                                         
#>  [36] "film-tv/netflix-categories"                                     
#>  [37] "film-tv/popular-movies"                                         
#>  [38] "film-tv/tv_shows"                                               
#>  [39] "foods/apple_cultivars"                                          
#>  [40] "foods/bad_beers"                                                
#>  [41] "foods/beer_categories"                                          
#>  [42] "foods/beer_styles"                                              
#>  [43] "foods/breads_and_pastries"                                      
#>  [44] "foods/combine"                                                  
#>  [45] "foods/condiments"                                               
#>  [46] "foods/curds"                                                    
#>  [47] "foods/fruits"                                                   
#>  [48] "foods/herbs_n_spices"                                           
#>  [49] "foods/hot_peppers"                                              
#>  [50] "foods/iba_cocktails"                                            
#>  [51] "foods/menuItems"                                                
#>  [52] "foods/pizzaToppings"                                            
#>  [53] "foods/sandwiches"                                               
#>  [54] "foods/sausages"                                                 
#>  [55] "foods/scotch_whiskey"                                           
#>  [56] "foods/tea"                                                      
#>  [57] "foods/vegetable_cooking_times"                                  
#>  [58] "foods/vegetables"                                               
#>  [59] "foods/wine_descriptions"                                        
#>  [60] "games/bannedGames/argentina/bannedList"                         
#>  [61] "games/bannedGames/brazil/bannedList"                            
#>  [62] "games/bannedGames/china/bannedList"                             
#>  [63] "games/bannedGames/denmark/bannedList"                           
#>  [64] "games/cluedo"                                                   
#>  [65] "games/dark_souls_iii_messages"                                  
#>  [66] "games/jeopardy_questions"                                       
#>  [67] "games/pokemon"                                                  
#>  [68] "games/scrabble"                                                 
#>  [69] "games/street_fighter_ii"                                        
#>  [70] "games/trivial_pursuit"                                          
#>  [71] "games/wrestling_moves"                                          
#>  [72] "games/zelda"                                                    
#>  [73] "geography/canada_provinces_and_territories"                     
#>  [74] "geography/canadian_municipalities"                              
#>  [75] "geography/countries_with_capitals"                              
#>  [76] "geography/countries"                                            
#>  [77] "geography/english_towns_cities"                                 
#>  [78] "geography/japanese_prefectures"                                 
#>  [79] "geography/london_underground_stations"                          
#>  [80] "geography/nationalities"                                        
#>  [81] "geography/norwegian_cities"                                     
#>  [82] "geography/nyc_neighborhood_zips"                                
#>  [83] "geography/oceans"                                               
#>  [84] "geography/rivers"                                               
#>  [85] "geography/sf_neighborhoods"                                     
#>  [86] "geography/us_airport_codes"                                     
#>  [87] "geography/us_cities"                                            
#>  [88] "geography/us_counties"                                          
#>  [89] "geography/us_metropolitan_areas"                                
#>  [90] "geography/us_state_capitals"                                    
#>  [91] "geography/venues"                                               
#>  [92] "geography/winds"                                                
#>  [93] "governments/mass-surveillance-project-names"                    
#>  [94] "governments/nsa_projects"                                       
#>  [95] "governments/uk_political_parties"                               
#>  [96] "governments/us_federal_agencies"                                
#>  [97] "governments/us_mil_operations"                                  
#>  [98] "humans/2016_us_presidential_candidates"                         
#>  [99] "humans/atus_activities"                                         
#> [100] "humans/authors"                                                 
#> [101] "humans/bodyParts"                                               
#> [102] "humans/britishActors"                                           
#> [103] "humans/celebrities"                                             
#> [104] "humans/descriptions"                                            
#> [105] "humans/englishHonorifics"                                       
#> [106] "humans/famousDuos"                                              
#> [107] "humans/firstNames"                                              
#> [108] "humans/lastNames"                                               
#> [109] "humans/moods"                                                   
#> [110] "humans/norwayFirstNamesBoys"                                    
#> [111] "humans/norwayFirstNamesGirls"                                   
#> [112] "humans/norwayLastNames"                                         
#> [113] "humans/occupations"                                             
#> [114] "humans/prefixes"                                                
#> [115] "humans/richpeople"                                              
#> [116] "humans/scientists"                                              
#> [117] "humans/spanishFirstNames"                                       
#> [118] "humans/spanishLastNames"                                        
#> [119] "humans/spinalTapDrummers"                                       
#> [120] "humans/suffixes"                                                
#> [121] "humans/thirdPersonPronouns"                                     
#> [122] "humans/tolkienCharacterNames"                                   
#> [123] "humans/us_presidents"                                           
#> [124] "humans/wrestlers"                                               
#> [125] "instructions/laundry_care"                                      
#> [126] "materials/abridged-body-fluids"                                 
#> [127] "materials/building-materials"                                   
#> [128] "materials/carbon-allotropes"                                    
#> [129] "materials/decorative-stones"                                    
#> [130] "materials/fabrics"                                              
#> [131] "materials/fibers"                                               
#> [132] "materials/gemstones"                                            
#> [133] "materials/layperson-metals"                                     
#> [134] "materials/metals"                                               
#> [135] "materials/natural-materials"                                    
#> [136] "materials/packaging"                                            
#> [137] "materials/plastic-brands"                                       
#> [138] "materials/sculpture-materials"                                  
#> [139] "materials/technical-fabrics"                                    
#> [140] "mathematics/fibonnaciSequence"                                  
#> [141] "mathematics/primes_binary"                                      
#> [142] "mathematics/primes"                                             
#> [143] "mathematics/trigonometry"                                       
#> [144] "medicine/diagnoses"                                             
#> [145] "medicine/drugNameStems"                                         
#> [146] "medicine/drugs"                                                 
#> [147] "medicine/hospitals"                                             
#> [148] "music/a_list_of_guitar_manufacturers"                           
#> [149] "music/bands_that_have_opened_for_tool"                          
#> [150] "music/female_classical_guitarists"                              
#> [151] "music/genres"                                                   
#> [152] "music/hamilton_musical_obcrecording_actors_characters"          
#> [153] "music/instruments"                                              
#> [154] "music/mtv_day_one"                                              
#> [155] "music/rock_hall_of_fame"                                        
#> [156] "music/xxl_freshman"                                             
#> [157] "mythology/greek_gods"                                           
#> [158] "mythology/greek_monsters"                                       
#> [159] "mythology/greek_myths_master"                                   
#> [160] "mythology/greek_titans"                                         
#> [161] "mythology/hebrew_god"                                           
#> [162] "mythology/lovecraft"                                            
#> [163] "mythology/monsters"                                             
#> [164] "mythology/norse_gods"                                           
#> [165] "objects/clothing"                                               
#> [166] "objects/corpora_winners"                                        
#> [167] "objects/objects"                                                
#> [168] "plants/cannabis"                                                
#> [169] "plants/flowers"                                                 
#> [170] "plants/plants"                                                  
#> [171] "religion/christian_saints"                                      
#> [172] "religion/fictional_religions"                                   
#> [173] "religion/parody_religions"                                      
#> [174] "religion/religions"                                             
#> [175] "science/elements"                                               
#> [176] "science/hail_size"                                              
#> [177] "science/minor_planets"                                          
#> [178] "science/planets"                                                
#> [179] "science/pregnancy"                                              
#> [180] "science/toxic_chemicals"                                        
#> [181] "science/weather_conditions"                                     
#> [182] "societies_and_groups/animal_welfare"                            
#> [183] "societies_and_groups/designated_terrorist_groups/australia"     
#> [184] "societies_and_groups/designated_terrorist_groups/canada"        
#> [185] "societies_and_groups/designated_terrorist_groups/china"         
#> [186] "societies_and_groups/designated_terrorist_groups/egypt"         
#> [187] "societies_and_groups/designated_terrorist_groups/european_union"
#> [188] "societies_and_groups/designated_terrorist_groups/india"         
#> [189] "societies_and_groups/designated_terrorist_groups/iran"          
#> [190] "societies_and_groups/designated_terrorist_groups/israel"        
#> [191] "societies_and_groups/designated_terrorist_groups/kazakhstan"    
#> [192] "societies_and_groups/designated_terrorist_groups/russia"        
#> [193] "societies_and_groups/designated_terrorist_groups/saudi_arabia"  
#> [194] "societies_and_groups/designated_terrorist_groups/tunisia"       
#> [195] "societies_and_groups/designated_terrorist_groups/turkey"        
#> [196] "societies_and_groups/designated_terrorist_groups/uae"           
#> [197] "societies_and_groups/designated_terrorist_groups/ukraine"       
#> [198] "societies_and_groups/designated_terrorist_groups/united_kingdom"
#> [199] "societies_and_groups/designated_terrorist_groups/united_nations"
#> [200] "societies_and_groups/designated_terrorist_groups/united_states" 
#> [201] "societies_and_groups/fraternities/coeducational_fraternities"   
#> [202] "societies_and_groups/fraternities/defunct"                      
#> [203] "societies_and_groups/fraternities/fraternities"                 
#> [204] "societies_and_groups/fraternities/professional"                 
#> [205] "societies_and_groups/fraternities/service"                      
#> [206] "societies_and_groups/fraternities/sororities"                   
#> [207] "societies_and_groups/semi_secret"                               
#> [208] "sports/football/epl_teams"                                      
#> [209] "sports/football/laliga_teams"                                   
#> [210] "sports/football/serieA"                                         
#> [211] "sports/mlb_teams"                                               
#> [212] "sports/nba_mvps"                                                
#> [213] "sports/nba_teams"                                               
#> [214] "sports/nfl_teams"                                               
#> [215] "sports/nhl_teams"                                               
#> [216] "sports/olympics"                                                
#> [217] "technology/appliances"                                          
#> [218] "technology/computer_sciences"                                   
#> [219] "technology/fireworks"                                           
#> [220] "technology/guns_n_rifles"                                       
#> [221] "technology/knots"                                               
#> [222] "technology/lisp"                                                
#> [223] "technology/new_technologies"                                    
#> [224] "technology/photo_sharing_websites"                              
#> [225] "technology/programming_languages"                               
#> [226] "technology/social_networking_websites"                          
#> [227] "technology/video_hosting_websites"                              
#> [228] "transportation/commercial-aircraft"                             
#> [229] "travel/lcc"                                                     
#> [230] "words/adjs"                                                     
#> [231] "words/adverbs"                                                  
#> [232] "words/closed_pairs"                                             
#> [233] "words/common"                                                   
#> [234] "words/compounds"                                                
#> [235] "words/crash_blossoms"                                           
#> [236] "words/eggcorns"                                                 
#> [237] "words/emoji/cute_kaomoji"                                       
#> [238] "words/emoji/emoji"                                              
#> [239] "words/encouraging_words"                                        
#> [240] "words/ergative_verbs"                                           
#> [241] "words/expletives"                                               
#> [242] "words/harvard_sentences"                                        
#> [243] "words/infinitive_verbs"                                         
#> [244] "words/interjections"                                            
#> [245] "words/literature/infinitejest"                                  
#> [246] "words/literature/lovecraft_words"                               
#> [247] "words/literature/mr_men_little_miss"                            
#> [248] "words/literature/shakespeare_phrases"                           
#> [249] "words/literature/shakespeare_sonnets"                           
#> [250] "words/literature/shakespeare_words"                             
#> [251] "words/literature/technology_quotes"                             
#> [252] "words/nouns"                                                    
#> [253] "words/oprah_quotes"                                             
#> [254] "words/personal_nouns"                                           
#> [255] "words/personal_pronouns"                                        
#> [256] "words/possessive_pronouns"                                      
#> [257] "words/prefix_root_suffix"                                       
#> [258] "words/prepositions"                                             
#> [259] "words/proverbs"                                                 
#> [260] "words/resume_action_words"                                      
#> [261] "words/rhymeless_words"                                          
#> [262] "words/spells"                                                   
#> [263] "words/state_verbs"                                              
#> [264] "words/states_of_drunkenness"                                    
#> [265] "words/stopwords/ar"                                             
#> [266] "words/stopwords/bg"                                             
#> [267] "words/stopwords/cs"                                             
#> [268] "words/stopwords/da"                                             
#> [269] "words/stopwords/de"                                             
#> [270] "words/stopwords/en"                                             
#> [271] "words/stopwords/es"                                             
#> [272] "words/stopwords/fi"                                             
#> [273] "words/stopwords/fr"                                             
#> [274] "words/stopwords/gr"                                             
#> [275] "words/stopwords/it"                                             
#> [276] "words/stopwords/jp"                                             
#> [277] "words/stopwords/lv"                                             
#> [278] "words/stopwords/nl"                                             
#> [279] "words/stopwords/no"                                             
#> [280] "words/stopwords/pl"                                             
#> [281] "words/stopwords/pt"                                             
#> [282] "words/stopwords/ru"                                             
#> [283] "words/stopwords/sk"                                             
#> [284] "words/stopwords/sv"                                             
#> [285] "words/stopwords/tr"                                             
#> [286] "words/strange_words"                                            
#> [287] "words/units_of_time"                                            
#> [288] "words/us_president_quotes"                                      
#> [289] "words/verbs_with_conjugations"                                  
#> [290] "words/verbs"                                                    
#> [291] "words/word_clues/clues_five"                                    
#> [292] "words/word_clues/clues_four"                                    
#> [293] "words/word_clues/clues_six"
corpora("foods/pizzaToppings")
#> $description
#> [1] "A list of pizza toppings."
#> 
#> $pizzaToppings
#>  [1] "anchovies"        "artichoke"        "bacon"           
#>  [4] "breakfast bacon"  "Canadian bacon"   "cheese"          
#>  [7] "chicken"          "chili peppers"    "feta"            
#> [10] "garlic"           "green peppers"    "grilled onions"  
#> [13] "ground beef"      "ham"              "hot sauce"       
#> [16] "meatballs"        "mushrooms"        "olives"          
#> [19] "onions"           "pepperoni"        "pineapple"       
#> [22] "sausage"          "spinach"          "sun-dried tomato"
#> [25] "tomatoes"

License

CC0