diff --git a/munging/airline_flights/airline.rb b/munging/airline_flights/airline.rb
deleted file mode 100644
index 8e81438..0000000
--- a/munging/airline_flights/airline.rb
+++ /dev/null
@@ -1,57 +0,0 @@
-class Airline
-  include Gorillib::Model
-  field :icao_id,      String,   doc: "3-letter ICAO code, if available", identifier: true, length: 3
-  field :iata_id,      String,   doc: "2-letter IATA code, if available", identifier: true, length: 2
-  field :airline_ofid, Integer,  doc: "Unique OpenFlights identifier for this airline.", identifier: true
-  field :active,       :boolean, doc: 'true if the airline is or has until recently been operational, false if it is defunct. (This is only a rough indication and should not be taken as 100% accurate)'
-  field :country,      String,   doc: "Country or territory where airline is incorporated"
-  field :name,         String,   doc: "Airline name."
-  field :callsign,     String,   doc: "Airline callsign", identifier: true
-  field :alias,        String,   doc: "Alias of the airline. For example, 'All Nippon Airways' is commonly known as 'ANA'"
-end
-
-#
-# As of January 2012, the OpenFlights Airlines Database contains 5888
-# airlines. If you enjoy this data, please consider [visiting their page and
-# donating](http://openflights.org/data.html)
-#
-# > Notes: Airlines with null codes/callsigns/countries generally represent
-# > user-added airlines. Since the data is intended primarily for current
-# > flights, defunct IATA codes are generally not included. For example,
-# > "Sabena" is not listed with a SN IATA code, since "SN" is presently used by
-# > its successor Brussels Airlines.
-#
-# Sample entries
-#
-#     324,"All Nippon Airways","ANA All Nippon Airways","NH","ANA","ALL NIPPON","Japan","Y"
-#     412,"Aerolineas Argentinas",\N,"AR","ARG","ARGENTINA","Argentina","Y"
-#     413,"Arrowhead Airways",\N,"","ARH","ARROWHEAD","United States","N"
-#
-class RawOpenflightAirline
-  include Gorillib::Model
-  include Gorillib::Model::LoadFromCsv
-  BLANKISH_STRINGS = ["", nil, "NULL", '\\N', "NONE", "NA", "Null", "..."]
-
-  field :airline_ofid, Integer,  blankish: BLANKISH_STRINGS, doc: "Unique OpenFlights identifier for this airline.", identifier: true
-  field :name,         String,   blankish: BLANKISH_STRINGS, doc: "Airline name."
-  field :alias,        String,   blankish: BLANKISH_STRINGS, doc: "Alias of the airline. For example, 'All Nippon Airways' is commonly known as 'ANA'"
-  field :iata_id,      String,   blankish: BLANKISH_STRINGS, doc: "2-letter IATA code, if available", identifier: true, length: 2
-  field :icao_id,      String,   blankish: BLANKISH_STRINGS, doc: "3-letter ICAO code, if available", identifier: true, length: 3
-  field :callsign,     String,   blankish: BLANKISH_STRINGS, doc: "Airline callsign"
-  field :country,      String,   blankish: BLANKISH_STRINGS, doc: "Country or territory where airline is incorporated"
-  field :active,       :boolean, blankish: BLANKISH_STRINGS, doc: 'true if the airline is or has until recently been operational, false if it is defunct. 
(This is only a rough indication and should not be taken as 100% accurate)' - - def receive_iata_id(val) super if val =~ /\A\w+\z/ ; end - def receive_icao_id(val) super if val =~ /\A\w+\z/ ; end - def receive_active(val) - super(case val.to_s when "Y" then true when "N" then false else val ; end) - end - - def to_airline - Airline.receive(self.compact_attributes) - end - - def self.load_airlines(filename) - load_csv(filename){|raw_airline| yield(raw_airline.to_airline) } - end -end diff --git a/munging/airline_flights/airline_flights.rake b/munging/airline_flights/airline_flights.rake deleted file mode 100755 index fc63948..0000000 --- a/munging/airline_flights/airline_flights.rake +++ /dev/null @@ -1,83 +0,0 @@ -require_relative('../../rake_helper') -require_relative('./models') - -Pathname.register_paths( - af_data: [:data, 'airline_flights'], - af_work: [:work, 'airline_flights'], - af_code: File.dirname(__FILE__), - # - openflights_raw_airports: [:af_data, "openflights_airports-raw#{Settings[:mini_slug]}.csv" ], - openflights_raw_airlines: [:af_data, "openflights_airlines-raw.csv" ], - dataexpo_raw_airports: [:af_data, "dataexpo_airports-raw#{Settings[:mini_slug]}.csv" ], - wikipedia_icao: [:af_data, "wikipedia_icao.tsv" ], - wikipedia_iata: [:af_data, "wikipedia_iata.tsv" ], - wikipedia_us_abroad: [:af_data, "wikipedia_us_abroad.tsv" ], - # - openflights_airports: [:af_work, "openflights_airports-parsed#{Settings[:mini_slug]}.tsv"], - openflights_airlines: [:af_work, "openflights_airlines-parsed#{Settings[:mini_slug]}.tsv"], - dataexpo_airports: [:af_work, "dataexpo_airports-parsed#{Settings[:mini_slug]}.tsv" ], - airport_identifiers: [:af_work, "airport_identifiers.tsv" ], - airport_identifiers_mini: [:af_work, "airport_identifiers-sample.tsv" ], - # helpers - country_name_lookup: [:work, 'geo', "country_name_lookup.tsv"], - ) - -chain :airline_flights do - code_files = FileList[Pathname.of(:af_code, '*.rb').to_s] - chain(:parse) do - - # desc 'parse the dataexpo airports' - # create_file(:dataexpo_airports, after: code_files) do |dest| - # RawDataexpoAirport.load_airports(:dataexpo_raw_airports) do |airport| - # dest << airport.to_tsv << "\n" - # end - # end - - desc 'parse the openflights airports' - create_file(:openflights_airports, after: [code_files, :force]) do |dest| - require_relative('../geo/geo_models') - Geo::CountryNameLookup.load - RawOpenflightAirport.load_airports(:openflights_raw_airports) do |airport| - dest << airport.to_tsv << "\n" - # puts airport.country - end - end - - # task :reconcile_airports => [:dataexpo_airports, :openflights_airports] do - # require_relative 'reconcile_airports' - # Airport::IdReconciler.load_all - # end - # - # desc 'run the identifier reconciler' - # create_file(:airport_identifiers, after: code_files, invoke: 'airline_flights:parse:reconcile_airports') do |dest| - # Airport::IdReconciler.airports.each do |airport| - # dest << airport.to_tsv << "\n" - # end - # end - # - # desc 'run the identifier reconciler' - # create_file(:airport_identifiers_mini, after: code_files, invoke: 'airline_flights:parse:reconcile_airports') do |dest| - # Airport::IdReconciler.exemplars.each do |airport| - # dest << airport.to_tsv << "\n" - # end - # end - # - # desc 'parse the openflights airlines' - # create_file(:openflights_airlines, after: code_files) do |dest| - # RawOpenflightAirline.load_airlines(:openflights_raw_airlines) do |airline| - # dest << airline.to_tsv << "\n" - # puts airline.to_tsv - # end - # end - - end -end - -task :default => [ - 
'airline_flights',
-  # 'airline_flights:parse:dataexpo_airports',
-  # 'airline_flights:parse:openflights_airports',
-  # 'airline_flights:parse:airport_identifiers',
-  # 'airline_flights:parse:airport_identifiers_mini',
-  # 'airline_flights:parse:openflights_airlines',
-]
diff --git a/munging/airline_flights/airplane.rb b/munging/airline_flights/airplane.rb
deleted file mode 100644
index e69de29..0000000
diff --git a/munging/airline_flights/airport.rb b/munging/airline_flights/airport.rb
deleted file mode 100644
index dbb476c..0000000
--- a/munging/airline_flights/airport.rb
+++ /dev/null
@@ -1,211 +0,0 @@
-# -*- coding: utf-8 -*-
-
-### @export "airport_model"
-class Airport
-  include Gorillib::Model
-
-  field :icao,         String, doc: "4-letter ICAO code, or blank if not assigned.", length: 4, identifier: true, :blankish => ["", nil]
-  field :iata,         String, doc: "3-letter IATA code, or blank if not assigned.", length: 3, identifier: true, :blankish => ["", nil]
-  field :faa,          String, doc: "3-letter FAA code, or blank if not assigned.", length: 3, identifier: true, :blankish => ["", nil]
-  field :utc_offset,   Float,  doc: "Hours offset from UTC. Fractional hours are expressed as decimals, eg. India is 5.5.", validates: { inclusion: (-12..14) }
-  field :dst_rule,     String, doc: "Daylight savings time rule. One of E (Europe), A (US/Canada), S (South America), O (Australia), Z (New Zealand), N (None) or U (Unknown). See the readme for more.", validates: { inclusion: %w[E A S O Z N U] }
-  field :longitude,    Float,  doc: "Decimal degrees, usually to six significant digits. Negative is West, positive is East.", validates: { inclusion: (-180...180) }
-  field :latitude,     Float,  doc: "Decimal degrees, usually to six significant digits. Negative is South, positive is North.", validates: { inclusion: (-90.0...90.0) }
-  field :altitude,     Float,  doc: "Elevation in meters."
-  field :name,         String, doc: "Name of airport."
-  field :country,      String, doc: "Country or territory where airport is located.", length: 2
-  field :state,        String, doc: "State in which the airport is located", length: 2
-  field :city,         String, doc: "Main city served by airport. This is the logical city it serves; so, for example SFO gets 'San Francisco', not 'San Bruno'"
-  field :airport_ofid, String, doc: "OpenFlights identifier for this airport.", identifier: true
-end
-### @export "nil"
-class Airport
-  EXEMPLARS = %w[
-    ANC ATL AUS BDL BNA BOI BOS BWI CLE CLT
-    CMH DCA DEN DFW DTW EWR FLL HNL IAD IAH
-    IND JAX JFK LAS LAX LGA MCI MCO MDW MIA
-    MSP MSY OAK ORD PDX PHL PHX PIT PVD RDU
-    SAN SEA SFO SJC SJU SLC SMF STL TPA YYZ
-  ]
-
-  def utc_time_for(tm)
-    utc_time  = tm.get_utc + (utc_offset * 3600)             # utc_offset is in hours; Time math is in seconds
-    utc_time += (60*60) if TimezoneFixup.dst?(dst_rule, tm)  # dst? takes (rule, time) -- see timezone_fixup.rb
-    utc_time
-  end
-
-  BLANKISH_STRINGS = ["", nil, "NULL", '\\N', "NONE", "NA", "Null", "..."]
-  OK_CHARS_RE = /[^a-zA-Z0-9\:\ \/\.\,\-\(\)\'ÁÂÄÅÇÉÍÎÑÓÖØÚÜÞàáâãäåæçèéêëìíîïðñóôõöøúüýĀāăĆćČčēėęěğīİıŁłńņňŌōőřŞşŠšţťūůųźŽžơț]/
-
-  def lint
-    errors = {}
-    errors["ICAO is wrong length"] = icao if icao.present? && icao.length != 4
-    if (icao && faa && (icao =~ /^K.../))
-      errors["ICAO != K+FAA yet ICAO is a K..."] = [icao, faa] if (icao != "K#{faa}")
-    end
-    # errors["ICAO present for piddlyshit airport"] = icao if icao.present? && ((faa.to_s.length == 4) || (faa.to_s =~ /\d/))
-    errors[:spaces] ||= []
-    errors[:funny]  ||= []
-    attributes.each do |attr, val|
-      next if val.blank?
- errors["#{attr} looks blankish"] = val if BLANKISH_STRINGS.include?(val) - if (val.is_a?(String)) - errors[:spaces] << [attr, val] if (val.strip != val) - errors[:funny] << [attr, val] if val =~ OK_CHARS_RE - end - end - errors.compact_blank - end - - def to_s - str = "#" - end - - def faa_controlled? - icao =~ /^(?:K|P[ABFGHJKMOPW]|T[IJ]|NS(AS|FQ|TU))/ - end -end -### @export "airport_load" -class Airport - include Gorillib::Model::LoadFromTsv - self.tsv_options.merge!(num_fields: 10..20) - def self.load_airports(filename) - load_tsv(filename){|airport| yield(airport) } - end - -end -### @export "nil" - -# -# As of January 2012, the OpenFlights Airports Database contains 6977 airports -# [spanning the globe](http://openflights.org/demo/openflights-apdb-2048.png). -# If you enjoy this data, please consider [visiting their page and -# donating](http://openflights.org/data.html) -# -# > Note: Rules for daylight savings time change from year to year and from -# > country to country. The current data is an approximation for 2009, built on -# > a country level. Most airports in DST-less regions in countries that -# > generally observe DST (eg. AL, HI in the USA, NT, QL in Australia, parts of -# > Canada) are marked incorrectly. -# -# Sample entries -# -# 507,"Heathrow","London","United Kingdom","LHR","EGLL",51.4775,-0.461389,83,0,"E" -# 26,"Kugaaruk","Pelly Bay","Canada","YBB","CYBB",68.534444,-89.808056,56,-6,"A" -# 3127,"Pokhara","Pokhara","Nepal","PKR","VNPK",28.200881,83.982056,2712,5.75,"N" -# - -### @export "raw_openflight_airport" - -module RawAirport - COUNTRIES = { 'Puerto Rico' => 'us', 'Canada' => 'ca', 'USA' => 'us', 'United States' => 'us', - 'Northern Mariana Islands' => 'us', 'N Mariana Islands' => 'us', - 'Federated States of Micronesia' => 'fm', - 'Thailand' => 'th', 'Palau' => 'pw', - 'American Samoa' => 'as', 'Wake Island' => 'us', 'Virgin Islands' => 'vi', 'Guam' => 'gu' - } - BLANKISH_STRINGS = ["", nil, "NULL", '\\N', "NONE", "NA", "Null", "..."] - OK_CHARS_RE = /[^a-zA-Z0-9\:\ \/\.\,\-\(\)\'ÁÂÄÅÇÉÍÎÑÓÖØÚÜÞàáâãäåæçèéêëìíîïðñóôõöøúüýĀāăĆćČčēėęěğīİıŁłńņňŌōőřŞşŠšţťūůųźŽžơț]/ - - def receive_city(val) - super.tap{|val| if val then val.strip! ; val.gsub!(/\\+/, '') ; end } - end - - def receive_country(val) - super(COUNTRIES[val] || val) - end - - def receive_name(val) - super.tap do |val| - if val - val.strip! - val.gsub!(/\\+/, '') - val.gsub!(/\s*\[(military|private)\]/, '') - val.gsub!(/\b(Int\'l|International)\b/, 'Intl') - val.gsub!(/\b(Intercontinental)\b/, 'Intcntl') - val.gsub!(/\b(Airpt)\b/, 'Airport') - val.gsub!(/ Airport$/, '') - end - end - end -end - -# -class RawOpenflightAirport - include Gorillib::Model - include Gorillib::Model::LoadFromCsv - include RawAirport - # - field :airport_ofid, String, doc: "Unique OpenFlights identifier for this airport." - field :name, String, doc: "Name of airport. May or may not contain the City name." - field :city, String, blankish: BLANKISH_STRINGS, doc: "Main city served by airport. May be spelled differently from Name." - field :country, String, doc: "Country or territory where airport is located." - field :iata_faa, String, blankish: BLANKISH_STRINGS, doc: "3-letter FAA code, for airports located in the USA. For all other airports, 3-letter IATA code, or blank if not assigned." - field :icao, String, blankish: BLANKISH_STRINGS, doc: "4-letter ICAO code; Blank if not assigned." - field :latitude, Float, doc: "Decimal degrees, usually to six significant digits. Negative is South, positive is North." 
- field :longitude, Float, doc: "Decimal degrees, usually to six significant digits. Negative is West, positive is East." - field :altitude_ft, Float, blankish: ['', nil, 0, '0'], doc: "In feet." - field :utc_offset, Float, doc: "Hours offset from UTC. Fractional hours are expressed as decimals, eg. India is 5.5." - field :dst_rule, String, doc: "Daylight savings time rule. One of E (Europe), A (US/Canada), S (South America), O (Australia), Z (New Zealand), N (None) or U (Unknown). See the readme for more." - - UNRELIABLE_OPENFLIGHTS_IATA_VALUES = /^(7AK|AGA|AUQ|BDJ|BGW|BME|BPM|BXH|BZY|CAT|CEE|CEJ|CFS|CGU|CIO|CLV|CNN|DEE|DIB|DNM|DUH|DUR|FKI|GES|GSM|HKV|HOJ|HYD|IEO|IFN|IKA|IZA|JCU|JGS|KMW|KNC|LGQ|LUM|MCU|MCY|MDO|MOH|MON|MPH|MVF|NAY|NMA|NOE|NQY|OTU|OUI|PBV|PCA|PCB|PGK|PHO|PIF|PKN|PKY|PMK|PTG|PZO|QAS|QKT|QVY|RCM|RJL|RTG|SBG|SDZ|SFG|SIC|SIQ|SJI|SRI|STP|STU|SWQ|TJQ|TJS|TMC|TYA|UKC|VIY|VQS|VTS|WDH|WKM|WPR|WPU|ZQF)$/ - - def id_is_faa? - (icao =~ /^(?:K)/) || (icao.blank? && country == 'us') - end - - def iata ; (id_is_faa? ? nil : iata_faa) unless iata_faa =~ UNRELIABLE_OPENFLIGHTS_IATA_VALUES end - def faa ; (id_is_faa? ? iata_faa : nil ) end - def altitude - altitude_ft && (0.3048 * altitude_ft).round(1) - end - - def receive_country(val) - country = Geo::CountryNameLookup.for_alt_name(val, nil) - p val unless country - super(country ? country.country_id : val) - end - - def to_airport - attrs = self.compact_attributes.except(:altitude_ft) - attrs[:altitude] = altitude - attrs[:iata] = iata unless iata.to_s =~ UNRELIABLE_OPENFLIGHTS_IATA_VALUES - attrs[:faa] = faa - Airport.receive(attrs) - end - - def self.load_airports(filename) - load_csv(filename){|raw_airport| yield(raw_airport.to_airport) } - end -end - -### @export "raw_dataexpo_airport" -class RawDataexpoAirport - include Gorillib::Model - include Gorillib::Model::LoadFromCsv - include RawAirport - self.csv_options = self.csv_options.merge(pop_headers: true) - - field :faa, String, doc: "the international airport abbreviation code" - field :name, String, doc: "Airport name" - field :city, String, blankish: ["NA"], doc: "city in which the airport is located" - field :state, String, blankish: ["NA"], doc: "state in which the airport is located" - field :country, String, doc: "country in which airport is located" - field :latitude, Float, doc: "latitude of the airport" - field :longitude, Float, doc: "longitude of the airport" - - def to_airport - attrs = self.compact_attributes - attrs[:icao] = "K#{faa}" if faa =~ /[A-Z]{3}/ && (not ['PR', 'AK', 'CQ', 'HI', 'AS', 'GU', 'VI'].include?(state)) && (country == 'us') - Airport.receive(attrs) - end - - def self.load_airports(filename) - load_csv(filename){|raw_airport| yield(raw_airport.to_airport) } - end -end -### @export "nil" diff --git a/munging/airline_flights/airport_id_unification.rb b/munging/airline_flights/airport_id_unification.rb deleted file mode 100644 index 7fca449..0000000 --- a/munging/airline_flights/airport_id_unification.rb +++ /dev/null @@ -1,129 +0,0 @@ -class Airport - - # [Hash] all options passed to the field not recognized by one of its own current fields - attr_reader :_extra_attributes - - # # Airports whose IATA and FAA codes differ; all are in the US, so their ICAO is "K"+the FAA id - # FAA_ICAO_FIXUP = { - # "GRM" => "CKC", "CLD" => "CRQ", "SDX" => "SEZ", "AZA" => "IWA", "SCE" => "UNV", "BLD" => "BVU", - # "LKE" => "W55", "HSH" => "HND", "BKG" => "BBG", "UST" => "SGJ", "LYU" => "ELO", "WFK" => "FVE", - # "FRD" => "FHR", "ESD" => "ORS", "RKH" => "UZA", "NZC" => 
"VQQ", "SCF" => "SDL", "JCI" => "IXD", - # "AVW" => "AVQ", "UTM" => "UTA", "ONP" => "NOP", } - # - # [:iata, :icao, :latitude, :longitude, :country, :city, :name].each do |attr| - # define_method("of_#{attr}"){ @_extra_attributes[:"of_#{attr}"] } - # define_method("de_#{attr}"){ @_extra_attributes[:"de_#{attr}"] } - # end - # - # def lint_differences - # errors = {} - # return errors unless de_name.present? && of_name.present? - # [ - # [:iata, of_iata, de_iata], [:icao, of_icao, de_icao], [:country, of_country, de_country], - # [:city, of_city, de_city], - # [:name, of_name, de_name], - # ].each{|attr, of, de| next unless of && de ; errors[attr] = [of, de] if of != de } - # - # if (of_latitude && of_longitude && de_latitude && de_longitude) - # lat_diff = (of_latitude - de_latitude ).abs - # lng_diff = (of_longitude - de_longitude).abs - # unless (lat_diff < 0.015) && (lng_diff < 0.015) - # msg = [of_latitude, de_latitude, of_longitude, de_longitude, lat_diff, lng_diff].map{|val| "%9.4f" % val }.join(" ") - # errors["distance"] = ([msg, of_city, de_city, of_name, de_name]) - # end - # end - # - # errors - # end - # - # AIRPORTS = Hash.new # unless defined?(AIRPORTS) - # def self.load(of_filename, de_filename) - # RawOpenflightAirport.load_csv(of_filename) do |raw_airport| - # airport = raw_airport.to_airport - # AIRPORTS[airport.iata_icao] = airport - # end - # RawDataexpoAirport.load_csv(de_filename) do |raw_airport| - # airport = (AIRPORTS[raw_airport.iata_icao] ||= self.new) - # if airport.de_name - # warn "duplicate data for #{[iata, de_iata, icao, de_icao]}: #{raw_airport.to_tsv} #{airport.to_tsv}" - # end - # airport.receive!(raw_airport.airport_attrs) - # end - # AIRPORTS - # end - - def self.load(dirname) - load_csv(File.join(dirname, 'wikipedia_icao.tsv')) do |id_mapping| - [:icao, :iata, :faa ].each do |attr| - val = id_mapping.read_attribute(attr) or next - next if (val == '.') || (val == '_') - if that = ID_MAPPINGS[attr][val] - lint = that.disagreements(id_mapping) - puts [attr, val, "%-25s" % lint.inspect, id_mapping, that, "%-60s" % id_mapping.name, "%-25s" % that.name].join("\t") if lint.present? - else - ID_MAPPINGS[attr][val] = id_mapping - end - end - # [:icao, :iata, :faa ].each do |attr| - # val = id_mapping.read_attribute(attr) - # ID_MAPPINGS[attr][val] = id_mapping - # end - end - load_csv(File.join(dirname, 'wikipedia_iata.tsv')) do |id_mapping| - # if not ID_MAPPINGS[:icao].has_key?(id_mapping.icao) - # puts [:badicao, "%-25s" % "", id_mapping, " "*24, "%-60s" % id_mapping.name].join("\t") - # end - [:icao, :iata, :faa ].each do |attr| - val = id_mapping.read_attribute(attr) or next - next if (val == '.') || (val == '_') - if that = ID_MAPPINGS[attr][val] - lint = that.disagreements(id_mapping) - puts [attr, val, "%-25s" % lint.inspect, id_mapping, that, "%-60s" % id_mapping.name, "%-25s" % that.name].join("\t") if lint.present? 
-        else
-          ID_MAPPINGS[attr][val] = id_mapping
-        end
-      end
-    end
-  end # assumed: closes def self.load (no matching end survives in the captured text)
-
-  # Assumed definition: ID_MAPPINGS is referenced throughout this file but is
-  # never defined in it. A hash of lookup tables keyed by identifier scheme
-  # fits every usage above and below.
-  ID_MAPPINGS = { icao: {}, iata: {}, faa: {} } unless defined?(ID_MAPPINGS)
-
-  # def adopt_field(that, attr)
-  #   this_val = self.read_attribute(attr)
-  #   that_val = that.read_attribute(attr)
-  #   if name =~ /Bogus|Austin/i
-  #     puts [attr, this_val, that_val, attribute_set?(attr), that.attribute_set?(attr), to_tsv, that.to_tsv].join("\t")
-  #   end
-  #   if this_val && that_val
-  #     if (this_val != that_val) then warn [attr, this_val, that_val, name].join("\t") ; end
-  #   elsif that_val
-  #     write_attribute(that_val)
-  #   end
-  # end
-
-  def to_s
-    attributes.values[0..2].join("\t")
-  end
-
-  def disagreements(that)
-    errors = {}
-    [:icao, :iata, :faa ].each do |attr|
-      this_val = self.read_attribute(attr) or next
-      that_val = that.read_attribute(attr) or next
-      next if that_val == '.' || that_val == '_'
-      errors[attr] = [this_val, that_val] if this_val != that_val
-    end
-    errors
-  end
-
-  def self.dump_ids(ids)
-    # ids is the {icao:, iata:, faa:} hash built by Airport#ids; a class method
-    # has no bare icao/iata/faa readers to call
-    "%s\t%s\t%s" % [ids[:icao], ids[:iata], ids[:faa]]
-  end
-  def self.dump_mapping
-    [:icao, :iata, :faa].map do |attr|
-      "%-50s" % ID_MAPPINGS[attr].to_a.sort.map{|id, val| "#{id}:#{val.icao||' '}|#{val.iata||' '}|#{val.faa||' '}"}.join(";")
-    end
-  end
-
-  def self.dump_info(kind, ids, reconciler, existing, *args)
-    ex_str = [existing.map{|el| dump_ids(el.ids) }, "\t\t","\t\t","\t\t"].flatten[0..2]
-    puts [kind, dump_ids(ids), dump_ids(reconciler.ids), ex_str, *args, dump_mapping.join("//") ].flatten.join("\t| ")
-  end
-end
diff --git a/munging/airline_flights/airport_ok_chars.rb b/munging/airline_flights/airport_ok_chars.rb
deleted file mode 100644
index 5c2dd93..0000000
--- a/munging/airline_flights/airport_ok_chars.rb
+++ /dev/null
@@ -1,4 +0,0 @@
-# -*- coding: utf-8 -*-
-
-
-OK_CHARS_RE = /[^a-zA-Z0-9\ \/\.\,\-\(\)\'ÁÂÄÅÇÉÍÎÑÖØÜÞàáâãäåæçèéêëìíîïðñóôõöøúüýāăčėęěğİıŁłńōőřŞşŠšţťūźŽžơț]/
diff --git a/munging/airline_flights/flight.rb b/munging/airline_flights/flight.rb
deleted file mode 100644
index 3dbfe1e..0000000
--- a/munging/airline_flights/flight.rb
+++ /dev/null
@@ -1,156 +0,0 @@
-# Raw data:
-# Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Can
-# 2007,1,1,1,1232,1225,1341,1340,WN,2891,N351,69,75,54,1,7,SMF,ONT,389,4,11,0,,0,0,0,0,0,0
-
-class RawAirlineFlight
-  include Gorillib::Model
-
-  field :date_year,   Integer, position: 1, doc: "Year (1987-2008)"
-  field :date_month,  Integer, position: 2, doc: "Month (1-12)"
-  field :date_day,    Integer, position: 3, doc: "Day of month (1-31)"
-  field :day_of_week, Integer, position: 4, doc: "Day of week -- 1 (Monday) - 7 (Sunday)"
-  #
-  field :act_dep_tod, String, position: 5, doc: "time of day for actual departure (local, hhmm)", blankish: [nil, '', 'NA']
-  field :crs_dep_tod, String, position: 6, doc: "time of day for scheduled departure (local, hhmm)"
-  field :act_arr_tod, String, position: 7, doc: "time of day for actual arrival (local, hhmm). Not adjusted for wrap-around.", blankish: [nil, '', 'NA']
-  field :crs_arr_tod, String, position: 8, doc: "time of day for scheduled arrival (local, hhmm). Not adjusted for wrap-around."
-  #
-  field :unique_carrier,      String,  position:  9, doc: "unique carrier code", validates: { length: { in: 0..5 } }
-  field :flight_num,          Integer, position: 10, doc: "flight number"
-  field :tail_num,            String,  position: 11, doc: "plane tail number", validates: { length: { in: 0..8 } }
-  #
-  field :act_duration,        Integer, position: 12, doc: "actual flight time, in minutes", blankish: [nil, '', 'NA']
-  field :crs_duration,        Integer, position: 13, doc: "CRS flight time, in minutes"
-  field :air_duration,        Integer, position: 14, doc: "Air time, in minutes", blankish: [nil, '', 'NA']
-  field :arr_delay,           Integer, position: 15, doc: "arrival delay, in minutes", blankish: [nil, '', 'NA']
-  field :dep_delay,           Integer, position: 16, doc: "departure delay, in minutes", blankish: [nil, '', 'NA']
-  field :from_airport,        String,  position: 17, doc: "Origin IATA airport code", validates: { length: { in: 0..3 } }
-  field :into_airport,        String,  position: 18, doc: "Destination IATA airport code", validates: { length: { in: 0..3 } }
-  field :distance_mi,         Integer, position: 19, doc: "Flight distance, in miles"
-  field :taxi_in_duration,    Integer, position: 20, doc: "taxi in time, in minutes", blankish: [nil, '', 'NA']
-  field :taxi_out_duration,   Integer, position: 21, doc: "taxi out time in minutes", blankish: [nil, '', 'NA']
-  #
-  field :is_cancelled,        :boolean_10, position: 22, doc: "was the flight cancelled?"
-  field :cancellation_code,   String,  position: 23, doc: "Reason for cancellation (A = carrier, B = weather, C = NAS, D = security, Z = no cancellation)"
-  field :is_diverted,         :boolean_10, position: 24, doc: "Was the plane diverted?"
-  field :carrier_delay,       Integer, position: 25, doc: "in minutes"
-  field :weather_delay,       Integer, position: 26, doc: "in minutes"
-  field :nas_delay,           Integer, position: 27, doc: "in minutes"
-  field :security_delay,      Integer, position: 28, doc: "in minutes"
-  field :late_aircraft_delay, Integer, position: 29, doc: "in minutes"
-
-  def flight_date
-    Time.new(date_year, date_month, date_day)
-  end
-
-  # uses the year / month / day, along with an "hhmm" string, to build an
-  # epoch-seconds UTC timestamp (a worked wrap-around example follows the
-  # parse.rb section below)
-  def inttime_from_hhmm(val, fencepost=nil)
-    hour, minutes = [val.to_i / 100, val.to_i % 100]
-    res = Time.utc(date_year, date_month, date_day, hour, minutes)
-    # if before fencepost, we wrapped around in time
-    res += (24 * 60 * 60) if fencepost && (res.to_i < fencepost)
-    res.to_i
-  end
-
-  def act_dep_itime ; @act_dep_itime = inttime_from_hhmm(act_dep_tod) if act_dep_tod ; end
-  def crs_dep_itime ; @crs_dep_itime = inttime_from_hhmm(crs_dep_tod) ; end
-  def act_arr_itime ; @act_arr_itime = inttime_from_hhmm(act_arr_tod, act_dep_itime) if act_arr_tod ; end
-  def crs_arr_itime ; @crs_arr_itime = inttime_from_hhmm(crs_arr_tod, crs_dep_itime) ; end
-
-  def receive_tail_num(val)  ; val = nil if val.to_s == "0" ; super(val) ; end
-  def receive_arr_delay(val) ; val = nil if val.to_s == "0" ; super(val) ; end  # like tail_num, a bare "0" is treated as missing
-
-  def receive_cancellation_code(val) ; if val == "" then super("Z") else super(val) ; end ; end
-
-  def to_airline_flight
-    # drop the raw-only fields (the attribute keys are :date_year / :date_month / :date_day); everything else maps across
-    attrs = self.attributes.reject{|attr,val| [:date_year, :date_month, :date_day, :distance_mi].include?(attr) }
-    attrs[:flight_datestr] = flight_date.strftime("%Y%m%d")
-    attrs[:distance_km]    = (distance_mi * 1.609_344).to_i
-
-    attrs[:act_dep_tod] = "%04d" % act_dep_tod.to_i if act_dep_tod
-    attrs[:crs_dep_tod] = "%04d" % crs_dep_tod.to_i if crs_dep_tod
-    attrs[:act_arr_tod] = "%04d" % act_arr_tod.to_i if act_arr_tod
-    attrs[:crs_arr_tod] = "%04d" % crs_arr_tod.to_i if crs_arr_tod
-
-    attrs[:act_dep_itime] = act_dep_itime
-
attrs[:crs_dep_itime] = crs_dep_itime - attrs[:act_arr_itime] = act_arr_itime - attrs[:crs_arr_itime] = crs_arr_itime - - AirlineFlight.receive(attrs) - end -end - -class AirlineFlight - include Gorillib::Model - - # Identifier - field :flight_datestr, String, position: 0, doc: "Date, YYYYMMDD. Use flight_date method if you want a date" - field :unique_carrier, String, position: 1, doc: "Unique Carrier Code. When the same code has been used by multiple carriers, a numeric suffix is used for earlier users, for example, PA, PA(1), PA(2).", validates: { length: { in: 0..5 } } - field :flight_num, Integer, position: 2, doc: "flight number" - # Flight - field :from_airport, String, position: 3, doc: "Origin IATA airport code", validates: { length: { in: 0..3 } } - field :into_airport, String, position: 4, doc: "Destination IATA airport code", validates: { length: { in: 0..3 } } - field :tail_num, String, position: 5, doc: "Plane tail number", validates: { length: { in: 0..8 } } - field :distance_km, Integer, position: 6, doc: "Flight distance, in kilometers" - field :day_of_week, Integer, position: 7, doc: "Day of week -- 1 (Monday) - 7 (Sunday)" - # Departure and Arrival Absolute Time - field :crs_dep_itime, IntTime, position: 8, doc: "scheduled departure time (utc epoch seconds)" - field :crs_arr_itime, IntTime, position: 9, doc: "scheduled arrival time (utc epoch seconds)" - field :act_dep_itime, IntTime, position: 10, doc: "actual departure time (utc epoch seconds)" - field :act_arr_itime, IntTime, position: 11, doc: "actual arrival time (utc epoch seconds)" - # Departure and Arrival Local Time of Day - field :crs_dep_tod, String, position: 12, doc: "time of day for scheduled departure (local, hhmm)" - field :crs_arr_tod, String, position: 13, doc: "time of day for scheduled arrival (local, hhmm). Not adjusted for wrap-around." - field :act_dep_tod, String, position: 14, doc: "time of day for actual departure (local, hhmm)" - field :act_arr_tod, String, position: 15, doc: "time of day for actual arrival (local, hhmm). Not adjusted for wrap-around." - # Duration - field :crs_duration, Integer, position: 16, doc: "CRS flight time, in minutes" - field :act_duration, Integer, position: 17, doc: "Actual flight time, in minutes" - field :air_duration, Integer, position: 18, doc: "Air time, in minutes" - field :taxi_in_duration, Integer, position: 19, doc: "taxi in time, in minutes" - field :taxi_out_duration, Integer, position: 20, doc: "taxi out time in minutes" - # Delay - field :is_diverted, :boolean_10, position: 21, doc: "Was the plane diverted? The actual_duration column remains NULL for all diverted flights." - field :is_cancelled, :boolean_10, position: 22, doc: "was the flight cancelled?" - field :cancellation_code, String, position: 23, doc: "Reason for cancellation (A = carrier, B = weather, C = NAS, D = security, Z = no cancellation)" - field :dep_delay, Integer, position: 24, doc: "Difference in minutes between scheduled and actual departure time. Early departures show negative numbers. " - field :arr_delay, Integer, position: 25, doc: "Difference in minutes between scheduled and actual arrival time. Early arrivals show negative numbers." 
-  field :carrier_delay,       Integer, position: 26, doc: "Carrier delay, in minutes"
-  field :weather_delay,       Integer, position: 27, doc: "Weather delay, in minutes"
-  field :nas_delay,           Integer, position: 28, doc: "National Air System delay, in minutes"
-  field :security_delay,      Integer, position: 29, doc: "Security delay, in minutes"
-  field :late_aircraft_delay, Integer, position: 30, doc: "Late Aircraft delay, in minutes"
-
-  def to_tsv
-    attrs = attributes
-    attrs[:is_cancelled]   = is_cancelled ? 1 : 0
-    attrs[:is_diverted]    = is_diverted  ? 1 : 0
-    attrs[:act_dep_itime] ||= ' '
-    attrs[:act_arr_itime] ||= ' '
-
-    # FIXME
-    attrs[:act_duration] = ((crs_arr_itime - crs_dep_itime) / 60.0).to_i
-    attrs[:air_duration] = attrs[:act_duration] - attrs[:crs_duration]
-    attrs.each{|key, val| attrs[key] = val.to_s[-7..-1] if val.to_s.length > 7 } # FIXME: for testing
-
-    attrs.values.join("\t")
-  end
-
-  def flight_date
-    @flight_date ||= Gorillib::Factory::DateFactory.receive(flight_datestr)
-  end
-
-  # checks that the record is sane
-  def lint
-    {
-      act_duration:       (!act_arr_itime) || (act_arr_itime - act_dep_itime == act_duration * 60),
-      crs_duration:       (!crs_arr_itime) || (crs_arr_itime - crs_dep_itime == crs_duration * 60),
-      cancelled_has_code: (is_cancelled == (cancellation_code != "Z")),
-      cancellation_code:  (%w[A B C D Z].include?(cancellation_code)),
-      # a second :act_duration key would silently clobber the first, so this check gets its own name
-      duration_parts:     (!act_duration) || (act_duration == (air_duration + taxi_in_duration + taxi_out_duration)),
-      dep_delay:          (!act_dep_itime) || (dep_delay == (act_dep_itime - crs_dep_itime)/60.0),
-      arr_delay:          (!act_arr_itime) || (arr_delay == (act_arr_itime - crs_arr_itime)/60.0),
-    }
-  end
-end
diff --git a/munging/airline_flights/models.rb b/munging/airline_flights/models.rb
deleted file mode 100644
index 1495023..0000000
--- a/munging/airline_flights/models.rb
+++ /dev/null
@@ -1,4 +0,0 @@
-require_relative './airline'
-require_relative './airport'
-require_relative './route'
-require_relative './flight'
diff --git a/munging/airline_flights/parse.rb b/munging/airline_flights/parse.rb
deleted file mode 100644
index d178142..0000000
--- a/munging/airline_flights/parse.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-
-# see also spec/examples/munging/airline_flights_spec.rb
-
-  puts described_class.field_names.map{|fn| fn[0..6] }.join("\t")
-  raw_airports = RawDataexpoAirport.load_csv(de_airports_filename)
-  raw_airports.each do |airport|
-    puts airport.to_tsv
-  end
-
-  puts described_class.field_names.join("\t") # .map{|fn| fn[0..6] }.join("\t")
-  raw_airports = described_class.load_csv(raw_airports_filename)
-  raw_airports.each do |airport|
-    # puts airport.to_tsv
-    linted = airport.lint
-    puts [airport.iata, airport.icao, linted.inspect, airport.to_tsv, ].join("\t") if linted.present?
-  end
-
-  Airport.load(raw_airports_filename, de_airports_filename)
-  Airport::AIRPORTS.each{|id,airport|
-    #puts airport.to_tsv
-    linted = airport.lint
-    warn [airport.iata, airport.icao, airport.de_iata, "%-25s" % airport.name, linted.inspect].join("\t") if linted.present?
-  }
-
-
-# Model.from_tuple(...)
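
The parse.rb fragments above come straight out of the spec -- described_class only means something inside an RSpec example group. A self-contained sketch of the same lint pass might look like this; the filename is a placeholder, and it assumes the models above load cleanly:

    require_relative './models'

    # placeholder path -- the rake tasks resolve :dataexpo_raw_airports to the real location
    raw_filename = 'data/airline_flights/dataexpo_airports-raw.csv'

    puts RawDataexpoAirport.field_names.join("\t")
    RawDataexpoAirport.load_csv(raw_filename).each do |raw_airport|
      airport = raw_airport.to_airport
      linted  = airport.lint
      puts [airport.iata, airport.icao, linted.inspect, airport.to_tsv].join("\t") if linted.present?
    end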
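
Likewise, the fencepost logic in flight.rb is worth a worked example: inttime_from_hhmm pushes an arrival forward by a day whenever the parsed time-of-day would land before the departure it is fenced against, so a red-eye does not appear to arrive before it left. A sketch with hypothetical values:

    # hypothetical red-eye: departs 23:30, arrives 00:30 the next morning
    red_eye = RawAirlineFlight.receive(date_year: 2007, date_month: 1, date_day: 1,
                                       act_dep_tod: '2330', act_arr_tod: '0030')

    dep = red_eye.act_dep_itime  # Time.utc(2007, 1, 1, 23, 30).to_i
    arr = red_eye.act_arr_itime  # '0030' alone parses as 2007-01-01 00:30, before dep,
                                 # so a day (24 * 60 * 60) is added: 2007-01-02 00:30
    arr - dep                    # => 3600 -- a one-hour flight rather than a negative one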
diff --git a/munging/airline_flights/reconcile_airports.rb b/munging/airline_flights/reconcile_airports.rb deleted file mode 100644 index be2a468..0000000 --- a/munging/airline_flights/reconcile_airports.rb +++ /dev/null @@ -1,142 +0,0 @@ -require_relative './models' -require 'gorillib/model/reconcilable' - -class Airport - include Gorillib::Model::Reconcilable - attr_accessor :_origin # source of the record - - def conflicting_attribute!(attr, this_val, that_val) - case attr - when :name, :city, :airport_ofid then return :pass - when :latitude, :longitude then return true if (this_val - that_val).abs < 3 - when :altitude then return true if (this_val - that_val).abs < 5 - end - super - end - - def ids - [:icao, :iata, :faa].hashify{|attr| public_send(attr) }.compact - end -end - -# -# Loads the Airport identifier tables scraped from Wikipedia -# -class RawAirportIdentifier < Airport - include RawAirport - include Gorillib::Model::LoadFromTsv - - def self.from_tuple(icao, iata, faa, name, city=nil, *_) - self.new({icao: icao, iata: iata, faa: faa, name: name, city: city}.compact_blank) - end - - def self.load_airports(filename, &block) - load_tsv(filename, num_fields: 4..6, &block) - end -end - -class Airport - # - # Reconciler for Airports - # - # For each airport in turn across openflights, dataexpo and the two scraped - # identifier sets, - # - # - class IdReconciler - include Gorillib::Model - include Gorillib::Model::LoadFromCsv - include Gorillib::Model::Reconcilable - self.csv_options = { col_sep: "\t", num_fields: 3..6 } - - # Map the reconcilers to each ID they have anything to say about - ID_MAP = { icao: {}, iata: {}, faa: {} } - - field :opinions, Array, default: Array.new, doc: "every record having an id in common with the other records in this field" - - def ids - opinions.flat_map{|op| op.ids.to_a }.uniq.compact - end - - def self.load_all - Log.info "Loading all Airports and reconciling" - @airports = Array.new - RawDataexpoAirport .load_airports(:dataexpo_raw_airports ){|airport| register(:dataexpo, airport) } - RawOpenflightAirport.load_airports(:openflights_raw_airports){|airport| register(:openflights, airport) } - RawAirportIdentifier.load_airports(:wikipedia_icao ){|airport| register(:wp_icao, airport) } - RawAirportIdentifier.load_airports(:wikipedia_iata ){|airport| register(:wp_iata, airport) } - RawAirportIdentifier.load_airports(:wikipedia_us_abroad ){|airport| register(:wp_us_abroad, airport) } - - recs = ID_MAP.map{|attr, hsh| hsh.sort.map(&:last) }.flatten.uniq - recs.each do |rec| - consensus = rec.reconcile - # lint = consensus.lint - # puts "%-79s\t%s" % [lint, consensus.to_s[0..100]] if lint.present? - @airports << consensus - end - end - - def self.airports - @airports - end - - def self.exemplars - Airport::EXEMPLARS.map do |iata| - ID_MAP[:iata][iata].reconcile - end - end - - def reconcile - consensus = Airport.new - clean = opinions.all?{|op| consensus.adopt(op) } - # puts "\t#{consensus.inspect}" - puts "confl\t#{self.inspect}" if not clean - consensus - end - - def adopt_opinions(vals, _) - self.opinions = vals + self.opinions - self.opinions.uniq! 
-    end
-
-    # * find all existing reconcilers that share an ID with that record
-    # * unify them into one reconciler
-    # * store it back under all the IDs
-    #
-    # Suppose our dataset has records bearing up to 3 identifiers apiece, like so:
-    #
-    #     a S
-    #     S 88
-    #     a Z
-    #     b
-    #     Q
-    #     b Q 77
-    #
-    # We will wind up with these two reconcilers:
-    #
-    #     [a S 88 Z]  -- the first three records chain together through the shared a and S
-    #     [b Q 77]    -- the last three records chain together through the shared b and Q
-    #
-    def self.register(origin, obj)
-      obj._origin = origin
-      # get the existing reconcilers
-      existing = obj.ids.map{|attr, id| ID_MAP[attr][id] }.compact.uniq
-      # push the new object in, and pull the most senior one out
-      existing.unshift(self.new(opinions: [obj]))
-      reconciler = existing.shift
-      # unite them into the reconciler
-      existing.each{|that| reconciler.adopt(that) }
-      # save the reconciler under each of the ids.
-      reconciler.ids.each{|attr, id| ID_MAP[attr][id] = reconciler }
-    end
-
-    def inspect
-      str = "#<#{self.class.name} #{ids}"
-      opinions.each do |op|
-        str << "\n\t #{op._origin}\t#{op}"
-      end
-      str << ">"
-    end
-  end
-
-end
diff --git a/munging/airline_flights/route.rb b/munging/airline_flights/route.rb
deleted file mode 100644
index 15ebfee..0000000
--- a/munging/airline_flights/route.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-
-
-# As of January 2012, the OpenFlights/Airline Route Mapper Route Database
-# contains 59036 routes between 3209 airports on 531 airlines [spanning the
-# globe](http://openflights.org/demo/openflights-routedb-2048.png). If you
-# enjoy this data, please consider [visiting their page and
-# donating](http://openflights.org/data.html)
-#
-# > Notes: Routes are directional: if an airline operates services from A to B
-# > and from B to A, both A-B and B-A are listed separately. Routes where one
-# > carrier operates both its own and codeshare flights are listed only once.
-#
-# Sample entries
-#
-#     BA,1355,SIN,3316,LHR,507,,0,744 777
-#     BA,1355,SIN,3316,MEL,3339,Y,0,744
-#     TOM,5013,ACE,1055,BFS,465,,0,320
-#
-class RawOpenflightRoute
-  include Gorillib::Model
-
-  field :iataicao,              String,   doc: "2-letter (IATA) or 3-letter (ICAO) code of the airline."
-  field :airline_ofid,          Integer,  doc: "Unique OpenFlights identifier for airline (see Airline)."
-  field :from_airport_iataicao, String,   doc: "3-letter (IATA) or 4-letter (ICAO) code of the source airport."
-  field :from_airport_ofid,     Integer,  doc: "Unique OpenFlights identifier for source airport (see Airport)"
-  field :into_airport_iataicao, String,   doc: "3-letter (IATA) or 4-letter (ICAO) code of the destination airport."
-  field :into_airport_ofid,     Integer,  doc: "Unique OpenFlights identifier for destination airport (see Airport)"
-  field :codeshare,             :boolean, doc: "true if this flight is a codeshare (that is, not operated by Airline, but another carrier); empty otherwise."
- field :stops, Integer, doc: "Number of stops on this flight, or '0' for direct" - field :equipment_list, String, doc: "3-letter codes for plane type(s) generally used on this flight, separated by spaces" - - def receive_codeshare(val) - super(case val when "Y" then true when "N" then false else val ; end) - end -end diff --git a/munging/airline_flights/tasks.rake b/munging/airline_flights/tasks.rake deleted file mode 120000 index ef0deb4..0000000 --- a/munging/airline_flights/tasks.rake +++ /dev/null @@ -1 +0,0 @@ -airline_flights.rake \ No newline at end of file diff --git a/munging/airline_flights/timezone_fixup.rb b/munging/airline_flights/timezone_fixup.rb deleted file mode 100644 index 2905894..0000000 --- a/munging/airline_flights/timezone_fixup.rb +++ /dev/null @@ -1,62 +0,0 @@ -require 'date' -require 'gorillib/hash/zip' - -class Airport - - class TimezoneFixup - - YEARS = (2010 .. 2012).to_a - - DST_RULES = { - 'E' => { name: 'European', beg_doy: 'last Sunday in March', end_doy: 'last Sunday in October', beg_dates: {}, end_dates: {}, used_in: 'all European countries (except Iceland), as well as Greenland, Lebanon, Russia and Tunisia. Jordan and Syria are almost the same, starting and ending on Friday instead of Sunday. European DST is also used to (crudely) approximate Iranian DST, although they actually use an entirely different calendar.', }, - 'A' => { name: 'US/Canada', beg_doy: '2nd Sunday in March', end_doy: '1st Sunday in November', beg_dates: {}, end_dates: {}, used_in: 'the United States (except Arizona, Hawaii and island territories) and Canada (with convoluted exceptions).', }, - 'S' => { name: 'South American', beg_doy: '3rd Sunday in March', end_doy: '3rd Sunday in October', southern: true, beg_dates: {}, end_dates: {}, used_in: 'With some variance in the exact dates, in Argentina, Chile, Mexico, Paraguay, Uruguay as well as the African states of Namibia and Mauritius.', }, - 'O' => { name: 'Australia', beg_doy: '1st Sunday in April', end_doy: '1st Sunday in October', southern: true, beg_dates: {}, end_dates: {}, used_in: 'Australia, except for Queensland and the Northern Territory.' }, - 'Z' => { name: 'New Zealand', beg_doy: '1st Sunday in April', end_doy: 'last Sunday in September', southern: true, beg_dates: {}, end_dates: {}, used_in: 'New Zealand', }, - 'N' => { name: 'None', beg_doy: nil, end_doy: nil, beg_dates: {}, end_dates: {}, used_in: 'DST not observed.', }, - 'U' => { name: 'Unknown', beg_doy: nil, end_doy: nil, beg_dates: {}, end_dates: {}, used_in: 'DST status not known. 
The same as "None".', }, - } - - DST_RULES['E'][:beg_dates] = { 1987 => "1987-03-29", 1988 => "1988-03-27", 1989 => "1989-03-26", 1990 => "1990-03-25", 1991 => "1991-03-31", 1992 => "1992-03-29", 1993 => "1993-03-28", 1994 => "1994-03-27", 1995 => "1995-03-26", 1996 => "1996-03-31", 1997 => "1997-03-30", 1998 => "1998-03-29", 1999 => "1999-03-28", 2000 => "2000-03-26", 2001 => "2001-03-25", 2002 => "2002-03-31", 2003 => "2003-03-30", 2004 => "2004-03-28", 2005 => "2005-03-27", 2006 => "2006-03-26", 2007 => "2007-03-25", 2008 => "2008-03-30", 2009 => "2009-03-29", 2010 => "2010-03-28", 2011 => "2011-03-27", 2012 => "2012-03-25", 2013 => "2013-03-31", 2014 => "2014-03-30", 2015 => "2015-03-29", 2016 => "2016-03-27", 2017 => "2017-03-26", 2018 => "2018-03-25", 2019 => "2019-03-31", 2020 => "2020-03-29", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['E'][:end_dates] = { 1987 => "1987-10-25", 1988 => "1988-10-30", 1989 => "1989-10-29", 1990 => "1990-10-28", 1991 => "1991-10-27", 1992 => "1992-10-25", 1993 => "1993-10-31", 1994 => "1994-10-30", 1995 => "1995-10-29", 1996 => "1996-10-27", 1997 => "1997-10-26", 1998 => "1998-10-25", 1999 => "1999-10-31", 2000 => "2000-10-29", 2001 => "2001-10-28", 2002 => "2002-10-27", 2003 => "2003-10-26", 2004 => "2004-10-31", 2005 => "2005-10-30", 2006 => "2006-10-29", 2007 => "2007-10-28", 2008 => "2008-10-26", 2009 => "2009-10-25", 2010 => "2010-10-31", 2011 => "2011-10-30", 2012 => "2012-10-28", 2013 => "2013-10-27", 2014 => "2014-10-26", 2015 => "2015-10-25", 2016 => "2016-10-30", 2017 => "2017-10-29", 2018 => "2018-10-28", 2019 => "2019-10-27", 2020 => "2020-10-25", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['A'][:beg_dates] = { 1987 => "1987-03-08", 1988 => "1988-03-13", 1989 => "1989-03-12", 1990 => "1990-03-11", 1991 => "1991-03-10", 1992 => "1992-03-08", 1993 => "1993-03-14", 1994 => "1994-03-13", 1995 => "1995-03-12", 1996 => "1996-03-10", 1997 => "1997-03-09", 1998 => "1998-03-08", 1999 => "1999-03-14", 2000 => "2000-03-12", 2001 => "2001-03-11", 2002 => "2002-03-10", 2003 => "2003-03-09", 2004 => "2004-03-14", 2005 => "2005-03-13", 2006 => "2006-03-12", 2007 => "2007-03-11", 2008 => "2008-03-09", 2009 => "2009-03-08", 2010 => "2010-03-14", 2011 => "2011-03-13", 2012 => "2012-03-11", 2013 => "2013-03-10", 2014 => "2014-03-09", 2015 => "2015-03-08", 2016 => "2016-03-13", 2017 => "2017-03-12", 2018 => "2018-03-11", 2019 => "2019-03-10", 2020 => "2020-03-08", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['A'][:end_dates] = { 1987 => "1987-11-01", 1988 => "1988-11-06", 1989 => "1989-11-05", 1990 => "1990-11-04", 1991 => "1991-11-03", 1992 => "1992-11-01", 1993 => "1993-11-07", 1994 => "1994-11-06", 1995 => "1995-11-05", 1996 => "1996-11-03", 1997 => "1997-11-02", 1998 => "1998-11-01", 1999 => "1999-11-07", 2000 => "2000-11-05", 2001 => "2001-11-04", 2002 => "2002-11-03", 2003 => "2003-11-02", 2004 => "2004-11-07", 2005 => "2005-11-06", 2006 => "2006-11-05", 2007 => "2007-11-04", 2008 => "2008-11-02", 2009 => "2009-11-01", 2010 => "2010-11-07", 2011 => "2011-11-06", 2012 => "2012-11-04", 2013 => "2013-11-03", 2014 => "2014-11-02", 2015 => "2015-11-01", 2016 => "2016-11-06", 2017 => "2017-11-05", 2018 => "2018-11-04", 2019 => "2019-11-03", 2020 => "2020-11-01", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['S'][:beg_dates] = { 1987 => "1987-10-18", 1988 => "1988-10-16", 1989 => "1989-10-15", 1990 
=> "1990-10-21", 1991 => "1991-10-20", 1992 => "1992-10-18", 1993 => "1993-10-17", 1994 => "1994-10-16", 1995 => "1995-10-15", 1996 => "1996-10-20", 1997 => "1997-10-19", 1998 => "1998-10-18", 1999 => "1999-10-17", 2000 => "2000-10-15", 2001 => "2001-10-21", 2002 => "2002-10-20", 2003 => "2003-10-19", 2004 => "2004-10-17", 2005 => "2005-10-16", 2006 => "2006-10-15", 2007 => "2007-10-21", 2008 => "2008-10-19", 2009 => "2009-10-18", 2010 => "2010-10-17", 2011 => "2011-10-16", 2012 => "2012-10-21", 2013 => "2013-10-20", 2014 => "2014-10-19", 2015 => "2015-10-18", 2016 => "2016-10-16", 2017 => "2017-10-15", 2018 => "2018-10-21", 2019 => "2019-10-20", 2020 => "2020-10-18", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['S'][:end_dates] = { 1987 => "1987-03-15", 1988 => "1988-03-20", 1989 => "1989-03-19", 1990 => "1990-03-18", 1991 => "1991-03-17", 1992 => "1992-03-15", 1993 => "1993-03-21", 1994 => "1994-03-20", 1995 => "1995-03-19", 1996 => "1996-03-17", 1997 => "1997-03-16", 1998 => "1998-03-15", 1999 => "1999-03-21", 2000 => "2000-03-19", 2001 => "2001-03-18", 2002 => "2002-03-17", 2003 => "2003-03-16", 2004 => "2004-03-21", 2005 => "2005-03-20", 2006 => "2006-03-19", 2007 => "2007-03-18", 2008 => "2008-03-16", 2009 => "2009-03-15", 2010 => "2010-03-21", 2011 => "2011-03-20", 2012 => "2012-03-18", 2013 => "2013-03-17", 2014 => "2014-03-16", 2015 => "2015-03-15", 2016 => "2016-03-20", 2017 => "2017-03-19", 2018 => "2018-03-18", 2019 => "2019-03-17", 2020 => "2020-03-15", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['O'][:beg_dates] = { 1987 => "1987-10-04", 1988 => "1988-10-02", 1989 => "1989-10-01", 1990 => "1990-10-07", 1991 => "1991-10-06", 1992 => "1992-10-04", 1993 => "1993-10-03", 1994 => "1994-10-02", 1995 => "1995-10-01", 1996 => "1996-10-06", 1997 => "1997-10-05", 1998 => "1998-10-04", 1999 => "1999-10-03", 2000 => "2000-10-01", 2001 => "2001-10-07", 2002 => "2002-10-06", 2003 => "2003-10-05", 2004 => "2004-10-03", 2005 => "2005-10-02", 2006 => "2006-10-01", 2007 => "2007-10-07", 2008 => "2008-10-05", 2009 => "2009-10-04", 2010 => "2010-10-03", 2011 => "2011-10-02", 2012 => "2012-10-07", 2013 => "2013-10-06", 2014 => "2014-10-05", 2015 => "2015-10-04", 2016 => "2016-10-02", 2017 => "2017-10-01", 2018 => "2018-10-07", 2019 => "2019-10-06", 2020 => "2020-10-04", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['O'][:end_dates] = { 1987 => "1987-04-05", 1988 => "1988-04-03", 1989 => "1989-04-02", 1990 => "1990-04-01", 1991 => "1991-04-07", 1992 => "1992-04-05", 1993 => "1993-04-04", 1994 => "1994-04-03", 1995 => "1995-04-02", 1996 => "1996-04-07", 1997 => "1997-04-06", 1998 => "1998-04-05", 1999 => "1999-04-04", 2000 => "2000-04-02", 2001 => "2001-04-01", 2002 => "2002-04-07", 2003 => "2003-04-06", 2004 => "2004-04-04", 2005 => "2005-04-03", 2006 => "2006-04-02", 2007 => "2007-04-01", 2008 => "2008-04-06", 2009 => "2009-04-05", 2010 => "2010-04-04", 2011 => "2011-04-03", 2012 => "2012-04-01", 2013 => "2013-04-07", 2014 => "2014-04-06", 2015 => "2015-04-05", 2016 => "2016-04-03", 2017 => "2017-04-02", 2018 => "2018-04-01", 2019 => "2019-04-07", 2020 => "2020-04-05", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['Z'][:beg_dates] = { 1987 => "1987-09-27", 1988 => "1988-09-25", 1989 => "1989-09-24", 1990 => "1990-09-30", 1991 => "1991-09-29", 1992 => "1992-09-27", 1993 => "1993-09-26", 1994 => "1994-09-25", 1995 => "1995-09-24", 1996 => 
"1996-09-29", 1997 => "1997-09-28", 1998 => "1998-09-27", 1999 => "1999-09-26", 2000 => "2000-09-24", 2001 => "2001-09-30", 2002 => "2002-09-29", 2003 => "2003-09-28", 2004 => "2004-09-26", 2005 => "2005-09-25", 2006 => "2006-09-24", 2007 => "2007-09-30", 2008 => "2008-09-28", 2009 => "2009-09-27", 2010 => "2010-09-26", 2011 => "2011-09-25", 2012 => "2012-09-30", 2013 => "2013-09-29", 2014 => "2014-09-28", 2015 => "2015-09-27", 2016 => "2016-09-25", 2017 => "2017-09-24", 2018 => "2018-09-30", 2019 => "2019-09-29", 2020 => "2020-09-27", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - DST_RULES['Z'][:end_dates] = { 1987 => "1987-04-05", 1988 => "1988-04-03", 1989 => "1989-04-02", 1990 => "1990-04-01", 1991 => "1991-04-07", 1992 => "1992-04-05", 1993 => "1993-04-04", 1994 => "1994-04-03", 1995 => "1995-04-02", 1996 => "1996-04-07", 1997 => "1997-04-06", 1998 => "1998-04-05", 1999 => "1999-04-04", 2000 => "2000-04-02", 2001 => "2001-04-01", 2002 => "2002-04-07", 2003 => "2003-04-06", 2004 => "2004-04-04", 2005 => "2005-04-03", 2006 => "2006-04-02", 2007 => "2007-04-01", 2008 => "2008-04-06", 2009 => "2009-04-05", 2010 => "2010-04-04", 2011 => "2011-04-03", 2012 => "2012-04-01", 2013 => "2013-04-07", 2014 => "2014-04-06", 2015 => "2015-04-05", 2016 => "2016-04-03", 2017 => "2017-04-02", 2018 => "2018-04-01", 2019 => "2019-04-07", 2020 => "2020-04-05", }.tap{|hsh| hsh.each{|year,date_str| hsh[year] = Date.parse(date_str) } } - - def self.parse_boundary(str, *args) - require 'chronic' - rank, weekday, art, month = str.split(/\s+/) - if rank == 'last' - val = ['5th', '4th'].map{|wk| Chronic.parse([wk, weekday, art, month].join(' '), *args) }.compact.first - else - val = Chronic.parse(str, *args) - end - Date.new(val.year, val.month, val.day) - end - - def self.beg_date(rule, year) - DST_RULES[rule][:beg_dates][year] ||= parse_boundary(DST_RULES[rule][:beg_doy], now: Time.utc(year, 1, 1)) - end - def self.end_date(rule, year) - DST_RULES[rule][:end_dates][year] ||= parse_boundary(DST_RULES[rule][:end_doy], now: Time.utc(year, 1, 1)) - end - - def self.table - %w[E A S O Z].each{|rule| YEARS.each{|year| beg_date(rule, year) ; end_date(rule, year) } } - DST_RULES - end - - def self.dst?(rule, val) - early = beg_date(rule, val.year) - late = end_date(rule, val.year) - in_range = (val >= early) && (val < late) - DST_RULES[rule][:southern] ? 
(not in_range) : in_range - end - - end -end diff --git a/munging/airline_flights/topcities.rb b/munging/airline_flights/topcities.rb deleted file mode 100755 index 223118b..0000000 --- a/munging/airline_flights/topcities.rb +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env ruby -require('rake') -require_relative('../../rake_helper') -require_relative './models' - -Pathname.register_paths( - af_data: [:data, 'airline_flights'], - af_work: [:work, 'airline_flights'], - af_code: File.dirname(__FILE__), - airport_identifiers: [:af_work, "airport_identifiers.tsv" ], - ) - -AIRPORTS_TO_MATCH = [ - [ 'Tokyo', 1, "HND", ], - [ 'Guangzhou', 2, "CAN", ], - [ 'Seoul', 3, "ICN", ], - [ 'Shanghai', 4, "PVG", ], - [ 'Mexico.*City', 5, "MEX", ], - [ 'Delhi', 6, "DEL", ], - [ 'New.*York', 7, "JFK", ], - [ 'S.*o.*Paulo', 8, "GRU", ], - [ 'Mumbai|Bombay', 9, "BOM", ], - [ 'Manila', 10, "MNL", ], - [ 'Jakarta', 11, "CGK", ], - [ 'Los.*Angeles', 12, "LAX", ], - [ 'Karachi', 13, "KHI", ], - [ 'Osaka', 14, "KIX", ], - [ 'Beijing', 15, "PEK", ], - [ 'Moscow', 16, "SVO", ], - [ 'Cairo', 17, "CAI", ], - [ 'Kolkata|Calcutta', 18, "CCU", ], - [ 'Buenos.*Aires', 19, "EZE", ], - [ 'Dhaka', 20, "DAC", ], - [ 'Bangkok', 21, "BKK", ], - [ 'Tehran|Abyek', 22, "IKA", ], - [ 'Istanbul', 23, "IST", ], - [ 'Janeiro', 24, "GIG", ], - [ 'London', 25, "LHR", ], - [ 'Lagos', 26, "LOS", ], - [ 'Paris', 27, "CDG", ], - [ 'Chicago', 28, "ORD", ], - [ 'Kinshasa', 29, "FIH", ], - [ 'Lima', 30, "LIM", ], - [ 'Wuhan', 31, "WUH", ], - [ 'Bangalore', 32, "BLR", ], - [ 'Bogot.*', 33, "BOG", ], - [ 'Taipei', 34, "TSA", ], - [ 'Washington|Arling', 35, "DCA", ], - [ 'Johannesburg', 36, "JNB", ], - [ 'Saigon|Ho.Chi.M', 37, "SGN", ], - [ 'San.*Francisco', 38, "SFO", ], - [ 'Boston', 39, "BOS", ], - [ 'Hong.*Kong', 40, "HKG", ], - [ 'Baghdad', 41, "SDA", ], - [ 'Madrid', 42, "MAD", ], - [ 'Singapore', 43, "SIN", ], - [ 'Kuala.*Lumpur', 44, "KUL", ], - [ 'Chongqing|Chung.*', 45, "CKG", ], - [ 'Santiago', 46, "SCL", ], - [ 'Toronto', 47, "YYZ", ], - [ 'Riyadh', 48, "RUH", ], - [ 'Atlanta', 49, "ATL", ], - [ 'Miami', 50, "MIA", ], - [ 'Detroit', 51, "DTW", ], - [ 'St..*Petersburg', 52, "LED", ], - [ 'Khartoum', 53, "KRT", ], - [ 'Sydney', 54, "SYD", ], - [ 'Milan', 55, "MXP", ], - [ 'Abidjan', 56, "ABJ", ], - [ 'Barcelona', 57, "BCN", ], - [ 'Nairobi', 58, "NBO", ], - [ 'Caracas', 59, "CCS", ], - [ 'Monterrey', 60, "MTY", ], - [ 'Phoenix', 61, "PHX", ], - [ 'Berlin', 62, "TXL", ], - [ 'Melbourne', 63, "MEL", ], - [ 'Casablanca', 64, "CMN", ], - [ 'Montreal', 65, "YUL", ], - [ 'Salvador', 66, "SSA", ], - [ 'Rome', 67, "FCO", ], - [ 'Kiev', 68, "KBP", ], - [ 'Ad+is.*Ab.ba', 69, "ADD", ], - [ 'Denver', 70, "DEN", ], - [ 'St.*Louis', 71, "STL", ], - [ 'Dakar', 72, "DKR", ], - [ 'San.*Juan', 73, "SJU", ], - [ 'Vancouver', 74, "YVR", ], - [ 'Tel.*Aviv', 75, "TLV", ], - [ 'Tunis', 76, "TUN", ], - [ 'Portland', 77, "PDX", ], - [ 'Manaus', 78, "MAO", ], - [ 'Calgary', 79, "YYC", ], - [ 'Halifax', 80, "YHZ", ], - [ 'Prague', 81, "PRG", ], - [ 'Copenhagen', 82, "CPH", ], - [ 'Djibouti', 83, "JIB", ], - [ 'Quito', 84, "UIO", ], - [ 'Helsinki', 85, "HEL", ], - [ 'Papeete|Tahiti', 86, "PPT", ], - [ 'Frankfurt', 87, "FRA", ], - [ 'Reykjavik', 88, "RKV", ], - [ 'Riga', 89, "RIX", ], - [ 'Antananarivo', 90, "TNR", ], - [ 'Amsterdam', 91, "AMS", ], - [ 'Bucharest', 92, "OTP", ], - [ 'Novosibirsk', 93, "OVB", ], - [ 'Kigali', 94, "KGL", ], - [ 'Dushanbe', 95, "DYU", ], - [ 'Dubai', 96, "DXB", ], - [ 'Bermuda', 97, "BDA", ], - [ 'Anchorage', 98, "ANC", ], - [ 'Austin', 
99, "AUS", ], - [ 'Honolulu', 100, "HNL", ], - [ 'Apia', 101, "FGI", ], - [ 'Vienna', 102, "VIE", ], - [ 'Brussels', 103, "BRU", ], - [ 'Munich', 104, "MUC", ], - [ 'Dublin', 105, "DUB", ], - [ 'Doha', 106, "DOH", ], - [ 'Taipei', 107, "TPE", ], - [ 'Yakutsk', 108, "YKS", ], - [ 'Z.rich', 109, "ZRH", ], - [ 'Manchester', 110, "MAN", ], - [ 'Houston', 111, "IAH", ], - [ 'Charlotte', 112, "CLT", ], - [ 'Dallas', 113, "DFW", ], - [ 'Las.*Vegas', 114, "LAS", ], - [ 'Antalya', 115, "AYT", ], - [ 'Auckland', 116, "AKL", ], -] - -MATCHED_AIRPORTS = {} -MATCH_ON_IATA = {} -MATCH_ON_CITY = {} -match_on_city_names = [] - -AIRPORTS_TO_MATCH.each do |name, idx, iata| - hsh = {iata: iata, re: Regexp.new(name, 'i'), name: name, idx: idx} - if iata.present? - MATCH_ON_IATA[iata] = hsh - else - match_on_city_names << name - MATCH_ON_CITY[hsh[:re]] = hsh - end -end -match_on_city_re = Regexp.new(match_on_city_names.join('|')) - -Airport.load_tsv(:airport_identifiers) do |airport| - airport.name = airport.name[0..30] - if MATCH_ON_IATA.include?(airport.iata) - hsh = MATCH_ON_IATA[airport.iata] - warn [hsh.values, airport.to_tsv].flatten.join("\t") unless hsh[:re] =~ airport.city - MATCHED_AIRPORTS[hsh[:idx]] = airport - # elsif (airport.city =~ match_on_city_re) - # MATCH_ON_CITY.each do |re, hsh| - # if (airport.city =~ re) - # puts [airport.to_tsv, hsh[:name], hsh[:idx]].join("\t") - # end - # end - end -end - -AIRPORTS_TO_MATCH.each do |name, idx, iata| - # next if MATCHED_AIRPORTS[idx] - airport_str = MATCHED_AIRPORTS[idx] ? MATCHED_AIRPORTS[idx].to_tsv : "\t\t\t\t\t\t\t\t\t\t\t\t" - puts [airport_str, name, "", idx].join("\t") -end diff --git a/munging/airports/40_wbans.txt b/munging/airports/40_wbans.txt deleted file mode 100755 index 4b7ab72..0000000 --- a/munging/airports/40_wbans.txt +++ /dev/null @@ -1,40 +0,0 @@ -13874 -13874 -14739 -13881 -13881 -03017 -03017 -03927 -03927 -94847 -94847 -14734 -14734 -53127 -99999 -12960 -12960 -94789 -94789 -23169 -23169 -23174 -23174 -12815 -12815 -12839 -12839 -14922 -14922 -94846 -94846 -13739 -13739 -23183 -23183 -99999 -24233 -24233 -23234 -23234 diff --git a/munging/airports/filter_weather_reports.rb b/munging/airports/filter_weather_reports.rb deleted file mode 100755 index ca231f1..0000000 --- a/munging/airports/filter_weather_reports.rb +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env ruby -# encoding:UTF-8 - -require 'wukong' -require 'pathname' -load '/home/dlaw/dev/wukong/examples/wikipedia/munging_utils.rb' - -module WeatherFilter - class Mapper < Wukong::Streamer::LineStreamer - - WBAN_FILENAME = '/home/dlaw/dev/wukong/examples/airports/wbans.txt' - USA_WBAN_FILENAME = '/home/dlaw/dev/wukong/examples/airports/usa_wbans.txt' - FORTY_WBANS_FILENAME = '/home/dlaw/dev/wukong/examples/airports/40_wbans.txt' - - def initialize - @wbans = [] - wban_file = File.open(FORTY_WBANS_FILENAME) - wban_file.each_line do |line| - @wbans << line[0..-2] - end - end - - def process line - MungingUtils.guard_encoding(line) do |clean_line| - wban = Pathname(ENV['map_input_file']).basename.to_s.split('-')[1] - if @wbans.include? wban - yield line - end - end - end - end -end - -Wukong::Script.new( - WeatherFilter::Mapper, - nil -).run diff --git a/munging/airports/join.pig b/munging/airports/join.pig deleted file mode 100644 index 1fd7cd9..0000000 --- a/munging/airports/join.pig +++ /dev/null @@ -1,31 +0,0 @@ -/* This was a misguided attempt at generating a list of WBAN IDs assigned to airports by filtering the mshr_enhanced - * and joining it with isd_stations. 
This is misguided because mshr_enhanced contains much more data than isd_stations, - * and also contains multiple entries for each weather station, making it non-obvious how best to join the data. - * A simpler and better approach, taken in usa_wbans.pig and wbans.pig, is to filter and de-duplicate mshr_enhanced. - */ - -mshr = LOAD '/Users/dlaw/Desktop/stations/mshr_enhanced.tsv' AS - (source_id:chararray, source:chararray, begin_date:chararray, end_date:chararray, station_status:chararray, - ncdcstn_id:chararray, icao_id:chararray, wban_id:chararray, faa_id:chararray, nwsli_id:chararray, wmo_id:chararray, - coop_id:chararray, transmittal_id:chararray, ghcnd_id:chararray, name_principal:chararray, name_principal_short:chararray, - name_coop:chararray, name_coop_short:chararray, name_publication:chararray, name_alias:chararray, nws_clim_div:chararray, - nws_clim_div_name:chararray, state_prov:chararray, county:chararray, nws_st_code:chararray, fips_country_code:chararray, - fips_country_name:chararray, nws_region:chararray, nws_wfo:chararray, elev_ground:chararray, elev_ground_unit:chararray, - elev_barom:chararray, elev_barom_unit:chararray, elev_air:chararray, elev_air_unit:chararray, elev_zerodat:chararray, - elev_zerodat_unit:chararray, elev_unk:chararray, elev_unk_unit:chararray, lat_dec:chararray, lon_dec:chararray, - lat_lon_precision:chararray, relocation:chararray, utc_offset:chararray, obs_env:chararray, platform:chararray); - -mshr_grouped = GROUP mshr BY (icao_id, wban_id, faa_id, begin_date, end_date); -mshr_final = FOREACH mshr_grouped GENERATE FLATTEN(group) AS (icao_id, wban_id, faa_id, begin_date, end_date); - -stations = LOAD '/Users/dlaw/Desktop/stations/stations.tsv' AS - (usaf_id:chararray, wban_id:chararray, station_name:chararray, wmo_country_id:chararray, fips_country_id:chararray, - state:chararray, icao_call_sign:chararray, latitude:chararray, longitude:chararray, elevation:chararray, begin:chararray, end:chararray); - -first_pass_j = JOIN mshr_final BY (wban_id) RIGHT OUTER, stations BY (wban_id); -first_pass_f = FILTER first_pass_j BY (mshr_final::icao_id is not null); -first_pass = FOREACH first_pass_f GENERATE - stations::wban_id, mshr_final::icao_id, stations::icao_call_sign, stations::usaf_id, mshr_final::faa_id, - stations::station_name, stations::wmo_country_id, stations::fips_country_id, stations::state, stations::latitude, stations::longitude, stations::elevation, stations::begin, stations::end; - -STORE first_pass INTO '/Users/dlaw/Desktop/stations/airport_stations'; diff --git a/munging/airports/to_tsv.rb b/munging/airports/to_tsv.rb deleted file mode 100755 index 1f04a90..0000000 --- a/munging/airports/to_tsv.rb +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env ruby -load 'flat/lib/flat.rb' - -# This is a script that uses the flat file parser -# to transform the mshr enhanced data file and the -# ISD stations list from fixed-width to .tsv. -# The script must be in the same directory as -# mshr_enhanced.txt, isd_stations.txt, and the -# flat file parsing library in order to work.
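# (Sketch, not part of the original script: the format strings below are a
# compact spelling of column slices, and the conversion amounts to "slice,
# strip, maybe rescale". Assuming only core Ruby -- COLUMN_SLICES and
# slice_to_tsv are hypothetical names, not the Flat library's API -- this
# covers the sN (N-char string) and DNeM (N chars, scaled down by 10**M)
# codes; the repo's format strings also use iN (integer) and _N (skip).)
COLUMN_SLICES = [
  # name,      width, decimal scale (nil = leave as string)
  [:usaf_id,   6,     nil],
  [:wban_id,   5,     nil],
  [:latitude,  6,     3  ],   # like D6e3: '+37517' => 37.517
]

def slice_to_tsv(line)
  offset = 0
  COLUMN_SLICES.map { |_name, width, scale|
    raw = line[offset, width].to_s.strip
    offset += width
    scale ? (raw.to_f / 10**scale).to_s : raw
  }.join("\t")
end

puts slice_to_tsv('72288023152+37517')   # => "722880\t23152\t37.517"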
- # mshr-enhanced format description can be found at -# ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt - -# The actual mshr-enhanced table can be found at -# http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip - -# isd_stations can be found at -# http://www1.ncdc.noaa.gov/pub/data/noaa/ish-history.txt - -# Format strings -MSHR_FORMAT_STRING = %{s20 s10 s8 s8 s20 s20 s20 s20 s20 s20 s20 s20 s20 s20 - s100 s30 s100 s30 s100 s100 s10 s40 s10 s50 s2 s2 s100 - s30 s10 s40 s20 s40 s20 s40 s20 s40 s20 s40 s20 s20 s20 - s10 s62 s16 s40 s100} -ISD_FORMAT_STRING = %{s6 s5 s29 s2 s2 s2 s5 D6e3 D7e3 D6e1 _2 s8 s8} - -# Parse mshr_enhanced -mshr_parser = Flat.create_parser(MSHR_FORMAT_STRING,1) -mshr_parser.file_to_tsv('mshr_enhanced.txt','mshr_enhanced.tsv') - -# Parse isd_stations -isd_parser = Flat.create_parser(ISD_FORMAT_STRING,1,false) -isd_parser.file_to_tsv('isd_stations.txt','isd_stations.tsv') diff --git a/munging/airports/usa_wbans.pig b/munging/airports/usa_wbans.pig deleted file mode 100644 index 4c07f69..0000000 --- a/munging/airports/usa_wbans.pig +++ /dev/null @@ -1,19 +0,0 @@ --- Outputs a list of WBAN IDs assigned to airports in the USA - -mshr = LOAD '/Users/dlaw/Desktop/stations/mshr_enhanced.tsv' AS - (source_id:chararray, source:chararray, begin_date:chararray, end_date:chararray, station_status:chararray, - ncdcstn_id:chararray, icao_id:chararray, wban_id:chararray, faa_id:chararray, nwsli_id:chararray, wmo_id:chararray, - coop_id:chararray, transmittal_id:chararray, ghcnd_id:chararray, name_principal:chararray, name_principal_short:chararray, - name_coop:chararray, name_coop_short:chararray, name_publication:chararray, name_alias:chararray, nws_clim_div:chararray, - nws_clim_div_name:chararray, state_prov:chararray, county:chararray, nws_st_code:chararray, fips_country_code:chararray, - fips_country_name:chararray, nws_region:chararray, nws_wfo:chararray, elev_ground:chararray, elev_ground_unit:chararray, - elev_barom:chararray, elev_barom_unit:chararray, elev_air:chararray, elev_air_unit:chararray, elev_zerodat:chararray, - elev_zerodat_unit:chararray, elev_unk:chararray, elev_unk_unit:chararray, lat_dec:chararray, lon_dec:chararray, - lat_lon_precision:chararray, relocation:chararray, utc_offset:chararray, obs_env:chararray, platform:chararray); - -mshr_grouped = GROUP mshr BY (icao_id, wban_id, faa_id, fips_country_code); -mshr_flattened = FOREACH mshr_grouped GENERATE FLATTEN(group) AS (icao_id, wban_id, faa_id, fips_country_code); -mshr_filtered = FILTER mshr_flattened BY (icao_id is not null and wban_id is not null and fips_country_code == 'US'); - -mshr_final = FOREACH mshr_filtered GENERATE wban_id; -STORE mshr_final INTO '/Users/dlaw/Desktop/stations/usa_wbans'; diff --git a/munging/airports/usa_wbans.txt b/munging/airports/usa_wbans.txt deleted file mode 100644 index cbadf40..0000000 --- a/munging/airports/usa_wbans.txt +++ /dev/null @@ -1,2157 +0,0 @@ -03069 -94119 -04204 -94991 -53928 -54779 -03196 -63844 -63875 -53929 -64773 -04201 -94076 -04849 -63834 -94037 -63839 -63876 -63877 -03051 -63878 -12981 -12971 -53866 -53904 -12978 -53862 -94061 -63848 -63879 -63847 -53990 -53966 -53972 -03053 -94298 -03032 -63827 -92807 -92807 -03064 -93226 -63880 -94993 -53964 -94623 -94070 -94943 -04862 -12832 -03974 -94299 -14737 -13962 -23050 -14929 -14929 -03019 -13869 -04863 -14756 -53991 -54916 -13959 -24283 -93730 -94997 -04847 -53930 -93940 -03970 -03043 -94998 -13705 -03034 -03064 -94968 -93915 -93065 -04827 -04993 -54770 -53909 -14762
-12804 -03820 -13873 -24044 -04828 -04864 -14930 -94999 -53931 -63853 -53175 -53870 -24015 -93773 -93773 -14813 -14735 -12932 -53146 -93097 -03958 -94910 -23061 -24160 -63833 -23047 -23047 -13870 -53989 -54816 -94989 -12899 -13871 -93846 -94974 -14847 -94975 -04850 -14736 -93067 -93227 -12897 -13701 -94849 -04837 -04837 -53983 -54917 -53932 -03949 -54768 -53915 -94889 -93991 -12953 -04808 -94790 -04865 -94987 -53865 -93073 -03973 -54754 -93796 -94224 -54809 -94929 -13874 -03035 -13958 -04825 -94287 -14946 -14605 -04901 -04902 -23224 -03892 -13904 -13958 -14897 -93797 -53933 -03812 -14777 -23191 -04903 -94190 -53959 -04205 -04904 -14910 -63901 -04779 -13861 -53129 -94815 -54772 -93216 -13944 -14775 -13803 -24119 -24119 -54817 -12971 -54904 -93942 -94946 -93240 -53881 -23159 -12803 -94961 -14816 -03036 -14740 -94702 -14702 -94871 -04751 -24028 -24234 -23155 -13838 -04905 -03024 -53882 -04725 -04725 -14606 -23005 -14616 -94055 -13876 -94289 -94793 -94947 -23157 -63835 -24033 -24011 -24011 -04839 -13820 -03065 -93068 -14958 -04842 -53145 -53988 -24130 -23036 -04853 -12982 -13726 -12818 -03872 -94902 -03859 -23158 -24217 -54760 -23225 -13802 -94046 -24180 -03893 -54831 -94700 -03999 -53823 -13897 -25312 -94185 -04906 -24131 -24267 -14739 -12809 -54765 -03044 -24164 -53918 -12917 -53883 -53992 -94938 -14931 -12919 -12919 -13904 -14815 -24135 -64705 -13970 -14742 -14964 -14733 -14733 -23156 -23152 -04866 -93783 -04813 -54921 -03959 -94282 -53934 -54733 -93943 -93808 -93721 -54922 -94054 -13814 -24133 -03182 -92804 -12981 -24132 -04867 -04839 -04868 -14817 -13883 -13883 -24046 -14895 -23051 -14607 -04907 -03732 -04908 -04909 -03037 -13825 -94625 -04101 -53884 -23254 -03038 -54764 -14966 -93129 -94977 -53916 -24017 -23007 -54743 -24286 -14703 -53850 -13884 -93069 -54901 -54828 -93967 -04805 -03935 -94866 -03914 -13882 -53128 -53935 -93736 -13880 -93203 -14990 -04910 -93809 -04869 -93798 -03802 -04911 -03881 -04912 -54920 -03894 -14820 -14820 -54832 -13999 -03904 -94266 -13881 -53845 -23136 -14821 -94870 -14858 -94940 -54923 -04913 -13984 -23086 -93033 -03179 -13981 -93075 -24045 -24136 -12867 -14745 -04914 -93037 -12947 -03945 -13983 -93799 -03701 -24089 -03960 -53981 -03039 -53936 -03027 -54905 -93134 -94199 -94624 -93718 -53860 -12924 -12924 -03177 -53912 -13866 -12879 -93842 -03932 -04915 -03847 -24137 -12833 -03727 -53867 -53993 -63840 -94032 -93814 -23077 -24202 -23008 -04870 -94890 -13941 -94979 -53902 -54774 -14751 -24018 -24018 -94204 -03164 -94056 -93728 -12834 -23161 -13960 -13728 -54791 -93235 -93815 -63881 -94908 -13743 -53852 -13985 -13985 -54781 -03887 -04916 -03017 -23062 -53925 -14822 -94119 -25515 -04851 -03927 -22015 -94057 -13839 -93843 -93042 -04135 -24012 -93771 -04871 -14747 -03073 -22001 -14913 -14913 -54833 -24138 -24219 -23109 -93784 -23078 -03994 -03724 -94984 -63903 -13837 -53885 -04917 -94891 -13707 -93770 -94878 -94892 -24103 -03702 -63842 -03160 -03976 -54844 -93005 -22010 -14933 -94704 -23162 -94962 -53905 -03991 -53853 -94847 -04830 -53937 -53938 -93026 -04872 -04787 -03070 -94928 -94982 -94982 -54924 -03184 -53910 -54734 -04918 -54786 -03809 -13910 -04919 -03987 -53965 -03049 -14905 -94239 -14991 -03983 -03983 -12983 -04920 -13786 -13787 -73805 -23098 -03703 -03197 -23114 -23179 -94721 -94050 -53864 -12906 -54758 -04873 -23063 -03844 -54838 -93076 -63843 -53886 -24213 -04806 -13729 -24121 -63882 -93992 -03977 -14748 -24220 -94964 -23044 -23154 -54757 -24165 -13989 -03165 -03704 -53112 -13909 -53887 -24193 -04845 -54766 -53876 -04921 -53851 -24141 -14608 -03020 
-53872 -14860 -25318 -12961 -04874 -14824 -94853 -13935 -03756 -94971 -53110 -04875 -04922 -63872 -24221 -94195 -92808 -04923 -93817 -04111 -53825 -94726 -53939 -93719 -14734 -12971 -53888 -53842 -03705 -12836 -53114 -03706 -53969 -53907 -93735 -93996 -14914 -93193 -93740 -93737 -94969 -24146 -23167 -24047 -94963 -94015 -53831 -13730 -03981 -03981 -03022 -14825 -04876 -04924 -94006 -03736 -53819 -53819 -04925 -94966 -13840 -53841 -03185 -04926 -94276 -03124 -54792 -04780 -04927 -94868 -13763 -54818 -04840 -03103 -12849 -13744 -03918 -13921 -93733 -14704 -23090 -12835 -94957 -94035 -94062 -14826 -53889 -23256 -94933 -13920 -14719 -04928 -04877 -12895 -53890 -54787 -13947 -94948 -14944 -14944 -04929 -13945 -13964 -23091 -04930 -53113 -03018 -13807 -13961 -03888 -03166 -04836 -03707 -14827 -53891 -54793 -03985 -03985 -12885 -03734 -93993 -54773 -03896 -13975 -93764 -12993 -13940 -94959 -03148 -23168 -94023 -23064 -53940 -03195 -04999 -53977 -23055 -24087 -13764 -24157 -53126 -53892 -24048 -53866 -24112 -14916 -14750 -03901 -14976 -94008 -94008 -64774 -04931 -12876 -23066 -03025 -04843 -53907 -23065 -23065 -93929 -13939 -04854 -12923 -53941 -13886 -94992 -03056 -24142 -12990 -94626 -93057 -12816 -53913 -14707 -04878 -24146 -53984 -93874 -94919 -14898 -14898 -53874 -24201 -14935 -03902 -03992 -94860 -94860 -13713 -14829 -13723 -03870 -03870 -14715 -24143 -53893 -53942 -93007 -23081 -94833 -03030 -53838 -13926 -03929 -13978 -03708 -53837 -24051 -53979 -63889 -53967 -04932 -03186 -54762 -04807 -53820 -54850 -53855 -13833 -03709 -93986 -12994 -04933 -04934 -03908 -04935 -94025 -12962 -03023 -03710 -53894 -94038 -63873 -03961 -14752 -93747 -93706 -93218 -03980 -93046 -03167 -94931 -54728 -24101 -94261 -04998 -53119 -53869 -13927 -03810 -93990 -14894 -24144 -03933 -53895 -23002 -94187 -53896 -53127 -04936 -53111 -03711 -93034 -14936 -13806 -03962 -12918 -94745 -94745 -53839 -94225 -63836 -53970 -04113 -03712 -12904 -13971 -03852 -63852 -53897 -93729 -94949 -93757 -12826 -03856 -94814 -64761 -03860 -53857 -63883 -03868 -93823 -14609 -12927 -26522 -13986 -23170 -14758 -94012 -93228 -92809 -54790 -63837 -94720 -12979 -94973 -03968 -04829 -04997 -04857 -53842 -63884 -03923 -93738 -04724 -12960 -94073 -04937 -94990 -03928 -03928 -53118 -24145 -64706 -94039 -53943 -53135 -93167 -04879 -93785 -04989 -53898 -54767 -53944 -04880 -25626 -04938 -03972 -13781 -13748 -13841 -13841 -04833 -04833 -24091 -94893 -93819 -93819 -53972 -23040 -14918 -23141 -93807 -23194 -03984 -14937 -53899 -03144 -14778 -14938 -54827 -92813 -94014 -93726 -04781 -04881 -04826 -94761 -03026 -23104 -23104 -94926 -94623 -12969 -04844 -93909 -93194 -93244 -54772 -24166 -03940 -53945 -13889 -13889 -03953 -13973 -53978 -94051 -03963 -04110 -94789 -03730 -53824 -04720 -63885 -04939 -03889 -03889 -13987 -04940 -14919 -03713 -14834 -63801 -53971 -04726 -53946 -94854 -53947 -53966 -53982 -14833 -54906 -63846 -04882 -03714 -14989 -25314 -24223 -53995 -03013 -14835 -12883 -93091 -14836 -24022 -23169 -03950 -23174 -23042 -23042 -54735 -24023 -23020 -53963 -13776 -12976 -54925 -03937 -03937 -54736 -13812 -04883 -94765 -12819 -94709 -93820 -13702 -93987 -13976 -14732 -23129 -03821 -24148 -94128 -53973 -53844 -03875 -23067 -03731 -93010 -13963 -53813 -53813 -14623 -04941 -03715 -94285 -04114 -53919 -94236 -53975 -24021 -14939 -14886 -04809 -63802 -14921 -54737 -94049 -24172 -64752 -53861 -04831 -04831 -13810 -03849 -23243 -04849 -12907 -03930 -04942 -93041 -04108 -14920 -13829 -23112 -03982 -03982 -13902 -93774 -53964 -23111 -93812 -04995 -12975 
-23285 -24150 -04943 -23054 -53827 -54826 -53801 -03997 -94991 -94723 -24149 -24036 -13809 -93767 -04944 -14994 -53879 -93009 -13733 -54926 -03952 -03952 -63803 -63851 -93242 -23023 -23023 -03818 -03817 -94296 -94052 -94894 -14845 -94048 -93919 -23208 -54820 -23257 -12810 -03947 -94040 -03813 -12815 -14940 -03071 -93810 -63804 -14711 -14819 -54911 -93782 -24152 -13865 -13893 -23203 -53996 -14891 -12959 -94985 -24225 -03716 -13864 -04945 -04789 -13895 -04884 -53817 -13736 -53859 -94950 -03936 -23206 -24215 -14710 -03183 -93768 -12839 -94011 -94960 -94895 -13735 -94988 -04946 -13988 -14839 -14840 -63805 -03811 -53997 -93953 -14954 -04834 -12838 -12838 -93950 -24151 -04992 -23176 -14923 -63886 -24154 -24037 -14610 -13942 -03181 -54788 -94976 -94887 -03858 -54738 -94273 -03040 -94896 -04855 -13894 -13894 -23258 -04885 -24013 -26621 -04948 -54789 -94705 -94983 -04947 -04949 -14753 -13766 -94850 -94850 -13827 -13734 -93035 -93765 -54907 -63859 -53832 -23259 -13896 -14837 -24153 -14922 -94725 -54746 -04103 -12916 -14804 -12896 -93013 -93744 -53802 -54780 -63806 -94897 -04104 -54723 -24106 -04950 -93236 -04951 -54771 -93894 -94986 -94724 -03865 -94869 -24110 -63807 -93985 -04952 -14755 -63887 -93136 -53921 -13821 -04953 -53826 -03131 -94182 -03041 -13717 -93205 -04954 -94852 -93831 -93901 -12958 -14855 -93727 -53847 -14780 -63890 -53917 -13762 -03154 -53848 -93102 -13911 -12926 -13750 -12946 -23239 -13721 -14611 -93104 -93837 -12925 -93776 -23199 -93743 -53849 -13754 -03190 -53115 -93107 -23110 -03866 -12974 -24228 -03855 -93839 -93839 -12928 -12850 -94297 -03853 -23240 -93115 -93841 -93116 -93111 -93114 -13769 -93117 -03757 -23244 -24255 -12973 -93121 -14793 -94728 -13773 -03145 -93832 -93101 -14790 -93112 -94299 -93241 -93753 -23230 -93217 -94980 -12861 -53948 -03741 -03031 -94958 -04886 -54912 -14949 -14941 -93775 -94063 -53854 -24126 -26632 -53833 -03967 -53121 -13967 -54807 -53949 -03717 -94703 -14622 -94017 -24227 -03196 -94951 -63808 -53822 -04955 -14942 -03718 -94197 -92822 -04956 -04957 -93040 -24162 -24285 -03102 -03737 -54819 -54919 -12882 -63809 -53868 -54752 -04958 -04959 -94846 -54756 -13737 -53998 -94746 -12841 -04224 -53962 -14808 -94855 -04804 -94927 -24284 -14950 -04106 -03948 -93210 -04960 -94994 -04961 -53803 -54704 -93786 -64707 -93110 -04962 -03850 -04887 -03957 -03163 -54813 -94898 -94899 -94173 -03170 -04109 -04856 -12968 -24222 -03816 -03816 -13846 -23289 -27506 -93988 -04742 -64776 -54913 -12844 -04832 -92824 -54852 -04963 -53863 -24155 -24155 -24229 -04964 -54778 -03058 -54927 -03882 -03162 -12812 -13783 -04852 -93741 -13739 -04888 -24024 -23183 -14842 -53808 -12873 -24156 -12957 -24025 -94823 -03804 -94967 -14962 -12986 -94733 -14841 -23182 -25625 -92805 -94086 -13969 -94732 -54914 -13899 -04889 -13923 -13714 -03180 -03931 -03993 -03975 -14757 -03052 -03998 -53950 -14604 -53858 -04965 -93209 -23184 -63810 -63874 -25330 -93955 -14761 -24163 -14763 -13868 -04743 -93983 -93138 -12935 -93714 -26526 -94817 -12936 -93978 -23149 -54782 -93058 -93141 -94129 -03021 -64708 -14765 -53951 -24174 -23097 -03954 -54915 -53952 -04838 -14764 -94263 -54769 -03045 -54825 -04134 -94818 -03171 -14806 -24090 -12995 -93772 -03971 -24231 -24216 -12984 -24006 -54928 -03738 -24257 -14712 -14767 -04966 -24230 -94925 -13722 -23021 -94022 -94107 -94822 -04967 -53104 -04803 -63811 -93232 -13740 -03846 -03016 -23069 -23119 -24061 -24061 -94601 -12972 -53953 -24027 -53834 -14717 -64775 -93801 -03735 -54821 -12911 -04968 -23043 -53120 -23185 -54824 -94248 -13741 -14768 -53954 -04969 -23009 -04970 
-54909 -53965 -04890 -14864 -03029 -53999 -92802 -04891 -04971 -93997 -53958 -14925 -63841 -12894 -23052 -94288 -53920 -63812 -94737 -53908 -14992 -93759 -24057 -23901 -94194 -54834 -63813 -04972 -93781 -04113 -23232 -93084 -23049 -23188 -63814 -12921 -03822 -94836 -54910 -23190 -23122 -04841 -14848 -93206 -94033 -93720 -04741 -23237 -04973 -23187 -93821 -03192 -03178 -94028 -24233 -03172 -53143 -14770 -03969 -53904 -94290 -12854 -94990 -94176 -64709 -23234 -03719 -04107 -04996 -64710 -13995 -13995 -63888 -92814 -12977 -04974 -13925 -23186 -93760 -04975 -94227 -24029 -13957 -13957 -03763 -24259 -23293 -23293 -93027 -23034 -23034 -04892 -24114 -12909 -03012 -14846 -04976 -24127 -24232 -53955 -04893 -53141 -94740 -03919 -93998 -03879 -93937 -63815 -93225 -24196 -93197 -24237 -54785 -23273 -93184 -03050 -93911 -23233 -04112 -24030 -03059 -03720 -03101 -53117 -93804 -04201 -03028 -92806 -93822 -13966 -14972 -04894 -93231 -53985 -23902 -12871 -93083 -53906 -13849 -12970 -13878 -14847 -14926 -04895 -13993 -13994 -14927 -23213 -23274 -92815 -04824 -94161 -03966 -03721 -23202 -54908 -14943 -93063 -24258 -63816 -13824 -14714 -03965 -23033 -24235 -54930 -14771 -13930 -04129 -12975 -12957 -03044 -12984 -12980 -23070 -54777 -63817 -03938 -63818 -92801 -23048 -93806 -24207 -93045 -03722 -24241 -04848 -94741 -54822 -03011 -04102 -93778 -94087 -13919 -04896 -94274 -12898 -54931 -53914 -54953 -93805 -93805 -03979 -12888 -63819 -04977 -03191 -03122 -04978 -03878 -94830 -13996 -94053 -12842 -92825 -23153 -93984 -94978 -53956 -13877 -93201 -53911 -03104 -03723 -24242 -54823 -14792 -12886 -13968 -73804 -93862 -23160 -14850 -94956 -93230 -03996 -94178 -04979 -03986 -13977 -13972 -13891 -54776 -63825 -94194 -04114 -04116 -04110 -94184 -94281 -94794 -63820 -03742 -94047 -04897 -94981 -14880 -94240 -93989 -63821 -23275 -64753 -04980 -03946 -53901 -04898 -54739 -53806 -23903 -53903 -14787 -12985 -53871 -13857 -53856 -93780 -93214 -03978 -93234 -93241 -12912 -23131 -92816 -63822 -94030 -53123 -13997 -93144 -53818 -93999 -03838 -93845 -54829 -03995 -23130 -94930 -53873 -13858 -04846 -63823 -12843 -54740 -04858 -04981 -24032 -03042 -53101 -63824 -94298 -92817 -93795 -04982 -04983 -63845 -04899 -63832 -04221 -93775 -03725 -26618 -93739 -53986 -03832 -24198 -53130 -03159 -13932 -24128 -13860 -14706 -24062 -14794 -23277 -14615 -03726 -53957 -94163 -92802 -53976 -12868 -53922 -04984 -14853 -24243 -94911 -14852 -23195 -53116 -93044 -93824 -25708 -25715 -25348 -25375 -25331 -27401 -26615 -26415 -26645 -46402 -27502 -26533 -26497 -25316 -25624 -26418 -25361 -26410 -26445 -26633 -26646 -26643 -26649 -25704 -25513 -25501 -26486 -25616 -26641 -96401 -26401 -26422 -25623 -26407 -25357 -26650 -26650 -26523 -26554 -26411 -26498 -25369 -26555 -26551 -26519 -26501 -26501 -26509 -26465 -26513 -26425 -26628 -26703 -25310 -25322 -25335 -26521 -26548 -26548 -26552 -25323 -25507 -26651 -26522 -26499 -25376 -25521 -25404 -25519 -25506 -26535 -46403 -25518 -25309 -96402 -25503 -26602 -25503 -26542 -25325 -27408 -26502 -25367 -26652 -26491 -26546 -27517 -26480 -26631 -26557 -26510 -25402 -25403 -26512 -26524 -25377 -26621 -26635 -26409 -46404 -26622 -26451 -26516 -26435 -25308 -26644 -25378 -26617 -26653 -26412 -26616 -25628 -26639 -24690 -25329 -25508 -25604 -25613 -26623 -27515 -46405 -26654 -26561 -26704 -27406 -25617 -26625 -25333 -26558 -26553 -26647 -25713 -25516 -26439 -25341 -26414 -26534 -26514 -26559 -45715 -26529 -26634 -25629 -26528 -26536 -26492 -26492 -27518 -46406 -26508 -26627 -26560 -26442 -26479 -26642 -26442 -26438 
-25338 -27503 -27503 -26630 -26648 -26444 -26443 -26445 -26484 -25339 -46407 -26413 -22501 -22508 -22552 -22551 -22548 -22550 -21510 -22536 -22534 -22547 -22539 -22514 -22519 -22521 -22517 -22524 -22516 -22526 -21508 -21504 -22513 -27403 -26624 -26638 -11640 -11624 -11603 -11653 -11630 -11630 -11641 -11655 -11649 diff --git a/munging/airports/wbans.pig b/munging/airports/wbans.pig deleted file mode 100644 index 7d1385a..0000000 --- a/munging/airports/wbans.pig +++ /dev/null @@ -1,19 +0,0 @@ --- Outputs a list of WBAN IDs assigned to airports - -mshr = LOAD '/Users/dlaw/Desktop/stations/mshr_enhanced.tsv' AS - (source_id:chararray, source:chararray, begin_date:chararray, end_date:chararray, station_status:chararray, - ncdcstn_id:chararray, icao_id:chararray, wban_id:chararray, faa_id:chararray, nwsli_id:chararray, wmo_id:chararray, - coop_id:chararray, transmittal_id:chararray, ghcnd_id:chararray, name_principal:chararray, name_principal_short:chararray, - name_coop:chararray, name_coop_short:chararray, name_publication:chararray, name_alias:chararray, nws_clim_div:chararray, - nws_clim_div_name:chararray, state_prov:chararray, county:chararray, nws_st_code:chararray, fips_country_code:chararray, - fips_country_name:chararray, nws_region:chararray, nws_wfo:chararray, elev_ground:chararray, elev_ground_unit:chararray, - elev_barom:chararray, elev_barom_unit:chararray, elev_air:chararray, elev_air_unit:chararray, elev_zerodat:chararray, - elev_zerodat_unit:chararray, elev_unk:chararray, elev_unk_unit:chararray, lat_dec:chararray, lon_dec:chararray, - lat_lon_precision:chararray, relocation:chararray, utc_offset:chararray, obs_env:chararray, platform:chararray); - -mshr_grouped = GROUP mshr BY (icao_id, wban_id, faa_id); -mshr_flattened = FOREACH mshr_grouped GENERATE FLATTEN(group) AS (icao_id, wban_id, faa_id); -mshr_filtered = FILTER mshr_flattened BY (icao_id is not null and wban_id is not null); - -mshr_final = FOREACH mshr_filtered GENERATE wban_id; -STORE mshr_final INTO '/Users/dlaw/Desktop/stations/wbans'; diff --git a/munging/airports/wbans.txt b/munging/airports/wbans.txt deleted file mode 100644 index 1d82d7d..0000000 --- a/munging/airports/wbans.txt +++ /dev/null @@ -1,2310 +0,0 @@ -16201 -24295 -94153 -25212 -14633 -94234 -25112 -94795 -94241 -25146 -25166 -24290 -94110 -14631 -25149 -25113 -24299 -25305 -25238 -25248 -25108 -25142 -24293 -25141 -14648 -25131 -25232 -24294 -14673 -25116 -25155 -25220 -25125 -94151 -25222 -25105 -25241 -25129 -25151 -04706 -25013 -24298 -94243 -25101 -25345 -25251 -25143 -25148 -25252 -25119 -94810 -94108 -14625 -24292 -25115 -14509 -14646 -25224 -15908 -25121 -25120 -25150 -25122 -04715 -94792 -25123 -24287 -14996 -25247 -25111 -25118 -25231 -25206 -25229 -24288 -25110 -25225 -25218 -94116 -24297 -94111 -94791 -25153 -25253 -25346 -25223 -25124 -15613 -14636 -25255 -94646 -94149 -25165 -15044 -50101 -70701 -03069 -94119 -04204 -94991 -53928 -54779 -03196 -63844 -63875 -53929 -64773 -04201 -94076 -04849 -63834 -94037 -63839 -63876 -63877 -03051 -63878 -12981 -12971 -53866 -53904 -12978 -53862 -94061 -63848 -63879 -63847 -53990 -53966 -53972 -03053 -94298 -03032 -63827 -92807 -92807 -03064 -93226 -63880 -94993 -53964 -94623 -94070 -94943 -04862 -12832 -03974 -94299 -14737 -13962 -23050 -14929 -14929 -03019 -13869 -04863 -14756 -53991 -54916 -13959 -24283 -93730 -94997 -04847 -53930 -93940 -03970 -03043 -94998 -13705 -03034 -03064 -94968 -93915 -93065 -04827 -04993 -54770 -53909 -14762 -12804 -03820 -13873 -24044 -04828 -04864 -14930 -94999 -53931
-63853 -53175 -53870 -24015 -93773 -93773 -14813 -14735 -12932 -53146 -93097 -03958 -94910 -23061 -24160 -63833 -23047 -23047 -13870 -53989 -54816 -94989 -12899 -13871 -93846 -94974 -14847 -94975 -04850 -14736 -93067 -93227 -12897 -13701 -94849 -04837 -04837 -53983 -54917 -53932 -03949 -54768 -53915 -94889 -93991 -12953 -04808 -94790 -04865 -94987 -53865 -93073 -03973 -54754 -93796 -94224 -54809 -94929 -13874 -03035 -13958 -04825 -94287 -14946 -14605 -04901 -04902 -23224 -03892 -13904 -13958 -14897 -93797 -53933 -03812 -14777 -23191 -04903 -94190 -53959 -04205 -04904 -14910 -63901 -04779 -13861 -53129 -94815 -54772 -93216 -13944 -14775 -13803 -24119 -24119 -54817 -12971 -54904 -93942 -94946 -93240 -53881 -23159 -12803 -94961 -14816 -03036 -14740 -94702 -14702 -94871 -04751 -24028 -24234 -23155 -13838 -04905 -03024 -53882 -04725 -04725 -14606 -23005 -14616 -94055 -13876 -94289 -94793 -94947 -23157 -63835 -24033 -24011 -24011 -04839 -13820 -03065 -93068 -14958 -04842 -53145 -53988 -24130 -23036 -04853 -12982 -13726 -12818 -03872 -94902 -03859 -23158 -24217 -54760 -23225 -13802 -94046 -24180 -03893 -54831 -94700 -03999 -53823 -13897 -25312 -94185 -04906 -24131 -24267 -14739 -12809 -54765 -03044 -24164 -53918 -12917 -53883 -53992 -94938 -14931 -12919 -12919 -13904 -14815 -24135 -64705 -13970 -14742 -14964 -14733 -14733 -23156 -23152 -04866 -93783 -04813 -54921 -03959 -94282 -53934 -54733 -93943 -93808 -93721 -54922 -94054 -13814 -24133 -03182 -92804 -12981 -24132 -04867 -04839 -04868 -14817 -13883 -13883 -24046 -14895 -23051 -14607 -04907 -03732 -04908 -04909 -03037 -13825 -94625 -04101 -53884 -23254 -03038 -54764 -14966 -93129 -94977 -53916 -24017 -23007 -54743 -24286 -14703 -53850 -13884 -93069 -54901 -54828 -93967 -04805 -03935 -94866 -03914 -13882 -53128 -53935 -93736 -13880 -93203 -14990 -04910 -93809 -04869 -93798 -03802 -04911 -03881 -04912 -54920 -03894 -14820 -14820 -54832 -13999 -03904 -94266 -13881 -53845 -23136 -14821 -94870 -14858 -94940 -54923 -04913 -13984 -23086 -93033 -03179 -13981 -93075 -24045 -24136 -12867 -14745 -04914 -93037 -12947 -03945 -13983 -93799 -03701 -24089 -03960 -53981 -03039 -53936 -03027 -54905 -93134 -94199 -94624 -93718 -53860 -12924 -12924 -03177 -53912 -13866 -12879 -93842 -03932 -04915 -03847 -24137 -12833 -03727 -53867 -53993 -63840 -94032 -93814 -23077 -24202 -23008 -04870 -94890 -13941 -94979 -53902 -54774 -14751 -24018 -24018 -94204 -03164 -94056 -93728 -12834 -23161 -13960 -13728 -54791 -93235 -93815 -63881 -94908 -13743 -53852 -13985 -13985 -54781 -03887 -04916 -03017 -23062 -53925 -14822 -94119 -25515 -04851 -03927 -22015 -94057 -13839 -93843 -93042 -04135 -24012 -93771 -04871 -14747 -03073 -22001 -14913 -14913 -54833 -24138 -24219 -23109 -93784 -23078 -03994 -03724 -94984 -63903 -13837 -53885 -04917 -94891 -13707 -93770 -94878 -94892 -24103 -03702 -63842 -03160 -03976 -54844 -93005 -22010 -14933 -94704 -23162 -94962 -53905 -03991 -53853 -94847 -04830 -53937 -53938 -93026 -04872 -04787 -03070 -94928 -94982 -94982 -54924 -03184 -53910 -54734 -04918 -54786 -03809 -13910 -04919 -03987 -53965 -03049 -14905 -94239 -14991 -03983 -03983 -12983 -04920 -13786 -13787 -73805 -23098 -03703 -03197 -23114 -23179 -94721 -94050 -53864 -12906 -54758 -04873 -23063 -03844 -54838 -93076 -63843 -53886 -24213 -04806 -13729 -24121 -63882 -93992 -03977 -14748 -24220 -94964 -23044 -23154 -54757 -24165 -13989 -03165 -03704 -53112 -13909 -53887 -24193 -04845 -54766 -53876 -04921 -53851 -24141 -14608 -03020 -53872 -14860 -25318 -12961 -04874 -14824 -94853 -13935 -03756 
-94971 -53110 -04875 -04922 -63872 -24221 -94195 -92808 -04923 -93817 -04111 -53825 -94726 -53939 -93719 -14734 -12971 -53888 -53842 -03705 -12836 -53114 -03706 -53969 -53907 -93735 -93996 -14914 -93193 -93740 -93737 -94969 -24146 -23167 -24047 -94963 -94015 -53831 -13730 -03981 -03981 -03022 -14825 -04876 -04924 -94006 -03736 -53819 -53819 -04925 -94966 -13840 -53841 -03185 -04926 -94276 -03124 -54792 -04780 -04927 -94868 -13763 -54818 -04840 -03103 -12849 -13744 -03918 -13921 -93733 -14704 -23090 -12835 -94957 -94035 -94062 -14826 -53889 -23256 -94933 -13920 -14719 -04928 -04877 -12895 -53890 -54787 -13947 -94948 -14944 -14944 -04929 -13945 -13964 -23091 -04930 -53113 -03018 -13807 -13961 -03888 -03166 -04836 -03707 -14827 -53891 -54793 -03985 -03985 -12885 -03734 -93993 -54773 -03896 -13975 -93764 -12993 -13940 -94959 -03148 -23168 -94023 -23064 -53940 -03195 -04999 -53977 -23055 -24087 -13764 -24157 -53126 -53892 -24048 -53866 -24112 -14916 -14750 -03901 -14976 -94008 -94008 -64774 -04931 -12876 -23066 -03025 -04843 -53907 -23065 -23065 -93929 -13939 -04854 -12923 -53941 -13886 -94992 -03056 -24142 -12990 -94626 -93057 -12816 -53913 -14707 -04878 -24146 -53984 -93874 -94919 -14898 -14898 -53874 -24201 -14935 -03902 -03992 -94860 -94860 -13713 -14829 -13723 -03870 -03870 -14715 -24143 -53893 -53942 -93007 -23081 -94833 -03030 -53838 -13926 -03929 -13978 -03708 -53837 -24051 -53979 -63889 -53967 -04932 -03186 -54762 -04807 -53820 -54850 -53855 -13833 -03709 -93986 -12994 -04933 -04934 -03908 -04935 -94025 -12962 -03023 -03710 -53894 -94038 -63873 -03961 -14752 -93747 -93706 -93218 -03980 -93046 -03167 -94931 -54728 -24101 -94261 -04998 -53119 -53869 -13927 -03810 -93990 -14894 -24144 -03933 -53895 -23002 -94187 -53896 -53127 -04936 -53111 -03711 -93034 -14936 -13806 -03962 -12918 -94745 -94745 -53839 -94225 -63836 -53970 -04113 -03712 -12904 -13971 -03852 -63852 -53897 -93729 -94949 -93757 -12826 -03856 -94814 -64761 -03860 -53857 -63883 -03868 -93823 -14609 -12927 -26522 -13986 -23170 -14758 -94012 -93228 -92809 -54790 -63837 -94720 -12979 -94973 -03968 -04829 -04997 -04857 -53842 -63884 -03923 -93738 -04724 -12960 -94073 -04937 -94990 -03928 -03928 -53118 -24145 -64706 -94039 -53943 -53135 -93167 -04879 -93785 -04989 -53898 -54767 -53944 -04880 -25626 -04938 -03972 -13781 -13748 -13841 -13841 -04833 -04833 -24091 -94893 -93819 -93819 -53972 -23040 -14918 -23141 -93807 -23194 -03984 -14937 -53899 -03144 -14778 -14938 -54827 -92813 -94014 -93726 -04781 -04881 -04826 -94761 -03026 -23104 -23104 -94926 -94623 -12969 -04844 -93909 -93194 -93244 -54772 -24166 -03940 -53945 -13889 -13889 -03953 -13973 -53978 -94051 -03963 -04110 -94789 -03730 -53824 -04720 -63885 -04939 -03889 -03889 -13987 -04940 -14919 -03713 -14834 -63801 -53971 -04726 -53946 -94854 -53947 -53966 -53982 -14833 -54906 -63846 -04882 -03714 -14989 -25314 -24223 -53995 -03013 -14835 -12883 -93091 -14836 -24022 -23169 -03950 -23174 -23042 -23042 -54735 -24023 -23020 -53963 -13776 -12976 -54925 -03937 -03937 -54736 -13812 -04883 -94765 -12819 -94709 -93820 -13702 -93987 -13976 -14732 -23129 -03821 -24148 -94128 -53973 -53844 -03875 -23067 -03731 -93010 -13963 -53813 -53813 -14623 -04941 -03715 -94285 -04114 -53919 -94236 -53975 -24021 -14939 -14886 -04809 -63802 -14921 -54737 -94049 -24172 -64752 -53861 -04831 -04831 -13810 -03849 -23243 -04849 -12907 -03930 -04942 -93041 -04108 -14920 -13829 -23112 -03982 -03982 -13902 -93774 -53964 -23111 -93812 -04995 -12975 -23285 -24150 -04943 -23054 -53827 -54826 -53801 -03997 -94991 
-94723 -24149 -24036 -13809 -93767 -04944 -14994 -53879 -93009 -13733 -54926 -03952 -03952 -63803 -63851 -93242 -23023 -23023 -03818 -03817 -94296 -94052 -94894 -14845 -94048 -93919 -23208 -54820 -23257 -12810 -03947 -94040 -03813 -12815 -14940 -03071 -93810 -63804 -14711 -14819 -54911 -93782 -24152 -13865 -13893 -23203 -53996 -14891 -12959 -94985 -24225 -03716 -13864 -04945 -04789 -13895 -04884 -53817 -13736 -53859 -94950 -03936 -23206 -24215 -14710 -03183 -93768 -12839 -94011 -94960 -94895 -13735 -94988 -04946 -13988 -14839 -14840 -63805 -03811 -53997 -93953 -14954 -04834 -12838 -12838 -93950 -24151 -04992 -23176 -14923 -63886 -24154 -24037 -14610 -13942 -03181 -54788 -94976 -94887 -03858 -54738 -94273 -03040 -94896 -04855 -13894 -13894 -23258 -04885 -24013 -26621 -04948 -54789 -94705 -94983 -04947 -04949 -14753 -13766 -94850 -94850 -13827 -13734 -93035 -93765 -54907 -63859 -53832 -23259 -13896 -14837 -24153 -14922 -94725 -54746 -04103 -12916 -14804 -12896 -93013 -93744 -53802 -54780 -63806 -94897 -04104 -54723 -24106 -04950 -93236 -04951 -54771 -93894 -94986 -94724 -03865 -94869 -24110 -63807 -93985 -04952 -14755 -63887 -93136 -53921 -13821 -04953 -53826 -03131 -94182 -03041 -13717 -93205 -04954 -94852 -93831 -93901 -12958 -14855 -93727 -53847 -14780 -63890 -53917 -13762 -03154 -53848 -93102 -13911 -12926 -13750 -12946 -23239 -13721 -14611 -93104 -93837 -12925 -93776 -23199 -93743 -53849 -13754 -03190 -53115 -93107 -23110 -03866 -12974 -24228 -03855 -93839 -93839 -12928 -12850 -94297 -03853 -23240 -93115 -93841 -93116 -93111 -93114 -13769 -93117 -03757 -23244 -24255 -12973 -93121 -14793 -94728 -13773 -03145 -93832 -93101 -14790 -93112 -94299 -93241 -93753 -23230 -93217 -94980 -12861 -53948 -03741 -03031 -94958 -04886 -54912 -14949 -14941 -93775 -94063 -53854 -24126 -26632 -53833 -03967 -53121 -13967 -54807 -53949 -03717 -94703 -14622 -94017 -24227 -03196 -94951 -63808 -53822 -04955 -14942 -03718 -94197 -92822 -04956 -04957 -93040 -24162 -24285 -03102 -03737 -54819 -54919 -12882 -63809 -53868 -54752 -04958 -04959 -94846 -54756 -13737 -53998 -94746 -12841 -04224 -53962 -14808 -94855 -04804 -94927 -24284 -14950 -04106 -03948 -93210 -04960 -94994 -04961 -53803 -54704 -93786 -64707 -93110 -04962 -03850 -04887 -03957 -03163 -54813 -94898 -94899 -94173 -03170 -04109 -04856 -12968 -24222 -03816 -03816 -13846 -23289 -27506 -93988 -04742 -64776 -54913 -12844 -04832 -92824 -54852 -04963 -53863 -24155 -24155 -24229 -04964 -54778 -03058 -54927 -03882 -03162 -12812 -13783 -04852 -93741 -13739 -04888 -24024 -23183 -14842 -53808 -12873 -24156 -12957 -24025 -94823 -03804 -94967 -14962 -12986 -94733 -14841 -23182 -25625 -92805 -94086 -13969 -94732 -54914 -13899 -04889 -13923 -13714 -03180 -03931 -03993 -03975 -14757 -03052 -03998 -53950 -14604 -53858 -04965 -93209 -23184 -63810 -63874 -25330 -93955 -14761 -24163 -14763 -13868 -04743 -93983 -93138 -12935 -93714 -26526 -94817 -12936 -93978 -23149 -54782 -93058 -93141 -94129 -03021 -64708 -14765 -53951 -24174 -23097 -03954 -54915 -53952 -04838 -14764 -94263 -54769 -03045 -54825 -04134 -32416 -31502 -33209 -33602 -94818 -03171 -14806 -24090 -12995 -93772 -03971 -24231 -24216 -12984 -24006 -54928 -03738 -24257 -14712 -14767 -04966 -24230 -94925 -13722 -23021 -94022 -94107 -94822 -04967 -53104 -04803 -63811 -93232 -13740 -03846 -03016 -23069 -23119 -24061 -24061 -94601 -12972 -53953 -24027 -53834 -14717 -64775 -93801 -03735 -54821 -12911 -04968 -23043 -53120 -23185 -54824 -94248 -13741 -14768 -53954 -04969 -23009 -04970 -54909 -53965 -04890 -14864 -03029 
-53999 -92802 -04891 -04971 -93997 -53958 -14925 -63841 -12894 -23052 -94288 -53920 -63812 -94737 -53908 -14992 -93759 -24057 -23901 -94194 -54834 -63813 -04972 -93781 -04113 -23232 -93084 -23049 -23188 -63814 -12921 -03822 -94836 -54910 -23190 -23122 -04841 -14848 -93206 -94033 -93720 -04741 -23237 -04973 -23187 -93821 -03192 -03178 -94028 -24233 -03172 -53143 -14770 -03969 -53904 -94290 -12854 -94990 -94176 -64709 -23234 -03719 -04107 -04996 -64710 -13995 -13995 -63888 -92814 -12977 -04974 -13925 -23186 -93760 -04975 -94227 -24029 -13957 -13957 -03763 -24259 -23293 -23293 -93027 -23034 -23034 -04892 -24114 -12909 -03012 -14846 -04976 -24127 -24232 -53955 -04893 -53141 -94740 -03919 -93998 -03879 -93937 -63815 -93225 -24196 -93197 -24237 -54785 -23273 -93184 -03050 -93911 -23233 -04112 -24030 -03059 -03720 -03101 -53117 -93804 -04201 -03028 -92806 -93822 -13966 -14972 -04894 -93231 -53985 -23902 -12871 -93083 -53906 -13849 -12970 -13878 -14847 -14926 -04895 -13993 -13994 -14927 -23213 -23274 -92815 -04824 -94161 -03966 -03721 -23202 -54908 -14943 -93063 -24258 -63816 -13824 -14714 -03965 -23033 -24235 -54930 -14771 -13930 -04129 -12975 -12957 -03044 -12984 -12980 -23070 -54777 -63817 -03938 -63818 -92801 -23048 -93806 -24207 -93045 -03722 -24241 -04848 -94741 -54822 -03011 -04102 -93778 -94087 -13919 -04896 -94274 -12898 -54931 -53914 -54953 -93805 -93805 -03979 -12888 -63819 -04977 -03191 -03122 -04978 -03878 -94830 -13996 -94053 -12842 -92825 -23153 -93984 -94978 -53956 -13877 -93201 -53911 -03104 -03723 -24242 -54823 -14792 -12886 -13968 -73804 -93862 -23160 -14850 -94956 -93230 -03996 -94178 -04979 -03986 -13977 -13972 -13891 -54776 -63825 -94194 -04114 -04116 -04110 -94184 -94281 -94794 -63820 -03742 -94047 -04897 -94981 -14880 -94240 -93989 -63821 -23275 -64753 -04980 -03946 -53901 -04898 -54739 -53806 -23903 -53903 -14787 -12985 -53871 -13857 -53856 -93780 -93214 -03978 -93234 -93241 -12912 -23131 -92816 -63822 -94030 -53123 -13997 -93144 -53818 -93999 -03838 -93845 -54829 -03995 -23130 -94930 -53873 -13858 -04846 -63823 -12843 -54740 -04858 -04981 -24032 -03042 -53101 -63824 -94298 -92817 -93795 -04982 -04983 -63845 -04899 -63832 -04221 -93775 -03725 -26618 -93739 -53986 -04820 -03832 -24198 -53130 -03159 -13932 -24128 -94923 -13860 -14706 -24062 -94613 -14794 -23277 -14615 -03726 -53957 -25267 -94163 -92802 -53976 -12868 -53922 -04984 -14853 -24243 -94911 -14852 -23195 -53116 -93044 -93824 -13025 -33126 -34108 -34113 -13201 -11715 -11706 -11813 -12717 -61705 -87601 -32401 -33311 -25708 -25715 -25348 -25375 -25331 -27401 -26615 -26415 -26645 -46402 -27502 -26533 -26497 -25316 -25624 -26418 -25361 -26410 -26445 -26633 -26646 -26643 -26649 -25704 -25513 -25501 -26486 -25616 -26641 -96401 -26401 -26422 -25623 -26407 -25357 -26650 -26650 -26523 -26554 -26411 -26498 -25369 -26555 -26551 -26519 -26501 -26501 -26509 -26465 -26513 -26425 -26628 -26703 -25310 -25322 -25335 -26521 -26548 -26548 -26552 -25323 -25507 -26651 -26522 -26499 -25376 -25521 -25404 -25519 -25506 -26535 -46403 -25518 -25309 -96402 -25503 -26602 -25503 -26542 -25325 -27408 -26502 -25367 -26652 -26491 -26546 -27517 -26480 -26631 -26557 -26510 -25402 -25403 -26512 -26524 -25377 -26621 -26635 -26409 -46404 -26622 -26451 -26516 -26435 -25308 -26644 -25378 -26617 -26653 -26412 -26616 -25628 -26639 -24690 -25329 -25508 -25604 -25613 -26623 -27515 -46405 -26654 -26561 -26704 -27406 -25617 -26625 -25333 -26558 -26553 -26647 -25713 -25516 -26439 -25341 -26414 -26534 -26514 -26559 -45715 -26529 -26634 -25629 -26528 -26536 
-26492 -26492 -27518 -46406 -26508 -26627 -26560 -26442 -26479 -26642 -26442 -26438 -25338 -27503 -27503 -26630 -26648 -26444 -26443 -26445 -26484 -25339 -46407 -26413 -41419 -41418 -41414 -41417 -41406 -41415 -41420 -22501 -22508 -22552 -22551 -22548 -22550 -21510 -22536 -22534 -22547 -22539 -22514 -22519 -22521 -22517 -22524 -22516 -22526 -21508 -21504 -22513 -21603 -40710 -40604 -22701 -27403 -26624 -26638 -40505 -40309 -40310 -40504 -40308 -41606 -42218 -43285 -43324 -44402 -43319 -43323 -43313 -43219 -43216 -42204 -42219 -42215 -41231 -54702 -11814 -51601 -51701 -11640 -11624 -11603 -11653 -11630 -11630 -11641 -11655 -11649 -11634 -13601 diff --git a/munging/geo/geo_json.rb b/munging/geo/geo_json.rb deleted file mode 100644 index 0fcc957..0000000 --- a/munging/geo/geo_json.rb +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: utf-8 -*- -module Wukong - module Data - class GeoJson ; include Gorillib::Model ; end - class GeoJson::Geometry ; include Gorillib::Model ; end - - class GeoJson - include Gorillib::Model::LoadFromJson - include Gorillib::Model::Indexable - field :type, String - field :id, String - field :geometry, GeoJson::Geometry - field :properties, GenericModel - - def self.load(*args) - load_json(*args) do |val| - p val.properties - p val.properties.to_place - end - end - - end - - class GeoJson::Geometry - field :type, String - field :coordinates, Array - - def point? - type == 'Point' - end - - def longitude - return nil if coordinates.blank? - raise "Longitude only available for Point objects" unless point? - coordinates[0] - end - def latitude - return nil if coordinates.blank? - raise "Latitude only available for Point objects" unless point? - coordinates[1] - end - end - - class GeonamesGeoJson < GeoJson - def receive_properties(hsh) - if hsh.respond_to?(:merge) - super(hsh.merge(geo_json_id: id, longitude: geometry.longitude, latitude: geometry.latitude)) - else - super - end - end - end - end -end diff --git a/munging/geo/geo_models.rb b/munging/geo/geo_models.rb deleted file mode 100644 index 4035cdc..0000000 --- a/munging/geo/geo_models.rb +++ /dev/null @@ -1,69 +0,0 @@ -module Geo - - class Place - include Gorillib::Model - include Gorillib::Model::Indexable - - field :geonames_id, String - field :country_id, String, doc: "ISO 3166 2-letter alphanumeric id ('us', 'mx', etc). Must be lowercase" - field :admin1_id, String - field :feature_cat, String - field :feature_subcat, String - # - field :name, String - # - field :timezone, String - field :elevation, Float - field :longitude, Float - field :latitude, Float - # - field :alternate_names, String, default: "" - - def names - ([name] + alternate_names.split("|")).compact_blank - end - - def coordinates - { longitude: longitude, latitude: latitude, elevation: elevation }.compact - end - - def self.slugify_name(val) - val.downcase. - gsub(/(?:\s+and\s+|\s+-\s+|[^[:alpha:]\-]+)/, '-'). - gsub(/\A-*(.+?)-*\z/, '\1') - end - end - - class AdministrativeArea < Place - field :population, Integer - field :official_name, String - def names ; super.tap{|arr| arr.insert(1, official_name) }.uniq.compact_blank ; end - end - - class Country < AdministrativeArea - field :country_al3id, String, identifier: true, doc: "ISO 3166 3-letter alphanumeric id ('usa', 'mex', etc). Must be lowercase." 
- field :country_numid, Integer, identifier: true, doc: "ISO 3166 numeric identifier (e.g. 'usa' = 840)" - field :tld_id, String, doc: "TLD (top-level domain) identifier" - end - - class CountryNameLookup - include Gorillib::Model - include Gorillib::Model::Indexable - include Gorillib::Model::LoadFromTsv - index_on :slug - - field :country_id, String - field :country_al3id, String - field :country_numid, Integer - field :tld_id, String - field :geonames_id, String - field :name, String - field :slug, String - field :alt_name, String - - def self.load(filename=nil) - filename ||= :country_name_lookup - @values = load_tsv(filename) - end - end -end diff --git a/munging/geo/geonames_models.rb b/munging/geo/geonames_models.rb deleted file mode 100644 index faacb3a..0000000 --- a/munging/geo/geonames_models.rb +++ /dev/null @@ -1,78 +0,0 @@ -# -*- coding: utf-8 -*- -# {"type":"Feature", -# "id":"3cc54602f2d69c1111dc35f0aaa92240", -# "geometry":{"type":"Point","coordinates":[42.5,11.5]}, -# "properties":{ -# "geonameid":"223816","country_code":"DJ","admin1_code":"00", -# "feature_code":"PCLI","feature_class":"A", -# "asciiname":"Republic of Djibouti","name":"Republic of Djibouti","alternatenames":"Cîbûtî,...", -# "modification_date":"2011-07-09", -# "timezone":"Africa/Djibouti","gtopo30":"668","population":"740528"}} - - -# {"type":"Feature","id":"5b66ac7270763facfe1e9ab9c1bf99f8", -# "geometry":{"type":"Point","coordinates":[-98.5,39.76]}, -# "properties":{ -# "modification_date":"2011-04-27","_type":"geo/geonames_country", -# "asciiname":"United States","name":"United States","gtopo30":"537","geonameid":"6252001", -# "feature_code":"PCLI","country_code":"US","feature_class":"A", -# "alternatenames":"...","admin1_code":"00","population":"310232863"}} - -module Geo - - class GeonamesPlace - include Gorillib::Model - class_attribute :place_klass ; self.place_klass = ::Geo::Place - - field :name, String - field :asciiname, String - field :geonameid, String - field :country_code, String - field :admin1_code, String, blankish: [0, "0", "00", nil, ""] - field :feature_code, String - field :feature_class, String - # - field :modification_date, String - field :timezone, String - # - field :gtopo30, Float, blankish: ["-9999", -9999, nil, ""], doc: "Elevation in the [GTOPO30](http://en.wikipedia.org/wiki/GTOPO30) model" - field :longitude, Float - field :latitude, Float - # - field :population, Integer, blankish: [0, "0", nil, ""] - field :alternatenames, String - - # because 'Saint Helena, Ascension and Tristan da Cunha' is an official - # country name (and others like it are too), we can't simply split the names on commas - def alternate_names_with_pipes - # comma ',' with no spaces separates names; comma space ', ' is internal.
an = alternatenames.gsub(/,/, '|').gsub(/\| /, ', ') - ([name, asciiname] + an.split('|')).uniq.join("|") - end - - def to_place - attrs = { - name: asciiname, - official_name: name, - geonames_id: "gn:#{geonameid}", - country_id: country_code.downcase, - admin1_id: admin1_code, - feature_cat: feature_class, - feature_subcat: feature_code, - alternate_names: alternate_names_with_pipes, - updated_at: modification_date, - timezone: timezone, - elevation: gtopo30, - longitude: longitude, - latitude: latitude, - population: population, - } - place_klass.receive(attrs) - end - end - - # Stub class: Geonames JSON elements have :_type = geo/geonames_country - class GeonamesCountry < GeonamesPlace - self.place_klass = Geo::Country - end -end diff --git a/munging/geo/iso_codes.rb b/munging/geo/iso_codes.rb deleted file mode 100644 index 4ef446c..0000000 --- a/munging/geo/iso_codes.rb +++ /dev/null @@ -1,172 +0,0 @@ -require 'active_support/lazy_load_hooks' -require 'active_support/i18n' -require 'active_support/inflector/transliterate' - -module Wukong - - module Data - - # These classes use data from the - # [isocodes](http://pkg-isocodes.alioth.debian.org/) debian project. That - # package provides lists of various ISO standards (e.g. country, language, - # language scripts, and currency names) in one place, rather than repeated in - # many programs throughout the system. - # - class IsoCode - include Gorillib::Model - include Gorillib::Model::LoadFromTsv - include Gorillib::Model::Indexable - - class_attribute :handle, instance_writer: false - def self.load(filename=nil) - filename ||= [:geo_data, 'iso_codes', "iso_3166.tsv"] - @values = load_tsv(filename, num_fields: 4..6) - end - end - - # - # ISO 3166 Country code - # - # Lists the 2-letter country code and "short" country name. The official ISO - # 3166 maintenance agency is ISO. The gettext domain is - # "iso_3166". [origin](http://www.iso.org/iso/country_codes) - # - class CountryCode < IsoCode - include ActiveSupport::Inflector - - self.handle = :iso_3166 - index_on :alpha_2_code, :alpha_3_code, :country_numid, :name, :common_name, :official_name - field :alpha_2_code, String, identifier: true - field :alpha_3_code, String, identifier: true - field :country_numid, Integer, identifier: true - field :name, String - field :official_name, String, blankish: ["", nil] - field :common_name, String, blankish: ["", nil] - - def names - [common_name, name, official_name].compact_blank - end - def self.for_any_name(val) - for_name(val){ for_common_name(val){ for_official_name(val) } } - end - - def to_place - attrs = { - name: transliterate(names.first), - official_name: names.last, - country_id: alpha_2_code.downcase, - alternate_names: names.join('|'), - country_al3id: alpha_3_code.downcase, - country_numid: country_numid, - } - Geo::Country.receive(attrs.compact_blank) - end - end - - class FormerCountryCode < IsoCode # ISO 3166-3: codes for formerly used country names - self.handle = :iso_3166_3 - field :alpha_3_code, String, identifier: true - field :alpha_4_code, String, identifier: true - field :country_numid, Integer, identifier: true - field :country_names, String - field :comment, String - field :date_withdrawn, String - end - - # - # ISO 3166-2 Country Subdivision (Admin 1: state, region, etc) Code - # - # The ISO 3166 standard includes a "Country Subdivision Code", giving a code - # for the names of the principal administrative subdivisions of the - # countries coded in ISO 3166. The official ISO 3166-2 maintenance agency is - # ISO. The gettext domain is "iso_3166_2".
- # - # - class RegionCode < IsoCode - self.handle = :iso_3166_2 - field :region_code, String, identifier: true - field :country_code, String - field :parent_region, String - field :region_kind, String - field :name, String - alias_method :state_code, :region_code - end - - # - # ISO 639 Language Code - # - # This lists the 2-letter and 3-letter language codes and language - # names. The official ISO 639 maintenance agency is the Library of - # Congress. The gettext domain is "iso_639". - # [origin](http://www.loc.gov/standards/iso639-2/) - # - class BasicLanguageCode < IsoCode - self.handle = :iso_639 - field :iso_639_1_code, String, identifier: true - field :iso_639_2B_code, String, identifier: true - field :iso_639_2T_code, String, identifier: true - field :name, String, identifier: true - end - - # ISO 639-3 - # - # This is a further development of ISO 639-2, see above. All codes of ISO - # 639-2 are included in ISO 639-3. ISO 639-3 attempts to provide as complete - # an enumeration of languages as possible, including living, extinct, - # ancient, and constructed languages, whether major or minor, written or - # unwritten. The gettext domain is "iso_639_3". The official ISO 639-3 - # maintenance agency is SIL International. - # [origin](http://www.sil.org/iso639-3/) - # - class LanguageCode < BasicLanguageCode - self.handle = :iso_639_3 - field :language_id, String, identifier: true - field :part1_code, String - field :part2_code, String - field :scope, String - field :status, String - field :language_kind, String - field :name, String - field :inverted_name, String - field :reference_name, String - end - - # - # ISO 15924 Language Scripts (alphabet) names - # - # This lists the language scripts names. The official ISO 15924 maintenance - # agency is the Unicode Consortium. The gettext domain is "iso_15924". - # [origin](http://unicode.org/iso15924/) - # - class LanguageScriptCode < IsoCode - self.handle = :iso_15924 - field :alpha_4_code, String, identifier: true - field :script_numid, Integer, identifier: true - field :name, String - end - - # - # ISO 4217 Currency Code - # - # This lists the currency codes and names. The official ISO 4217 maintenance - # agency is the British Standards Institution. The gettext domain is - # "iso_4217". - # [origin](http://www.bsi-global.com/en/Standards-and-Publications/Industry-Sectors/Services/BSI-Currency-Code-Service/) - # - class CurrencyCode < IsoCode - self.handle = :iso_4217 - field :currency_code, String, identifier: true - field :currency_numid, Integer, identifier: true - field :name, String - end - - # - # Historic Currency Code - # - class HistoricCurrencyCode < CurrencyCode - self.handle = :historic_iso_4217 - field :date_withdrawn, String - end - - end -end diff --git a/munging/geo/reconcile_countries.rb b/munging/geo/reconcile_countries.rb deleted file mode 100644 index c6085a6..0000000 --- a/munging/geo/reconcile_countries.rb +++ /dev/null @@ -1,124 +0,0 @@ -require 'gorillib/model/reconcilable' -# require_relative('./geo_models') -# require_relative('./geo_json') - -module Geo - - Place.class_eval do - include Gorillib::Model::Reconcilable - - def adopt_alternate_names(that_val, _) - return true if that_val.blank? - names = "#{alternate_names}|#{that_val}".split("|") - names.uniq! 
- names.delete(name) - write_attribute :alternate_names, names.compact_blank.join("|") - true - end - - def conflicting_attribute!(attr, this_val, that_val) - case attr - when :name, :official_name then return :pass - end - super - end - - end - - Country.class_eval do - index_on :country_id - field :iso_3166_active, :boolean - end - - - class FullIso3166 - include Gorillib::Model - include Gorillib::Model::Reconcilable - include Gorillib::Model::LoadFromTsv - self.tsv_options = self.tsv_options.merge(num_fields: 6..8, pop_headers: true) - - field :country_id, String - field :tld_id, String - field :iso_3166_3, String - field :name, String - field :code_status, String - field :iso_3166_active, :boolean, blankish: ['N', false, nil, ''] - field :year_granted, String - field :notes, String - - def active? - iso_3166_active == "Y" - end - - def to_place - Geo::Country.receive({ - country_id: country_id, - name: name, - tld_id: tld_id, - iso_3166_active: iso_3166_active, - }) - end - end - -end - -# cd Congo (Kinshasa) -# um Baker Island -# um Howland Island -# um Jarvis Island -# um Johnston Atoll -# um Kingman Reef -# um Midway Islands -# um Navassa Island -# um Palmyra Atoll -# um Wake Island -# mi Midway Islands -# na Netherlands Antilles -# gs South Georgia and the Islands -# sj Svalbard -# wk Wake Island -# ps West Bank -# ps West Bank and the Gaza Strip -# ps Gaza Strip - -class CountryReconciler - - def self.load_reconciled_countries - - Geo::FullIso3166.load_tsv([:geo_data, 'iso_codes/full_iso_3166.tsv']) do |raw_country| - Geo::Country.values << raw_country.to_place - end - - Wukong::Data::CountryCode.load - Wukong::Data::CountryCode.values.each do |raw_country| - iso_country = raw_country.to_place - country = Geo::Country.for_country_id(iso_country.country_id){ Geo::Country.new } - country.adopt(iso_country) - end - - Wukong::Data::GeonamesGeoJson.load_json(:geonames_countries) do |raw_feature| - gn_country = raw_feature.properties.to_place - country = Geo::Country.for_country_id(gn_country.country_id){ Geo::Country.new } - country.adopt(gn_country) - end - - Geo::Country.values.sort_by!(&:country_id) - end -end - - - -# { -# :xx => { :name => 'Iran' }, -# :xx => 'Tanzania, United Republic', -# :xx => 'Palestinian Territory, Occupied', -# :xx => -# } -# :kp => "North Korea" Korea, Democratic People's Republic -# :kr => "South Korea" Korea, Republic of -# :bn => "Brunei" -# :bq => "Caribbean Netherlands" -# Lao People's Democratic Republic -# -# :va Holy See (Vatican City State) -# :vi Virgin Islands, U.S. 
diff --git a/munging/geo/tasks.rake b/munging/geo/tasks.rake deleted file mode 100644 index 74b3864..0000000 --- a/munging/geo/tasks.rake +++ /dev/null @@ -1,71 +0,0 @@ -require_relative('../../rake_helper') - -Pathname.register_paths( - geo_data: [:data, 'geo'], - geo_work: [:work, 'geo'], - geo_code: File.dirname(__FILE__), - # - iso_3166: [:geo_data, 'iso_codes', "iso_3166.tsv" ], - geonames_countries: [:geo_data, 'geonames', "geonames_countries.json" ], - # - countries_json: [:geo_work, "countries.json" ], - country_name_lookup: [:geo_work, "country_name_lookup.tsv" ], - ) - -chain :geo do - code_files = FileList[Pathname.of(:geo_code, '*.rb').to_s] - chain(:countries) do - - task(:load) do - require_relative('./geo_models') - require_relative('./geo_json') - require_relative('./geonames_models') - require_relative('./iso_codes') - require_relative('./reconcile_countries') - CountryReconciler.load_reconciled_countries - end - - # desc 'load the ISO 3166 countries' - # task(:countries_iso_3166, after: [code_files, :force]) do |dest| - # require_relative('./iso_codes') - # p Wukong::Data::CountryCode.for_any_name('Bolivia') - # end - - # step(:geonames_countries, doc: 'load the Geonames countries', - # invoke: 'geo:countries:load', - # # , after: [code_files, :force] - # ) do |dest| - # Wukong::Data::GeonamesGeoJson.load(:geonames_countries) - # end - - desc 'Add the iso_codes data to the geonames countries' - create_file(:countries_json, invoke: 'geo:countries:load', after: [code_files, :force]) do |dest| - Geo::Country.values.each do |country| - dest << country.to_json << "\n" - end - end - - desc 'Write the country name lookup table from the reconciled countries' - create_file(:country_name_lookup, invoke: 'geo:countries:load', after: [code_files, :force]) do |dest| - Geo::Country.values.each do |ct| - ct.names.each do |alt_name| - dest << [ct.country_id, ct.country_al3id, ct.country_numid, - ct.tld_id, ct.geonames_id, - ct.name, - Geo::Place.slugify_name(alt_name), alt_name - ].join("\t") << "\n" - end - end - end - - # task(:country_name_lookup => :load) do - # Geo::CountryNameLookup.load - # end - - end -end - -task :default => [ - # 'geo:countries', - 'geo:countries:country_name_lookup' -] diff --git a/munging/rake_helper.rb b/munging/rake_helper.rb deleted file mode 100644 index 194b6a4..0000000 --- a/munging/rake_helper.rb +++ /dev/null @@ -1,62 +0,0 @@ -require 'gorillib' -require 'gorillib/data_munging' -require 'configliere' - -S3_BUCKET = 'bigdata.chimpy.us' -S3_DATA_ROOT = "s3n://#{S3_BUCKET}/data" -HDFS_DATA_ROOT = '/data' - -Settings.define :orig_data_root, default: HDFS_DATA_ROOT, description: "directory root for input data" -Settings.define :scratch_data_root, default: HDFS_DATA_ROOT, description: "directory root for scratch data" -Settings.define :results_data_root, default: HDFS_DATA_ROOT, description: "directory root for results data" -Settings.define :mini, description: 'Run in mini mode - operate inside the mini version of the specified universe', type: :boolean, default: false -Settings.define :universe, description: 'Universe to draw data from', finally: ->(c){ c.universe ||= (c.mini? ? "mini" : "full") } -Settings.define :pig_path, default: '/usr/local/bin/pig' -Settings.define :local, type: :boolean, default: false - -def Settings.mini?; !! Settings.mini ; end # BANG BANG BANG -def Settings.wu_run_cmd; (local ? '--run=local' : '--run') ; end - -def dir_exists?(dir) - if Settings.local - return File.exist?
dir - else - `hadoop fs -test -e #{dir}` - return $?.exitstatus == 0 - end -end - -def wukong(script, input, output, options={}) - input = Pathname.of(input) - output = Pathname.of(output) - if dir_exists? output - puts "#{output} exists. Assuming that this job has already run..." - return - end - opts = ['--rm'] - options.each_pair do |k,v| - opts << "--#{k}=#{v}" - end - opts << input - opts << output - ruby(script, Settings.wu_run_cmd,*opts) -end - -def wukong_xml(script, input, output, split_tag) - wukong(script,input,output,{split_on_xml_tag: split_tag}) -end - -def pig(script_name, options={}) - cmd = Settings.pig_path - options.each_pair do |k,v| - v = Pathname.of(v) if v.is_a? Symbol - if k.to_s.include? '_out' and dir_exists? v - puts "#{v} already exists. Assuming that this job has already run..." - return - else - cmd += " -param #{k.upcase}=#{v}" - end - end - cmd += " #{script_name}" - sh cmd -end diff --git a/munging/weather/.gitignore b/munging/weather/.gitignore deleted file mode 100644 index b844b14..0000000 --- a/munging/weather/.gitignore +++ /dev/null @@ -1 +0,0 @@ -Gemfile.lock diff --git a/munging/weather/Gemfile b/munging/weather/Gemfile deleted file mode 100644 index 6a0e142..0000000 --- a/munging/weather/Gemfile +++ /dev/null @@ -1,4 +0,0 @@ -source 'http://rubygems.org' - -gem 'gorillib', :path => '/Users/dlaw/dev/gorillib' -gem 'wukong', :path =>'/Users/dlaw/dev/wukong_og' diff --git a/munging/weather/Rakefile b/munging/weather/Rakefile deleted file mode 100644 index b00762c..0000000 --- a/munging/weather/Rakefile +++ /dev/null @@ -1,28 +0,0 @@ -require 'configliere' -Settings.use :commandline - -require_relative '../rake_helper' - -Settings.resolve! - -Pathname.register_paths( - project: 'noaa_ish', - universe: 'full', - - orig: [Settings.orig_data_root,'orig'], - scratch: [Settings.scratch_data_root, 'scratch'], - results: [Settings.results_data_root, 'results'], - - #Origin - noaa_ish_orig: [:orig, 'www1.ncdc.noaa.gov','pub','data','noaa'], - noaa_ish_test: [:noaa_ish_orig, '010010-99999-2012'], - #Results - noaa_ish_results: [:results, :project, :universe], -) - -namespace :extract do - desc 'Extract the NOAA ISH weather data from flat files' - task :ish do - wukong('extract_ish.rb', :noaa_ish_test, :noaa_ish_results) - end -end diff --git a/munging/weather/extract_ish.rb b/munging/weather/extract_ish.rb deleted file mode 100755 index 5a64c15..0000000 --- a/munging/weather/extract_ish.rb +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env ruby -# encoding: UTF-8 - -require 'wukong' -require 'wukong/streamer/flatpack_streamer' - -module Weather - class Mapper < Wukong::Streamer::FlatPackStreamer - format "_4 i6 i5 s8 s4 sD6e3 D7e3 s5 i5 s5 s4 i3 ssD4e1ii5 ssbi6 sssD5e1 sD5e1 sD5e1 ss*" - end -end - -Wukong::Script.new(Weather::Mapper, nil).run diff --git a/munging/weather/models/weather.rb b/munging/weather/models/weather.rb deleted file mode 100755 index 01b3569..0000000 --- a/munging/weather/models/weather.rb +++ /dev/null @@ -1,119 +0,0 @@ -require 'gorillib' -require 'gorillib/model' -require 'gorillib/model/serialization' -require 'gorillib/model/positional_fields' - -class RawWeatherReport - include Gorillib::Model - include Gorillib::Model::PositionalFields - - field :usaf_station_id, Integer - - # wban id appears to have 99999 as a blank value even though - # it is not specified as such in the docs - field :wban_station_id, Integer - - field :obs_date, String - field :obs_time, String - - field :obs_data_source, String, blankish: ["9", '', nil] - - field 
:wstn_latitude, Float, blankish: [99.999, '', nil] - field :wstn_longitude, Float, blankish: [999.999, '' , nil] - - field :report_type_code, String, blankish: ["99999", '', nil] - - field :wstn_elevation, Integer, blankish: [9999, '', nil] - - field :wstn_call_letters, String, blankish: ["99999", '', nil] - - field :quality_control_process_name, String - - field :wind_direction, Integer, blankish: [999, '', nil] - field :wind_direction_qual, String - field :wind_observation_type, String, blankish: ["9", '', nil] - field :wind_speed, Float, blankish: [999.9, '', nil] - field :wind_speed_qual, String - - field :ceiling_height, Integer, blankish: [99999, '', nil] - field :ceiling_qual, String - field :ceiling_determination, String, blankish:['9', '', nil] - field :cavok, :boolean - - field :visibility, Integer, blankish: [999999, '', nil] - field :visibility_qual, String - field :visibility_variability_code, String, blankish: ['9', '', nil] - field :visibility_variability_code_qual, String - - field :air_temp, Float, blankish: [999.9, '', nil] - field :air_temp_qual, String - - field :dew_point, Float, blankish: [999.9, '', nil] - field :dew_point_qual, String - - field :sea_level_pressure, Float, blankish: [9999.9, '' , nil] - field :sea_level_pressure_qual, String - - field :raw_extended_observations, String -end - -class ReportMetadata - include Gorillib::Model - field :wind_direction_qual, String - field :wind_speed_qual, String - field :ceiling_qual, String - field :visibility_qual, String - field :visibility_variability_code_qual, String - field :air_temp_qual, String - field :dew_point_qual, String - field :sea_level_pressure_qual, String - -end - -class WeatherReport - include Gorillib::Model - - field :wstn_id, String #wban-usad - - field :wstn_latitude, Float - field :wstn_longitude, Float - field :wstn_elevation, Float - - field :obs_date, String - field :obs_time, String - - field :wind_direction, Integer - field :wind_observation_type, String - field :wind_speed, Float - - field :ceiling_height, Integer - field :ceiling_determination, String - field :cavok, :boolean - - field :visibility, Integer - field :visibility_variability_code, :boolean - - field :air_temp, Float - - field :dew_point, Float - - field :sea_level_pressure, Float - - field :metadata, ReportMetadata, default: ReportMetadata.new - - def receive!(hsh={}) - # prune the quality fields - hsh.keys.each do |key| - next if (key.to_s =~ /[^_]*_qual/).nil? - val = hsh.delete(key) - metadata.send("receive_#{key.to_s}", val) - end - # transform the ids - if hsh.keys.include? :usaf_station_id and hsh.keys.include? 
:wban_station_id - id = hsh.delete(:usaf_station_id).to_s - id += "-#{hsh.delete :wban_station_id}" - hsh[:wstn_id] = id - end - super(hsh) - end -end diff --git a/munging/weather/utils/noaa_downloader.rb b/munging/weather/utils/noaa_downloader.rb deleted file mode 100644 index dd76467..0000000 --- a/munging/weather/utils/noaa_downloader.rb +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env ruby -# encoding:UTF-8 - -require 'open-uri' -require 'configliere' - -NOAA_URL = 'http://www1.ncdc.noaa.gov/pub/data/noaa/' -Settings.use :commandline - -Settings({ - years: [1901], - verbose: false, - out_dir: '/data/rawd/noaa/isd/', - un_gzip: false, -}) - -Settings.define :years, flag: 'y', description: "Years to download" -Settings.define :verbose, flag: 'v', description: "Get chatty", type: :boolean -Settings.define :un_gzip, flag: 'g', description: "Unzip the files as they are uploaded", type: :boolean -Settings.define :out_dir, flag: 'o', description: "The directory in the hdfs to put the files" - -Settings.resolve! - -def get_files_for_year(year) - year_page = open("#{NOAA_URL}/#{year}") - files = [] - year_page.each_line do |line| - # each file shows up as an <a href="..."> link in the directory listing - next unless line =~ /<a href=/ - match = /<a href="([^"]+)">/.match(line) - files << match[1] if not match.nil? - end - return files -end - -Settings[:years].each do |year| - puts "Uploading files for year #{year}..." if Settings[:verbose] - get_files_for_year(year).each do |file| - puts " Uploading #{file}..." if Settings[:verbose] - path = "#{NOAA_URL}/#{year}/#{file}" - if Settings[:un_gzip] - `curl '#{path}' | zcat | hdp-put #{Settings[:out_dir]}/#{year}/#{file}` - else - `curl '#{path}' | hdp-put #{Settings[:out_dir]}/#{year}/#{file}` - end - end -end diff --git a/munging/wikipedia/Gemfile b/munging/wikipedia/Gemfile deleted file mode 100644 index ff33bd0..0000000 --- a/munging/wikipedia/Gemfile +++ /dev/null @@ -1,8 +0,0 @@ -source 'http://rubygems.org' - -gem 'gorillib', :path => '/home/dlaw/dev/gorillib' -gem 'ruby-progressbar' -gem 'crack' -gem 'rake' -gem 'wukong', :path => '/home/dlaw/dev/wukong_og' -gem 'json' diff --git a/munging/wikipedia/README.md b/munging/wikipedia/README.md deleted file mode 100644 index 4661950..0000000 --- a/munging/wikipedia/README.md +++ /dev/null @@ -1,34 +0,0 @@ -## Encodings -All SQL dumps are theoretically encoded in UTF-8, but the Wikipedia dumps contain malformed characters. You might see an 'Invalid UTF-8 byte sequence' error when running a Wukong job because of this. - -To fix this, use `guard_encoding` in `MungingUtils` to filter out malformed characters before attempting to process them. `guard_encoding` replaces all invalid characters with '�'. - -If you need to ensure that all characters are valid UTF-8 when piping things around on the command line, then pipe your stream through `char_filter.rb`. - -If you need an invalid UTF-8 character, pretty much any single-byte character above \x7F will do, e.g.: - - > char = "\x80" - => "\x80" - > char.encoding.name - => "UTF-8" - > char.valid_encoding? - => false - -[James Gray's blog](http://blog.grayproductions.net/articles/understanding_m17n) is really valuable for further reading on this. - -## Dates -Date information should be formatted as follows: - - +----------+--------+--------------------------+-------------+ - | int | int | long or float | int | - +----------+--------+--------------------------+-------------+ - | YYYYMMDD | HHMMSS | Seconds since Unix epoch | Day of week | - +----------+--------+--------------------------+-------------+ - -Dates should always be in the UTC time zone.
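For example, here is a minimal sketch of emitting the four columns above for a UTC timestamp (it mirrors `time_columns_from_time` in `utils/munging_utils.rb`, which appears later in this diff; the `time_columns` name is just for illustration). The range conventions for each column follow below:

    # Emit the four standard date columns for a UTC timestamp:
    # YYYYMMDD, HHMMSS, seconds since the Unix epoch, and day of week.
    def time_columns(time)
      utc = time.getutc
      [ "%04d%02d%02d" % [utc.year, utc.month, utc.day],
        "%02d%02d%02d" % [utc.hour, utc.min, utc.sec],
        utc.to_i,
        utc.wday ]
    end

    time_columns(Time.utc(2012, 8, 2, 10, 0, 0))
    # => ["20120802", "100000", 1343901600, 4]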
- -Hours go from 0 to 23 - -Months go from 01 to 12 - -Day of week goes from 0 to 6 (Sunday to Saturday) diff --git a/munging/wikipedia/Rakefile b/munging/wikipedia/Rakefile deleted file mode 100644 index dcedacb..0000000 --- a/munging/wikipedia/Rakefile +++ /dev/null @@ -1,193 +0,0 @@ -require 'configliere' -Settings.use :commandline - -require_relative '../rake_helper' - -DUMPS = ['20110722','20110803','20110901','20111007','20111115', - '20111201','20120104','20120211','20120307','20120403', - '20120502','20120601','20120702','20120802'] - -Settings.define :pageviews_date_range_slug_in, description: 'The pageviews date range', default: '2012/2012-08' -Settings.define :pageviews_date_range_slug_out, description: 'The pageviews date range', default: '2012/2012-08' -Settings.define :dump, description: 'The wikipedia dump to use', default: DUMPS[-1] -Settings.define :n1_node_id, description: 'Node to construct the N1 subuniverse around', default: '13692155' -Settings.define :n1_subuniverse, description: 'The output universe for N1 subuniverse generation', finally: ->(c) {c.n1_subuniverse ||= "n1_#{c.n1_node_id}"} -Settings.define :num_reducers, type: Integer, default: nil -Settings.resolve! - -if (not DUMPS.include? Settings.dump) - puts "Invalid dump specified. Must be one of [#{DUMPS.join(', ')}].\nExiting..." - exit -end - -=begin - Universe is the universe that data is drawn from. - It is also the default universe the data is written into. - There are tasks (namely subuniverse generation) that do not write out - into the supplied universe. Be careful -=end - -Pathname.register_paths( - project: 'wikipedia', - universe: [Settings.universe], - - orig: [Settings.orig_data_root,'ripd'], - scratch: [Settings.scratch_data_root, 'scratch'], - results: [Settings.results_data_root, 'results'], - - #Origin - wiki_dumps: [:orig,'dumps.wikimedia.org'], - orig_enwiki: [:wiki_dumps, 'enwiki'], - orig_pageviews: [:wiki_dumps, 'other', 'pagecounts-raw', Settings.pageviews_date_range_slug_in], - orig_articles: [:orig_enwiki, Settings.dump, "enwiki-#{Settings.dump}-pages-articles.xml.gz"], - orig_pages: [:orig_enwiki, Settings.dump, "enwiki-#{Settings.dump}-page.sql.gz"], - orig_pagelinks: [:orig_enwiki, Settings.dump,"enwiki-#{Settings.dump}-pagelinks.sql.gz"], - - # Scratch - wiki_scratch: [:scratch, :project, :universe], - page_metadata_scratch: [:wiki_scratch,'page_metadata'], - articles_scratch: [:wiki_scratch, 'articles'], - pageviews_scratch: [:wiki_scratch, 'pageviews',Settings.pageviews_date_range_slug_out], - pagelinks_scratch: [:wiki_scratch, 'pagelinks'], - - # Results - wiki_results: [:results, :project, :universe], - page_metadata_results: [:wiki_results, 'page_metadata'], - pageviews_results: [:wiki_results, 'pageviews'], - articles_results: [:wiki_results, 'articles'], - pagelinks_results: [:wiki_results, 'pagelinks'], - undirected_pagelinks_results: [:wiki_results, 'undirected_pagelinks'], - redirects_pagelinks_results: [:wiki_results, 'redirects_pagelinks'], - redirects_page_metadata_results: [:wiki_results, 'redirects_page_metadata'], - - # N1 Subuniverse - n1_results: [:results,'wikipedia', Settings.n1_subuniverse], - n1_nodes_results: [:n1_results, 'nodes'], - n1_edges_results: [:n1_results, 'edges'], - n1_page_metadata_results: [:n1_results, 'page_metadata'], - n1_articles_results: [:n1_results, 'articles'], - n1_pageviews_results: [:n1_results, 'pageviews'], - -) - -namespace :utils do - desc 'Fetch a list of all Wikipedia namespaces and their IDs' - task :get_namespaces do - if 
File.exists?('utils/namespaces.json') - puts 'utils/namespaces.json exists... Assuming that namespaces have already been downloaded' - next - end - ruby('utils/get_namespaces.rb') - end -end -namespace :extract do - desc 'Extract the Wikipedia article corpus from bzipped XML files' - task :articles do - wukong_xml('articles/extract_articles.rb', :orig_articles, :articles_results) - end - - desc 'Extract the Wikipedia pages table from gzipped SQL dumps' - task :page_metadata do - wukong('page_metadata/extract_page_metadata.rb', :orig_pages, :page_metadata_results) - end - - desc 'Extract Wikipedia pageview data from gzipped server logs' - task :pageviews do - if Settings.num_reducers.nil? - wukong('pageviews/extract_pageviews.rb', :orig_pageviews, :pageviews_scratch) - else - wukong('pageviews/extract_pageviews.rb', :orig_pageviews, :pageviews_scratch, {reduce_tasks: Settings.num_reducers}) - end - end - - desc 'Extract Wikipedia pagelinks data from gzipped SQL dumps' - task :pagelinks do - wukong('pagelinks/extract_pagelinks.rb', :orig_pagelinks, :pagelinks_scratch) - end -end -namespace :augment do - desc 'Augment extracted Wikipedia pageview data with page ID and other metadata' - task :pageviews => ["extract:pageviews", "extract:page_metadata"] do - pig('pageviews/augment_pageviews.pig',{ - page_metadata: :page_metadata_results, - extracted_pageviews: :pageviews_scratch, - augmented_pageviews_out: :pageviews_results, - }) - end - - desc 'Augment Wikipedia pagelinks data with page metadata' - task :pagelinks => ["extract:pagelinks","extract:page_metadata"] do - pig('pagelinks/augment_pagelinks.pig',{ - page_metadata: :page_metadata_results, - extracted_pagelinks: :pagelinks_scratch, - augmented_pagelinks_out: :pagelinks_results, - }) - end - - desc 'Undirect the Wikipedia pagelinks graph' - task :pagelinks_undirect => "augment:pagelinks" do - pig('pagelinks/undirect_pagelinks.pig',{ - augmented_pagelinks: :pagelinks_results, - undirected_pagelinks_out: :undirected_pagelinks_results, - }) - end -end -namespace :n1 do - desc 'Generate a list of node ids for the N1 neighborhood of the specified node' - task :nodes => 'augment:pagelinks_undirect' do - pig('n1_subuniverse/n1_nodes.pig',{ - undirected_pagelinks: :undirected_pagelinks_results, - hub: Settings.n1_node_id, - n1_nodes_out: :n1_nodes_results, - }) - end - desc 'Extract pagelinks for the N1 neighborhood of the specified node' - task :undirected_pagelinks => ['augment:pagelinks_undirect', :nodes] do - pig('subuniverse/sub_undirected_pagelinks_within.pig',{ - undirected_pagelinks: :undirected_pagelinks_results, - sub_nodes: :n1_nodes_results, - sub_pagelinks_out: :n1_edges_results, - }) - end - desc 'Extract page metadata for the N1 neighborhood of the specified node' - task :page_metadata => ['extract:page_metadata', :nodes] do - pig('subuniverse/sub_page_metadata.pig',{ - page_metadata: :page_metadata_results, - sub_nodes: :n1_nodes_results, - sub_page_metadata_out: :n1_page_metadata_results, - }) - end - desc 'Extract articles for the N1 neighborhood of the specified node' - task :articles => ['extract:articles', :nodes] do - pig('subuniverse/sub_articles.pig',{ - articles: :articles_results, - sub_nodes: :n1_nodes_results, - sub_articles_out: :n1_articles_results, - }) - end - desc 'Extract pageview data for the N1 neighborhood of the specified node' - task :pageviews => ['augment:pageviews', :nodes] do - pig('subuniverse/sub_pageviews.pig',{ - pageviews: :pageviews_results, - sub_nodes: :n1_nodes_results, - sub_pageviews_out:
:n1_pageviews_results, - }) - end - end -namespace :redirects do - desc 'Extract redirects from pagemetadata table' - task :redirects_page_metadata => 'extract:page_metadata' do - pig('redirects/redirects_page_metadata.pig',{ - page_metadata: :page_metadata_results, - redirects_out: :redirects_page_metadata_results, - }) - end - desc 'Extract redirect links from pagelinks table' - task :redirect_pagelinks => ['redirects_page_metadata','augment:pagelinks'] do - pig('subuniverse/sub_pagelinks_from.pig',{ - pagelinks: :pagelinks_results, - sub_nodes: :redirects_page_metadata_results, - sub_pagelinks_out: :redirects_pagelinks_results, - }) - end -end diff --git a/munging/wikipedia/articles/extract_articles.rb b/munging/wikipedia/articles/extract_articles.rb deleted file mode 100755 index 96bbc7e..0000000 --- a/munging/wikipedia/articles/extract_articles.rb +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env ruby - -# This script extracts wikipedia articles from bzipped xml and outputs -# them in TSV. -# -# Sample Pig LOAD Statement: -# all_articles = LOAD '$articles' AS -# (id:int, title:chararray, namespace:int, revision_date:int, revision_time:int, -# revision_epoch_time:long, revision_day_of_week:int, text:chararray); - -require 'wukong' -require 'wukong/streamer/encoding_cleaner' -require 'crack/xml' -require_relative '../utils/munging_utils.rb' - -module ArticlesExtractor - class Mapper < Wukong::Streamer::LineStreamer - include Wukong::Streamer::EncodingCleaner - include MungingUtils - - def lines - @lines ||= [] - end - - def recordize line - lines << line - if line =~ /<\/page>/ - result = Crack::XML::parse(lines.join) - @lines = [] - return [result] - else - return nil - end - end - - def escape text - text.gsub!(/\n/," "); - text.gsub!(/\t/," "); - return text - end - - def process record - if record.has_key? 'mediawiki' - record = record['mediawiki'] - end - result = [] - result << record['page']['id'] - result << record['page']['title'] - result << record['page']['ns'] - result += time_columns_from_time(Time.iso8601(record['page']['revision']['timestamp'])) - result << escape(record['page']['revision']['text']) - yield result - end - end -end - -Wukong::Script.new(ArticlesExtractor::Mapper,nil).run diff --git a/munging/wikipedia/n1_subuniverse/n1_nodes.pig b/munging/wikipedia/n1_subuniverse/n1_nodes.pig deleted file mode 100644 index 1c8bd27..0000000 --- a/munging/wikipedia/n1_subuniverse/n1_nodes.pig +++ /dev/null @@ -1,16 +0,0 @@ -/* - * This script generates the list of all nodes in the 1-neighborhood of the specified node. - * - * Output Format: - * node_id:int - */ - -%default UNDIRECTED_PAGELINKS '/data/results/wikipedia/full/undirected_pagelinks' -- all edges in the pagelink graph -%default HUB 13692155 -- Philosophy -%default N1_NODES_OUT '/data/results/wikipedia/mini/nodes' -- where output will be stored - -undirected_pagelinks = LOAD '$UNDIRECTED_PAGELINKS' AS (node_a:int, node_b:int, a_into_b:int, b_into_a:int, is_symmetric:int); -spokes = FILTER undirected_pagelinks BY (node_a == $HUB) OR (node_b == $HUB); -neighbors = FOREACH spokes GENERATE ((node_a == $HUB) ? 
node_b : node_a) AS node; -distinct_neighbors = DISTINCT neighbors; -STORE distinct_neighbors INTO '$N1_NODES_OUT'; diff --git a/munging/wikipedia/page_metadata/extract_page_metadata.rb b/munging/wikipedia/page_metadata/extract_page_metadata.rb deleted file mode 100755 index 6146188..0000000 --- a/munging/wikipedia/page_metadata/extract_page_metadata.rb +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env ruby - -# Sample pig load statement: -# -# page_metadata = LOAD '$page_metadata' AS (id:int, namespace:int, title:chararray, -# restrictions:chararray, counter:long, is_redirect:int, is_new:int, random:float, -# touched:int, page_latest:int, len:int); - -require 'wukong' -require 'wukong/streamer/sql_streamer' -require 'wukong/streamer/encoding_cleaner' - -module PageMetadataExtractor - class Mapper < Wukong::Streamer::SQLStreamer - include Wukong::Streamer::EncodingCleaner - columns [:int, :int, :string, :string, :int, - :int, :int, :float, :string, :int, :int] - end -end - -Wukong::Script.new(PageMetadataExtractor::Mapper, nil).run diff --git a/munging/wikipedia/page_metadata/extract_page_metadata.rb.old b/munging/wikipedia/page_metadata/extract_page_metadata.rb.old deleted file mode 100755 index bb1d23a..0000000 --- a/munging/wikipedia/page_metadata/extract_page_metadata.rb.old +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env ruby - -require 'wukong' - -load '/home/dlaw/dev/wukong/examples/wikipedia/munging_utils.rb' - -module PagesToTSV - class Mapper < Wukong::Streamer::LineStreamer - - COLUMNS= [:int, :int, :string, :string, :int, - :int, :int, :float, :string, :int, :int] - - def initialize - @sql_parser = MungingUtils::SQLParser.new(COLUMNS) - end - - def process(line, &blk) - @sql_parser.parse(line,&blk) - end - end -end - -# go to town -Wukong::Script.new( - PagesToTSV::Mapper, - nil -).run diff --git a/munging/wikipedia/pagelinks/augment_pagelinks.pig b/munging/wikipedia/pagelinks/augment_pagelinks.pig deleted file mode 100644 index 033cfdc..0000000 --- a/munging/wikipedia/pagelinks/augment_pagelinks.pig +++ /dev/null @@ -1,29 +0,0 @@ -/* - A script to generate Wikipedia page graph edge list - Accepts as input 2 tsvs: list of pages and list of links - Link table should initially be formatted as from_page_id, into_namespace, into_title - Assumes that the combination of namespace and title uniquely identifies a page - - Output Format: - from_id:int, into_id:int, from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray -*/ - -%default PAGE_METADATA '/data/results/wikipedia/full/page_metadata' -- page metadata for all Wikipedia pages -%default EXTRACTED_PAGELINKS '/data/scratch/wikipedia/full/pagelinks' -- raw extracted pagelinks -%default AUGMENTED_PAGELINKS_OUT '/data/results/wikipedia/full/pagelinks' -- augmented pagelinks - -page_metadata = LOAD '$PAGE_METADATA' AS (id:int, namespace:int, title:chararray, - restrictions:chararray, counter:long, is_redirect:int, is_new:int, - random:float, touched:int, page_latest:int, len:int); -links = LOAD '$EXTRACTED_PAGELINKS' AS (from_id:int, into_namespace:int, into_title:chararray); - -first_pass_j = JOIN page_metadata BY id RIGHT OUTER, links BY from_id; -first_pass = FOREACH first_pass_j GENERATE - links::from_id AS from_id, page_metadata::namespace AS from_namespace, page_metadata::title AS from_title, - links::into_namespace AS into_namespace, links::into_title AS into_title; -second_pass_j = JOIN page_metadata BY (namespace, title) RIGHT OUTER, first_pass BY (into_namespace, into_title); -second_pass = FOREACH 
second_pass_j GENERATE - first_pass::from_id, page_metadata::id, - first_pass::from_namespace, first_pass::from_title, - first_pass::into_namespace, first_pass::into_title; -STORE second_pass INTO '$AUGMENTED_PAGELINKS_OUT'; diff --git a/munging/wikipedia/pagelinks/extract_pagelinks.rb b/munging/wikipedia/pagelinks/extract_pagelinks.rb deleted file mode 100755 index e5b3e17..0000000 --- a/munging/wikipedia/pagelinks/extract_pagelinks.rb +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - -require 'wukong' -require 'wukong/streamer/sql_streamer' -require 'wukong/streamer/encoding_cleaner' - -module PagelinksExtractor - class Mapper < Wukong::Streamer::SQLStreamer - include Wukong::Streamer::EncodingCleaner - columns [:int, :int, :string] - end -end - -Wukong::Script.new(PagelinksExtractor::Mapper, nil).run diff --git a/munging/wikipedia/pagelinks/extract_pagelinks.rb.old b/munging/wikipedia/pagelinks/extract_pagelinks.rb.old deleted file mode 100755 index a0b01b3..0000000 --- a/munging/wikipedia/pagelinks/extract_pagelinks.rb.old +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env ruby -require 'wukong' - -load '/home/dlaw/dev/wukong/examples/wikipedia/munging_utils.rb' - -module PagelinksToTSV - class Mapper < Wukong::Streamer::LineStreamer - - COLUMNS = [:int, :int, :string] - - def initialize - @sql_parser = MungingUtils::SQLParser.new(COLUMNS) - end - - def process(line, &blk) - @sql_parser.parse(line, &blk) - end - end -end - -# go to town -Wukong::Script.new( - PagelinksToTSV::Mapper, - nil -).run diff --git a/munging/wikipedia/pagelinks/undirect_pagelinks.pig b/munging/wikipedia/pagelinks/undirect_pagelinks.pig deleted file mode 100644 index 9d67a01..0000000 --- a/munging/wikipedia/pagelinks/undirect_pagelinks.pig +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Takes a directed edge list and transforms it into an undirected edge list - * that stores edge direction as metadata. - * - * Input table should be of the format (from_id:int, into_id:int ... ) - * - * Output format: - * node_a:int, node_b:int, a_into_b:int, b_into_a:int, symmetric:int - * - * a_into_b, b_into_a, and symmetric are really booleans. - */ - -%default AUGMENTED_PAGELINKS '/data/results/wikipedia/full/pagelinks' -- all wikipedia pagelinks (see augment_pagelinks.pig) -%default UNDIRECTED_PAGELINKS_OUT '/data/results/wikipedia/full/undirected_pagelinks' -- undirected pagelinks - -edges = LOAD '$AUGMENTED_PAGELINKS' AS (from:int, into:int); -edges_sorted = FOREACH edges GENERATE - ((from <= into)? from : into) AS node_a, - ((from <= into)? into : from) AS node_b, - ((from <= into)? 1 : 0) AS a_to_b, - ((from <= into)? 0 : 1) AS b_to_a; -edges_grouped = GROUP edges_sorted BY (node_a, node_b); -edges_final = FOREACH edges_grouped GENERATE - group.node_a AS node_a, - group.node_b AS node_b, - ((SUM(edges_sorted.a_to_b) > 0) ? 1 : 0) AS a_into_b, - ((SUM(edges_sorted.b_to_a) > 0) ? 1 : 0) AS b_into_a, - ((SUM(edges_sorted.a_to_b) > 0 AND SUM(edges_sorted.b_to_a) > 0) ? 1 : 0) AS symmetric:int; -STORE edges_final INTO '$UNDIRECTED_PAGELINKS_OUT'; diff --git a/munging/wikipedia/pageviews/augment_pageviews.pig b/munging/wikipedia/pageviews/augment_pageviews.pig deleted file mode 100644 index 83408be..0000000 --- a/munging/wikipedia/pageviews/augment_pageviews.pig +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Augments raw pageview data with page ID. - * Pageview stats are *theoretically* uniquely keyed by namespace - * and title, so that is what is used to join pageviews with page_metadata.
- * - * In practice, the original pageview stats only give the URL visited, and - * reliably extracting namespace and title from the URL is difficult. Additionally, - * page names change, redirects happen, and many other small things can go - * wrong with the join. All pageview data is kept in the final table, but - * the page id will be blank in rows where the join failed. - * - * Output format: - * page_id:int, namespace:int, title:chararray, num_visitors:long, - * date:int, time:int, epoch_time:long, day_of_week:int - */ - -%default PAGE_METADATA '/data/results/wikipedia/full/page_metadata' -- page metadata for all Wikipedia pages -%default EXTRACTED_PAGEVIEWS '/data/scratch/wikipedia/full/pageviews' -- raw extracted pageview stats (see extract_pageviews.rb) -%default AUGMENTED_PAGEVIEWS_OUT '/data/results/wikipedia/full/pageviews' -- where output will be stored - -page_metadata = LOAD '$PAGE_METADATA' AS - (id:int, namespace:int, title:chararray, - restrictions:chararray, counter:long, is_redirect:int, is_new:int, - random:float, touched:int, page_latest:int, len:int); -pageviews = LOAD '$EXTRACTED_PAGEVIEWS' AS (namespace:int, title:chararray, - num_visitors:long, date:int, time:int, epoch_time:long, day_of_week:int); - -first_join = JOIN page_metadata BY (namespace, title) RIGHT OUTER, pageviews BY (namespace, title); -final = FOREACH first_join GENERATE - page_metadata::id, pageviews::namespace, pageviews::title, pageviews::num_visitors, - pageviews::date, pageviews::time, pageviews::epoch_time, pageviews::day_of_week; -STORE final INTO '$AUGMENTED_PAGEVIEWS_OUT'; diff --git a/munging/wikipedia/pageviews/extract_pageviews.rb b/munging/wikipedia/pageviews/extract_pageviews.rb deleted file mode 100755 index 7aa08d4..0000000 --- a/munging/wikipedia/pageviews/extract_pageviews.rb +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env ruby -# encoding:UTF-8 - -# Pig output format: -# namespace:int, title:chararray, num_visitors:long, date:int, time:int, epoch_time:long, day_of_week:int - -$:.unshift '/home/dlaw/dev/wukong_og/lib' -$:.unshift '/home/dlaw/dev/gorillib/lib' - -require 'uri' -require 'pathname' -require 'json' -require 'wukong' -require 'wukong/streamer' -require 'wukong/streamer/encoding_cleaner' -load '/home/dlaw/dev/wukong/examples/munging/wikipedia/utils/munging_utils.rb' - -ENV['map_input_file'] ||= 'pagecounts-20071222-100000.gz' - -class String - def is_enwiki? - return (not (self =~ /^en /).nil?) - end - - def is_after_enwiki? - return (not (self =~ /^(e[o-z][a-z]*|[f-z][a-z]+) /).nil?) - end -end - -module PageviewsExtractor - class Mapper < Wukong::Streamer::LineStreamer - include Wukong::Streamer::EncodingCleaner - include MungingUtils - - ns_json_file = File.open("/home/dlaw/dev/wukong/examples/munging/wikipedia/utils/namespaces.json",'r:UTF-8') - NAMESPACES = JSON.parse(ns_json_file.read) - - # the filename strings are formatted as - # pagecounts-YYYYMMDD-HH0000.gz - def time_from_filename(filename) - parts = filename.split('-') - year = parts[1][0..3].to_i - month = parts[1][4..5].to_i - day = parts[1][6..7].to_i - hour = parts[2][0..1].to_i - return Time.new(year,month,day,hour) - end - - def process line - # we only want enwiki lines - return if @done - if line.is_after_enwiki? - @done = true - return - end - return if not line.is_enwiki? - # we have an enwiki line - process it! - fields = line.split(' ')[1..-1] - out_fields = [] - # add the namespace - namespace = nil - if fields[0].include? 
':' - namespace = NAMESPACES[fields[0].split(':')[0]] - out_fields << (namespace || '0') - else - out_fields << '0' - end - # add the title - if namespace.nil? - out_fields << URI.unescape(fields[0]) - else - out_fields << URI.unescape(fields[0][(fields[0].index(':')||-1)+1..-1]) - end - # add number of visitors in the hour - out_fields << fields[2] - # grab date info from filename - file = Pathname.new(ENV['map_input_file']).basename - time = time_from_filename(file.to_s) - out_fields += time_columns_from_time(time) - yield out_fields - end - end -end - -Wukong::Script.new(PageviewsExtractor::Mapper, Wukong::Streamer::LineStreamer).run diff --git a/munging/wikipedia/pig_style_guide.md b/munging/wikipedia/pig_style_guide.md deleted file mode 100644 index 4a455f7..0000000 --- a/munging/wikipedia/pig_style_guide.md +++ /dev/null @@ -1,25 +0,0 @@ -# Pig Style Guide - -- Everything except names should be in all caps. E.g. - - first_join = JOIN pages BY (namespace,title) - RIGHT OUTER, pageviews BY (namespace, title); - -- Group and align columns in the script in ways that make sense. Don't be afraid of newlines. E.g. - - second_pass = FOREACH second_pass_j GENERATE - first_pass::from_id, pages::id, - first_pass::from_namespace, first_pass::from_title, - first_pass::into_namespace, first_pass::into_title; - -- Columns that form an important subset of the table's data should be easily accessible as a unit. - - E.g. the edge list above has the from and into ids in the first and second columns, making it easy to just get an edge list of ids without the additional metadata. - -- When at all possible, you should include sample LOAD statements in the comments for your script. This makes it easy to use the output of your script. - -- Parameterize as much as possible. All paths should be parameterized. - -- Parameters should be in all caps, e.g. $NODE. - -- Parameters should have defaults if at all possible. When you define the default, also include a comment describing the parameter. diff --git a/munging/wikipedia/redirects/redirects_page_metadata.pig b/munging/wikipedia/redirects/redirects_page_metadata.pig deleted file mode 100644 index 7d9e015..0000000 --- a/munging/wikipedia/redirects/redirects_page_metadata.pig +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Filters the page metadata table, leaving only pages that - * are redirects. - * - * Output Format (same as page_metadata): - * (id:int, namespace:int, title:chararray, restrictions:chararray, - * counter:long, is_redirect:int, is_new:int, random:float, touched:int, - * page_latest:int, len:int) - */ - -%default PAGE_METADATA '/data/results/wikipedia/full/page_metadata' -- page metadata for all pages in Wikipedia -%default REDIRECTS_OUT '/data/results/wikipedia/full/redirect_page_metadata' -- place to store page metadata for redirects - -page_metadata = LOAD '$PAGE_METADATA' AS (id:int, namespace:int, title:chararray, - restrictions:chararray, counter:long, is_redirect:int, is_new:int, random:float, - touched:int, page_latest:int, len:int); - -redirects = FILTER page_metadata BY (is_redirect == 1); -STORE redirects INTO '$REDIRECTS_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_articles.pig b/munging/wikipedia/subuniverse/sub_articles.pig deleted file mode 100644 index ab0aa43..0000000 --- a/munging/wikipedia/subuniverse/sub_articles.pig +++ /dev/null @@ -1,23 +0,0 @@ -/* - * This script filters the articles table, leaving only the articles - * in the specified subuniverse.
- * - * Output format: - * page_id:int, title:chararray, namespace:int, rev_date:int, rev_time:int, - * rev_epoch_time:long, rev_dow:int, article_text:chararray - */ - -%default ARTICLES '/data/results/wikipedia/full/articles' -- all articles in the wikipedia corpus -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_ARTICLES_OUT '/data/results/wikipedia/mini/articles' -- where output will be stored - -articles = LOAD '$ARTICLES' AS (page_id:int, title:chararray, namespace:int, - rev_date:int, rev_time:int, rev_epoch_time:long, rev_dow:int, article_text:chararray); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); -sub_articles_unfiltered = JOIN articles BY page_id, sub_nodes BY node_id; -sub_articles = FOREACH sub_articles_unfiltered GENERATE - articles::page_id AS page_id, articles::title AS title, articles::namespace AS namespace, - articles::rev_date AS rev_date, articles::rev_time AS rev_time, - articles::rev_epoch_time AS rev_epoch_time, articles::rev_dow AS rev_dow, - articles::article_text AS article_text; -STORE sub_articles INTO '$SUB_ARTICLES_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_page_metadata.pig b/munging/wikipedia/subuniverse/sub_page_metadata.pig deleted file mode 100644 index 38e79f9..0000000 --- a/munging/wikipedia/subuniverse/sub_page_metadata.pig +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This script filters the page metadata table, leaving only the pages - * in the specified subuniverse. - * - * Output format (same as page_metadata): - * id:int, namespace:int, title:chararray, restrictions:chararray, counter:long, - * is_redirect:int, is_new:int, random:float, touched:int, page_latest:int, len:int - */ - -%default PAGE_METADATA '/data/results/wikipedia/full/page_metadata' -- metadata for all pages in the wikipedia corpus -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_PAGE_METADATA_OUT '/data/results/wikipedia/mini/page_metadata' -- where output will be stored - -page_metadata = LOAD '$PAGE_METADATA' AS (id:int, namespace:int, title:chararray, - restrictions:chararray, counter:long, is_redirect:int, is_new:int, random:float, - touched:int, page_latest:int, len:int); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); -sub_page_metadata_unfiltered = JOIN page_metadata BY id, sub_nodes BY node_id; -sub_page_metadata = FOREACH sub_page_metadata_unfiltered GENERATE - page_metadata::id, page_metadata::namespace, page_metadata::title, - page_metadata::restrictions, page_metadata::counter, page_metadata::is_redirect, - page_metadata::is_new, page_metadata::random, page_metadata::touched, - page_metadata::page_latest, page_metadata::len; -STORE sub_page_metadata INTO '$SUB_PAGE_METADATA_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_pagelinks_from.pig b/munging/wikipedia/subuniverse/sub_pagelinks_from.pig deleted file mode 100644 index 5cdca66..0000000 --- a/munging/wikipedia/subuniverse/sub_pagelinks_from.pig +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This script filters the pagelinks table, leaving only the pagelinks - * that start within the supplied subuniverse.
- * - * Output format (same as augmented_pagelinks): - * from_id:int, into_id:int, from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray - */ - -%default PAGELINKS '/data/results/wikipedia/full/pagelinks' -- all edges in the pagelink graph (must be *directed*) -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_PAGELINKS_OUT '/data/results/wikipedia/mini/pagelinks' -- where output will be stored - -all_pagelinks = LOAD '$PAGELINKS' AS (from_id:int, into_id:int, - from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); - -sub_pagelinks_from = JOIN all_pagelinks BY from_id, sub_nodes BY node_id; -sub_pagelinks = FOREACH sub_pagelinks_from GENERATE - all_pagelinks::from_id, all_pagelinks::into_id, - all_pagelinks::from_namespace, all_pagelinks::from_title, - all_pagelinks::into_namespace, all_pagelinks::into_title; -STORE sub_pagelinks INTO '$SUB_PAGELINKS_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_pagelinks_into.pig b/munging/wikipedia/subuniverse/sub_pagelinks_into.pig deleted file mode 100644 index 4d7e5da..0000000 --- a/munging/wikipedia/subuniverse/sub_pagelinks_into.pig +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This script filters the pagelinks table, leaving only the pagelinks - * that terminate within the supplied subuniverse. - * - * Output format (same as augmented_pagelinks): - * from_id:int, into_id:int, from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray - */ - -%default PAGELINKS '/data/results/wikipedia/full/pagelinks' -- all edges in the pagelink graph (must be *directed*) -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_PAGELINKS_OUT '/data/results/wikipedia/mini/pagelinks' -- where output will be stored - -all_pagelinks = LOAD '$PAGELINKS' AS (from_id:int, into_id:int, - from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); - -sub_pagelinks_into = JOIN all_pagelinks BY into_id, sub_nodes BY node_id; -sub_pagelinks = FOREACH sub_pagelinks_into GENERATE - all_pagelinks::from_id, all_pagelinks::into_id, - all_pagelinks::from_namespace, all_pagelinks::from_title, - all_pagelinks::into_namespace, all_pagelinks::into_title; -STORE sub_pagelinks INTO '$SUB_PAGELINKS_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_pagelinks_within.pig b/munging/wikipedia/subuniverse/sub_pagelinks_within.pig deleted file mode 100644 index 8955ce8..0000000 --- a/munging/wikipedia/subuniverse/sub_pagelinks_within.pig +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This script filters the pagelinks table, leaving only the pagelinks - * that start and end within the supplied subuniverse.
- * - * Output format (same as augmented_pagelinks): - * from_id:int, into_id:int, from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray - */ - -%default PAGELINKS '/data/results/wikipedia/full/pagelinks' -- all edges in the pagelink graph (must be *directed*) -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_PAGELINKS_OUT '/data/results/wikipedia/mini/pagelinks' -- where output will be stored - -all_pagelinks = LOAD '$PAGELINKS' AS (from_id:int, into_id:int, - from_namespace:int, from_title:chararray, into_namespace:int, into_title:chararray); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); - -sub_pagelinks_in = JOIN all_pagelinks BY from_id, sub_nodes BY node_id; -sub_pagelinks_unfiltered = JOIN sub_pagelinks_in BY into_id, sub_nodes BY node_id; -sub_pagelinks = FOREACH sub_pagelinks_unfiltered GENERATE - sub_pagelinks_in::all_pagelinks::from_id, - sub_pagelinks_in::all_pagelinks::into_id, - sub_pagelinks_in::all_pagelinks::from_namespace, - sub_pagelinks_in::all_pagelinks::from_title, - sub_pagelinks_in::all_pagelinks::into_namespace, - sub_pagelinks_in::all_pagelinks::into_title; -STORE sub_pagelinks INTO '$SUB_PAGELINKS_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_pageviews.pig b/munging/wikipedia/subuniverse/sub_pageviews.pig deleted file mode 100644 index 375d547..0000000 --- a/munging/wikipedia/subuniverse/sub_pageviews.pig +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This script filters the pageviews table, leaving only the pageviews - * in the specified subuniverse. - * - * Parameters: - * pageviews - all pageviews in the wikipedia corpus - * sub_nodes - the list of nodes in your subuniverse - * sub_pageviews_out - the directory where output will be stored - * - * Output format (same as augment_pageviews.pig): - * page_id:int, namespace:int, title:chararray, num_visitors:long, - * date:int, time:int, epoch_time:long, day_of_week:int - */ - -%default PAGEVIEWS '/data/results/wikipedia/full/pageviews' -- all pageview stats for the English Wikipedia -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_PAGEVIEWS_OUT '/data/results/wikipedia/mini/pageviews' -- where output will be stored - -pageviews = LOAD '$PAGEVIEWS' AS (page_id:int, namespace:int, title:chararray, - num_visitors:long, date:int, time:int, epoch_time:long, day_of_week:int); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); -sub_pageviews_unfiltered = JOIN pageviews BY page_id, sub_nodes BY node_id; -sub_pageviews = FOREACH sub_pageviews_unfiltered GENERATE - pageviews::page_id AS page_id, pageviews::namespace AS namespace, pageviews::title AS title, - pageviews::num_visitors AS num_visitors, pageviews::date AS date, pageviews::time AS time, - pageviews::epoch_time AS epoch_time, pageviews::day_of_week AS day_of_week; -STORE sub_pageviews INTO '$SUB_PAGEVIEWS_OUT'; diff --git a/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig b/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig deleted file mode 100644 index 88e8692..0000000 --- a/munging/wikipedia/subuniverse/sub_undirected_pagelinks_within.pig +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This script filters the pagelinks table, leaving only the pagelinks - * that start and end within the supplied subuniverse.
- * - * Output format (same as undirected_pagelinks): - * node_a:int, node_b:int, a_into_b:int, b_into_a:int, is_symmetric:int - */ - -%default UNDIRECTED_PAGELINKS '/data/results/wikipedia/full/undirected_pagelinks' -- all edges in the pagelink graph -%default SUB_NODES '/data/results/wikipedia/mini/nodes' -- all nodes in the subuniverse -%default SUB_PAGELINKS_OUT '/data/results/wikipedia/mini/pagelinks' -- where output will be stored - -all_pagelinks = LOAD '$UNDIRECTED_PAGELINKS' AS (node_a:int, node_b:int, a_into_b:int, b_into_a:int, is_symmetric:int); -sub_nodes = LOAD '$SUB_NODES' AS (node_id:int); - -sub_pagelinks_in = JOIN all_pagelinks BY node_a, sub_nodes BY node_id; -sub_pagelinks_unfiltered = JOIN sub_pagelinks_in BY node_b, sub_nodes BY node_id; -sub_pagelinks = FOREACH sub_pagelinks_unfiltered GENERATE - sub_pagelinks_in::all_pagelinks::node_a AS node_a, - sub_pagelinks_in::all_pagelinks::node_b AS node_b, - sub_pagelinks_in::all_pagelinks::a_into_b AS a_into_b, - sub_pagelinks_in::all_pagelinks::b_into_a AS b_into_a, - sub_pagelinks_in::all_pagelinks::is_symmetric AS is_symmetric; -STORE sub_pagelinks INTO '$SUB_PAGELINKS_OUT'; diff --git a/munging/wikipedia/utils/get_namespaces.rb b/munging/wikipedia/utils/get_namespaces.rb deleted file mode 100755 index 770534d..0000000 --- a/munging/wikipedia/utils/get_namespaces.rb +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env ruby -# encoding:UTF-8 - -# A script that fetches the namespace -> id mapping for -# all wikipedia languages. The output is stored (by default) -# in a json file that represents a hash from namespace name => id - -require 'ruby-progressbar' -require 'open-uri' -require 'set' -require 'configliere' -require 'json' - -Settings.use :commandline - -NS_FILE = 'namespaces' - -Settings.define :out_dir, flag: 'o', description: "Directory to drop the namespace file into.", default: File.expand_path(File.dirname(__FILE__)) -Settings.define :verbose, flag: 'v', description: "Get chatty", type: :boolean, default: false -Settings.define :silent, description: "Say nothing", type: :boolean, default: false -Settings.define :head_length, flag: 'h', description: "The number of lines to read into the wiki xml for the namespace definitions.", type: Integer, default: 100 -Settings.define :std_out, flag: 's', description: "Print output to standard out.", type: :boolean, default: false -Settings.define :to_tsv, flag: 't', description: 'Format the output as a TSV instead of JSON', type: :boolean, default:false - -Settings.resolve! 
- -Settings.out_dir = File.expand_path(Settings.out_dir) - -namespaces = {} -namespaces_by_wiki = {} - -wikis_page = open('http://dumps.wikimedia.org/backup-index.html') -wikis = Set.new - -# grab the list of wikis -wikis_page.each_line do |line| - next unless line =~ />[a-z]*wiki</ - # the gsub leaves the line's trailing newline in place; [0..-2] strips it - wikis << line.gsub(/.*>([a-z]*)wiki<.*/,'\1')[0..-2] -end - -if Settings.verbose - $stderr.puts "Retrieved the names of #{wikis.size} wikis" - $stderr.puts "Grabbing namespace data" -elsif (not Settings.silent) - progressbar = ProgressBar.create(:title => "Retrieving Namespaces...", :total => wikis.size, :format => '%t |%B| %c/%C %e ') -end - -wikis.each_with_index do |prefix,index| - progressbar.increment unless (Settings.silent or Settings.verbose) - namespaces_by_wiki[prefix] = {} - $stderr.puts "Getting namespaces for #{prefix}.wikipedia.org" if Settings.verbose - raw = `curl -s 'http://dumps.wikimedia.org/#{prefix}wiki/latest/#{prefix}wiki-latest-pages-logging.xml.gz' | gzcat | head -n #{Settings.head_length}` - #TODO: Make this actually work - if $?.exitstatus != 0 - out = "Could not access page dump for #{prefix}wiki." + - " This dump is probably being updated now." + - " Namespaces for this wiki will not be included in the final output" - $stderr.puts out - next - end - raw.each_line do |line| - next unless line =~ /.*<\/?namespace[^>]*>/ - match = /<\/?namespace key="(?<key>-?\d+)"[^>]*>(?<ns>[^<]*)<\/namespace>/.match(line) - next if match.nil? - namespaces[match[:ns]] = match[:key].to_i - namespaces_by_wiki[prefix][match[:ns]] = match[:key].to_i - $stderr.puts "  #{match[:ns]} -> #{match[:key]}" if Settings.verbose - end - $stderr.puts "Finished getting namespaces for #{prefix}.wikipedia.org. #{wikis.size - index} wikis to go" if Settings.verbose -end - -if Settings.to_tsv - output = "" - namespaces.each_pair do |k,v| - output += "#{k}\t#{v}\n" - end -else - output = namespaces.to_json -end - -if Settings.std_out - pp output -else - filename = "#{Settings.out_dir}/#{NS_FILE}.#{Settings.to_tsv ?
"tsv" : "json"}" - File.open(filename, 'w') { |f| f.write(output)} -end diff --git a/munging/wikipedia/utils/munging_utils.rb b/munging/wikipedia/utils/munging_utils.rb deleted file mode 100644 index 2f937f7..0000000 --- a/munging/wikipedia/utils/munging_utils.rb +++ /dev/null @@ -1,11 +0,0 @@ -# encoding:UTF-8 -module MungingUtils - def time_columns_from_time(time) - columns = [] - columns << "%04d%02d%02d" % [time.year, time.month, time.day] - columns << "%02d%02d%02d" % [time.hour, time.min, time.sec] - columns << time.to_i - columns << time.wday - return columns - end -end diff --git a/munging/wikipedia/utils/namespaces.json b/munging/wikipedia/utils/namespaces.json deleted file mode 100644 index 27ab060..0000000 --- a/munging/wikipedia/utils/namespaces.json +++ /dev/null @@ -1 +0,0 @@ -{"Phương tiện":-2,"Đặc biệt":-1,"Thảo luận":1,"Thành viên":2,"Thảo luận Thành viên":3,"Wikipedia":4,"Thảo luận Wikipedia":5,"Tập tin":6,"Thảo luận Tập tin":7,"MediaWiki":8,"Thảo luận MediaWiki":9,"Bản mẫu":10,"Thảo luận Bản mẫu":11,"Trợ giúp":12,"Thảo luận Trợ giúp":13,"Thể loại":14,"Thảo luận Thể loại":15,"Chủ đề":100,"Thảo luận Chủ đề":101,"Медіа":-2,"Спеціальна":-1,"Обговорення":1,"Користувач":2,"Обговорення користувача":3,"Вікіпедія":4,"Обговорення Вікіпедії":5,"Файл":6,"Обговорення файлу":7,"Обговорення MediaWiki":9,"Шаблон":10,"Обговорення шаблону":11,"Довідка":12,"Обговорення довідки":13,"Категорія":14,"Обговорення категорії":15,"Портал":100,"Обговорення порталу":101,"Média":-2,"Speciális":-1,"Vita":1,"Szerkesztő":2,"Szerkesztővita":3,"Wikipédia":4,"Wikipédia-vita":5,"Fájl":6,"Fájlvita":7,"MediaWiki-vita":9,"Sablon":10,"Sablonvita":11,"Segítség":12,"Segítségvita":13,"Kategória":14,"Kategóriavita":15,"Téma":90,"Témavita":91,"Összefoglaló":92,"Összefoglaló-vita":93,"Portál":100,"Portálvita":101,"Media":-2,"Khas":-1,"Perbincangan":1,"Pengguna":2,"Perbincangan pengguna":3,"Perbincangan Wikipedia":5,"Fail":6,"Perbincangan fail":7,"Perbincangan MediaWiki":9,"Templat":10,"Perbincangan templat":11,"Bantuan":12,"Perbincangan bantuan":13,"Kategori":14,"Perbincangan kategori":15,"Portal":100,"Perbualan Portal":101,"מעדיע":-2,"באַזונדער":-1,"רעדן":1,"באַניצער":2,"באַניצער רעדן":3,"װיקיפּעדיע":4,"װיקיפּעדיע רעדן":5,"טעקע":6,"טעקע רעדן":7,"מעדיעװיקי":8,"מעדיעװיקי רעדן":9,"מוסטער":10,"מוסטער רעדן":11,"הילף":12,"הילף רעדן":13,"קאַטעגאָריע":14,"קאַטעגאָריע רעדן":15,"פארטאל":100,"פארטאל רעדן":101,"Sipeciås":-1,"Copene":1,"Uzeu":2,"Uzeu copene":3,"Wikipedia copene":5,"Imådje":6,"Imådje copene":7,"MediaWiki copene":9,"Modele":10,"Modele copene":11,"Aidance":12,"Aidance copene":13,"Categoreye":14,"Categoreye copene":15,"Special":-1,"Talk":1,"User":2,"User talk":3,"Wikipedia talk":5,"File":6,"File talk":7,"MediaWiki talk":9,"Template":10,"Template talk":11,"Help":12,"Help talk":13,"Category":14,"Category talk":15,"ܡܝܕܝܐ":-2,"ܕܝܠܢܝܐ":-1,"ܡܡܠܠܐ":1,"ܡܦܠܚܢܐ":2,"ܡܡܠܠܐ ܕܡܦܠܚܢܐ":3,"ܘܝܩܝܦܕܝܐ":4,"ܡܡܠܠܐ ܕܘܝܩܝܦܕܝܐ":5,"ܠܦܦܐ":6,"ܡܡܠܠܐ ܕܠܦܦܐ":7,"ܡܝܕܝܐܘܝܩܝ":8,"ܡܡܠܠܐ ܕܡܝܕܝܐܘܝܩܝ":9,"ܩܠܒܐ":10,"ܡܡܠܠܐ ܕܩܠܒܐ":11,"ܥܘܕܪܢܐ":12,"ܡܡܠܠܐ ܕܥܘܕܪܢܐ":13,"ܣܕܪܐ":14,"ܡܡܠܠܐ ܕܣܕܪܐ":15,"Istimiwa":-1,"Pamandiran":1,"Pamakai":2,"Pamandiran Pamakai":3,"Wikipidia":4,"Pamandiran Wikipidia":5,"Barakas":6,"Pamandiran Barakas":7,"Pamandiran MediaWiki":9,"Citakan":10,"Pamandiran Citakan":11,"Patulung":12,"Pamandiran Patulung":13,"Tumbung":14,"Pamandiran Tumbung":15,"Медиа":-2,"Служебная":-1,"Обсуждение":1,"Участник":2,"Обсуждение участника":3,"Википедия":4,"Баитам Википедия йылiсь":5,"Обсуждение файла":7,"Обсуждение MediaWiki":9,"Обсуждение 
шаблона":11,"Справка":12,"Обсуждение справки":13,"Категория":14,"Обсуждение категории":15,"Спецӹлӹштӓш":-1,"Кӓнгӓшӹмӓш":1,"Сирӹшӹ":2,"Сирӹшӹм кӓнгӓшӹмӓш":3,"Википеди":4,"Википедим кӓнгӓшӹмӓш":5,"Файлым кӓнгӓшӹмӓш":7,"MediaWiki-м кӓнгӓшӹмӓш":9,"Шаблоным кӓнгӓшӹмӓш":11,"Палшык":12,"Палшыкым кӓнгӓшӹмӓш":13,"Категори":14,"Категорим кӓнгӓшӹмӓш":15,"Strategic Planning":4,"Strategic Planning talk":5,"Thread":90,"Thread talk":91,"Summary":92,"Summary talk":93,"Proposal":106,"Proposal talk":107,"მედია":-2,"სპეციალური":-1,"განხილვა":1,"მომხმარებელი":2,"მომხმარებლის განხილვა":3,"ვიკიპედია":4,"ვიკიპედია სხუნუა":5,"ფაილი":6,"ფაილის განხილვა":7,"მედიავიკი":8,"მედიავიკის განხილვა":9,"თარგი":10,"თარგის განხილვა":11,"დახმარება":12,"დახმარების განხილვა":13,"კატეგორია":14,"კატეგორიის განხილვა":15,"Project":4,"Project talk":5,"Manual":100,"Manual talk":101,"Extension":102,"Extension talk":103,"API":104,"API talk":105,"Skin":106,"Skin talk":107,"Module":828,"Module talk":829,"Translations":1198,"Translations talk":1199,"VisualEditor":2500,"VisualEditor talk":2501,"Bolediša":1,"Mošomi":2,"Boledišana le Mošomi":3,"Dipolelo tša Wikipedia":5,"Seswantšho":6,"Poledišano ya Seswantšho":7,"Poledišano ya MediaWiki":9,"Poledišano ya Template":11,"Thušo":12,"Poledišano ya Thušo":13,"Setensele":14,"Poledišano ya Setensele":15,"Speciaal":-1,"Euverlèk":1,"Gebroeker":2,"Euverlèk gebroeker":3,"Euverlèk Wikipedia":5,"Plaetje":6,"Euverlèk plaetje":7,"Euverlèk MediaWiki":9,"Sjabloon":10,"Euverlèk sjabloon":11,"Euverlèk help":13,"Categorie":14,"Euverlèk categorie":15,"Portaol":100,"Euverlèk portaol":101,"Güiquipeya":4,"Güiquipeya talk":5,"Prantilla":10,"Midia":-2,"Espesial":-1,"Tungtungan":1,"Agar-aramat":2,"Agar-aramat tungtungan":3,"Wikipedia tungtungan":5,"Papeles":6,"Papeles tungtungan":7,"MediaWiki tungtungan":9,"Plantilia":10,"Plantilia tungtungan":11,"Tulong":12,"Tulong tungtungan":13,"Kategoria":14,"Kategoria tungtungan":15,"Μέσον":-2,"Ειδικόν":-1,"Καλάτσεμαν":1,"Χρήστες":2,"Καλάτσεμαν χρήστε":3,"Βικιπαίδεια":4,"Βικιπαίδεια καλάτσεμαν":5,"Αρχείον":6,"Καλάτσεμαν αρχείονος":7,"Πρότυπον":10,"Καλάτσεμαν πρότυπι":11,"Βοήθειαν":12,"Καλάτσεμαν βοήθειας":13,"Κατηγορίαν":14,"Καλάτσεμαν κατηγορίας":15,"Medium":-2,"Spezial":-1,"Diskussion":1,"Benutzer":2,"Benutzer Diskussion":3,"Wikipedia Diskussion":5,"Datei":6,"Datei Diskussion":7,"MediaWiki Diskussion":9,"Vorlage":10,"Vorlage Diskussion":11,"Hilfe":12,"Hilfe Diskussion":13,"Kategorie":14,"Kategorie Diskussion":15,"Mediya":-2,"Maasus":-1,"Dartışma":1,"Kullanıcı":2,"Kullanıcı dartışma":3,"Vikipediya":4,"Vikipediyanın laflanması":5,"Dosye":6,"Dosye dartışma":7,"MediaWiki dartışma":9,"Şablon":10,"Şablon dartışma":11,"Yardım":12,"Yardım dartışma":13,"Kategoriya":14,"Kategoriya dartışma":15,"Dischbedutt":1,"Yuuser":2,"Yuuser Dischbedutt":3,"Wikipedia Dischbedutt":5,"Feil":6,"Feil Dischbedutt":7,"MediaWiki Dischbedutt":9,"Moddel":10,"Moddel Dischbedutt":11,"Hilf":12,"Hilf Dischbedutt":13,"Abdeeling":14,"Abdeeling Dischbedutt":15,"Уикипедиэ":4,"Уикипедиэм и тепсэлъыхьыгъуэ":5,"Medeja":-2,"Seviškuo":-1,"Sprīža":1,"Lītuotuojs":2,"Sprīža ap lītuotuoju":3,"Vikipedeja":4,"Vikipedejis sprīža":5,"Fails":6,"Sprīža ap failu":7,"Sprīža ap MediaWiki":9,"Taiss":10,"Sprīža ap taisu":11,"Paleigs":12,"Sprīža ap paleigu":13,"Kategoreja":14,"Sprīža ap kategoreju":15,"Mèdia":-2,"Spiciali":-1,"Discussioni":1,"Utenti":2,"Discussioni utenti":3,"Discussioni Wikipedia":5,"Discussioni file":7,"Discussioni MediaWiki":9,"Discussioni template":11,"Aiutu":12,"Discussioni aiutu":13,"Catigurìa":14,"Discussioni 
catigurìa":15,"Purtali":100,"Discussioni purtali":101,"Pruggettu":102,"Discussioni pruggettu":103,"Portal Diskussion":101,"Commons":4,"Commons talk":5,"Creator":100,"Creator talk":101,"TimedText":710,"TimedText talk":711,"Sequence":104,"Sequence talk":105,"Institution":106,"Institution talk":107,"ميديا":-2,"خاص":-1,"نقاش":1,"مستخدم":2,"نقاش المستخدم":3,"ويكيبيديا":4,"نقاش ويكيبيديا":5,"ملف":6,"نقاش الملف":7,"ميدياويكي":8,"نقاش ميدياويكي":9,"قالب":10,"نقاش القالب":11,"مساعدة":12,"نقاش المساعدة":13,"تصنيف":14,"نقاش التصنيف":15,"بوابة":100,"نقاش البوابة":101,"ملحق":104,"نقاش الملحق":105,"Medya":-2,"Özel":-1,"Tartışma":1,"Kullanıcı mesaj":3,"Vikipedi":4,"Vikipedi tartışma":5,"Dosya":6,"Dosya tartışma":7,"MediaWiki tartışma":9,"Şablon tartışma":11,"Yardım tartışma":13,"Kategori tartışma":15,"Portal tartışma":101,"मीडिया":-2,"विशेष":-1,"वार्तालाप":1,"प्रयोगकर्ता":2,"प्रयोगकर्ता वार्ता":3,"विकिपीडिया":4,"विकिपीडिया वार्ता":5,"चित्र":6,"चित्र वार्ता":7,"मीडियाविकि":8,"मीडियाविकि वार्ता":9,"टेम्पलेट":10,"टेम्पलेट वार्ता":11,"मदद":12,"मदद वार्ता":13,"श्रेणी":14,"श्रेणी वार्ता":15,"Aide":12,"Discussion Aide":103,"Hjælp":12,"Hjælp diskussion":105,"Helpo":12,"Helpa diskuto":107,"Hjälp":12,"Hjälp diskussion":109,"Ayuda":12,"Ayuda Discusión":111,"Aiuto":12,"Discussioni aiuto":13,"ヘルプ":114,"ヘルプ‐ノート":115,"NL Help":116,"Overleg help":13,"Pomoc":12,"Dyskusja pomocy":13,"Ajuda":12,"Ajuda Discussão":13,"CA Ajuda":122,"CA Ajuda Discussió":123,"Hjelp":12,"Hjelp diskusjon":125,"帮助":12,"帮助 对话":127,"Помощь":12,"Помощь Дискуссия":129,"Pomoč":12,"Pogovor o pomoči":13,"Medio":-2,"Especial":-1,"Discusión":1,"Usuario":2,"Usuario discusión":3,"Wikipidiya":4,"Wikipidiyan Aruskipäwi":5,"Archivo":6,"Archivo discusión":7,"MediaWiki discusión":9,"Plantilla":10,"Plantilla discusión":11,"Ayuda discusión":13,"Categoría":14,"Categoría discusión":15,"Conversa":1,"Conversa usuario":3,"Conversa Wikipedia":5,"Ficheiro":6,"Conversa ficheiro":7,"Conversa MediaWiki":9,"Modelo":10,"Conversa modelo":11,"Axuda":12,"Conversa axuda":13,"Conversa categoría":15,"Portal talk":101,"Libro":102,"Conversa libro":103,"माध्यमम्":-2,"विशेषम्":-1,"सम्भाषणम्":1,"योजकः":2,"योजकसम्भाषणम्":3,"विकिपीडियासम्भाषणम्":5,"चित्रम्":6,"चित्रसम्भाषणम्":7,"मिडीयाविकी":8,"मिडियाविकीसम्भाषणम्":9,"फलकम्":10,"फलकस्य सम्भाषणम्":11,"सहाय्यम्":12,"सहाय्यस्य सम्भाषणम्":13,"वर्गः":14,"वर्गसम्भाषणम्":15,"Uslig":-1,"Mmeslay":1,"Amseqdac":2,"Amyannan umsqedac":3,"Amyannan n Wikipedia":5,"Tugna":6,"Amyannan n tugna":7,"Amyannan n MediaWiki":9,"Talɣa":10,"Amyannan n talɣa":11,"Tallat":12,"Amyannan n tallat":13,"Taggayt":14,"Amyannan n taggayt":15,"Speciaol":-1,"Overleg":1,"Gebruker":2,"Overleg gebruker":3,"Overleg Wikipedia":5,"Overleg plaetje":7,"Overleg MediaWiki":9,"Overleg sjabloon":11,"Ulpe":12,"Overleg ulpe":13,"Overleg categorie":15,"Espesiál":-1,"Diskusaun":1,"Uza-na'in":2,"Diskusaun Uza-na'in":3,"Diskusaun Wikipedia":5,"Imajen":6,"Diskusaun Imajen":7,"Diskusaun MediaWiki":9,"Diskusaun Template":11,"Diskusaun Ajuda":13,"Diskusaun Kategoria":15,"Berezi":-1,"Eztabaida":1,"Lankide":2,"Lankide eztabaida":3,"Wikipedia eztabaida":5,"Fitxategi":6,"Fitxategi eztabaida":7,"MediaWiki eztabaida":9,"Txantiloi":10,"Txantiloi eztabaida":11,"Laguntza":12,"Laguntza eztabaida":13,"Kategoria eztabaida":15,"Atari":100,"Atari eztabaida":101,"Wikiproiektu":102,"Wikiproiektu 
eztabaida":103,"Spesial":-1,"Diskusjon":1,"Bruker":2,"Brukerdiskusjon":3,"Wikipedia-diskusjon":5,"Fil":6,"Fildiskusjon":7,"MediaWiki-diskusjon":9,"Mal":10,"Maldiskusjon":11,"Hjelpdiskusjon":13,"Kategoridiskusjon":15,"Portaldiskusjon":101,"미디어":-2,"특수기능":-1,"토론":1,"사용자":2,"사용자토론":3,"위키백과":4,"위키백과토론":5,"파일":6,"파일토론":7,"미디어위키":8,"미디어위키토론":9,"틀":10,"틀토론":11,"도움말":12,"도움말토론":13,"분류":14,"분류토론":15,"들머리":100,"들머리토론":101,"위키프로젝트":102,"위키프로젝트토론":103,"Meta":4,"Meta talk":5,"Grants":200,"Grants talk":201,"Research":202,"Research talk":203,"Participation":204,"Participation talk":205,"مدیا":-2,"ویژه":-1,"بحث":1,"کاربر":2,"بحث کاربر":3,"ویکی‌پدیا":4,"بحث ویکی‌پدیا":5,"پرونده":6,"بحث پرونده":7,"مدیاویکی":8,"بحث مدیاویکی":9,"الگو":10,"بحث الگو":11,"راهنما":12,"بحث راهنما":13,"رده":14,"بحث رده":15,"درگاه":100,"بحث درگاه":101,"کتاب":102,"بحث کتاب":103,"Ciciarada":1,"Druvadur":2,"Ciciarada Druvadur":3,"Wikipedia Ciciarada":5,"Archivi":6,"Ciciarada Archivi":7,"Ciciarada MediaWiki":9,"Mudel":10,"Ciciarada Mudel":11,"Jüt":12,"Ciciarada Jüt":13,"Categuria":14,"Ciciarada Categuria":15,"Descüssiú Portal":101,"Purtaal":102,"Descüssiun Purtaal":103,"මාධ්‍යය":-2,"විශේෂ":-1,"සාකච්ඡාව":1,"පරිශීලක":2,"පරිශීලක සාකච්ඡාව":3,"විකිපීඩියා":4,"විකිපීඩියා සාකච්ඡාව":5,"ගොනුව":6,"ගොනුව සාකච්ඡාව":7,"මාධ්‍යවිකි":8,"මාධ්‍යවිකි සාකච්ඡාව":9,"සැකිල්ල":10,"සැකිලි සාකච්ඡාව":11,"උදවු":12,"උදවු සාකච්ඡාව":13,"ප්‍රවර්ගය":14,"ප්‍රවර්ග සාකච්ඡාව":15,"ද්වාරය":100,"ද්වාරය සාකච්ඡාව":101,"Spécial":-1,"Discussion":1,"Utilisateur":2,"Discussion utilisateur":3,"Discussion Wikipédia":5,"Fichier":6,"Discussion fichier":7,"Discussion MediaWiki":9,"Modèle":10,"Discussion modèle":11,"Discussion aide":13,"Catégorie":14,"Discussion catégorie":15,"Portail":100,"Discussion Portail":101,"Projet":102,"Discussion Projet":103,"Référence":104,"Discussion Référence":105,"Toiminnot":-1,"Keskustelu":1,"Käyttäjä":2,"Keskustelu käyttäjästä":3,"Keskustelu Wikipediasta":5,"Tiedosto":6,"Keskustelu tiedostosta":7,"Järjestelmäviesti":8,"Keskustelu järjestelmäviestistä":9,"Malline":10,"Keskustelu mallineesta":11,"Ohje":12,"Keskustelu ohjeesta":13,"Luokka":14,"Keskustelu luokasta":15,"Teemasivu":100,"Keskustelu teemasivusta":101,"Metasivu":102,"Keskustelu metasivusta":103,"Kirja":104,"Keskustelu kirjasta":105,"Discussió":1,"Usuari":2,"Usuari Discussió":3,"Viquipèdia":4,"Viquipèdia Discussió":5,"Fitxer":6,"Fitxer Discussió":7,"MediaWiki Discussió":9,"Plantilla Discussió":11,"Ajuda Discussió":13,"Categoria":14,"Categoria Discussió":15,"Portal Discussió":101,"Viquiprojecte":102,"Viquiprojecte Discussió":103,"Медиј":-2,"Посебно":-1,"Разговор":1,"Корисник":2,"Разговор са корисником":3,"Википедија":4,"Разговор о Википедији":5,"Датотека":6,"Разговор о датотеци":7,"Медијавики":8,"Разговор о Медијавикију":9,"Разговор о шаблону":11,"Помоћ":12,"Разговор о помоћи":13,"Категорија":14,"Разговор о категорији":15,"Разговор о порталу":101,"वार्ता":1,"सदस्य":2,"सदस्य वार्ता":3,"साँचा":10,"साँचा वार्ता":11,"सहायता":12,"सहायता वार्ता":13,"प्रवेशद्वार":100,"प्रवेशद्वार वार्ता":101,"Mediji":-2,"Posebno":-1,"Razgovor":1,"Korisnik":2,"Razgovor sa korisnikom":3,"Razgovor s Wikipediom":5,"Datoteka":6,"Razgovor o datoteci":7,"MediaWiki razgovor":9,"Šablon":10,"Razgovor o šablonu":11,"Pomoć":12,"Razgovor o pomoći":13,"Kategorija":14,"Razgovor o kategoriji":15,"Razgovor o portalu":101,"Wikispecies":4,"Wikispecies talk":5,"شا":-1,"گپ":1,"کارور":2,"کارور گپ":3,"ویکی‌پدیا گپ":5,"پرونده گپ":7,"مدیاویکی گپ":9,"شابلون":10,"شابلون گپ":11,"رانما":12,"رانما گپ":13,"رج":14,"رج گپ":15,"پورتال":100,"پورتال 
گپ":101,"Обсуждение Wikipedia":5,"Discussion Wikipedia":5,"بحث Wikipedia":5,"Ihü kárírí":-1,"Okwu":1,"Ọbanife":2,"Okwu ọbanife":3,"Okwu Wikipedia":5,"Usòrò":6,"Okwu usòrò":7,"MidiaWiki":8,"Okwu MidiaWiki":9,"Àtụ":10,"Okwu àtụ":11,"Nkwadọ":12,"Okwu nkwadọ":13,"Òtù":14,"Okwu òtù":15,"Mba'echĩchĩ":-1,"Myangekõi":1,"Puruhára":2,"Puruhára myangekõi":3,"Vikipetã":4,"Vikipetã myangekõi":5,"Ta'ãnga":6,"Ta'ãnga myangekõi":7,"MediaWiki myangekõi":9,"Tembiecharã":10,"Tembiecharã myangekõi":11,"Pytyvõ":12,"Pytyvõ myangekõi":13,"Ñemohenda":14,"Ñemohenda myangekõi":15,"Ярҙамсы":-1,"Фекерләшеү":1,"Ҡатнашыусы":2,"Ҡатнашыусы менән һөйләшеү":3,"Wikipedia буйынса фекерләшеү":5,"Рәсем":6,"Рәсем буйынса фекерләшеү":7,"MediaWiki буйынса фекерләшеү":9,"Ҡалып":10,"Ҡалып буйынса фекерләшеү":11,"Белешмә":12,"Белешмә буйынса фекерләшеү":13,"Төркөм":14,"Төркөм буйынса фекерләшеү":15,"メディア":-2,"特別":-1,"ノート":1,"利用者":2,"利用者‐会話":3,"Wikipedia‐ノート":5,"ファイル":6,"ファイル‐ノート":7,"MediaWiki‐ノート":9,"Template‐ノート":11,"Help‐ノート":13,"Category‐ノート":15,"Portal‐ノート":101,"プロジェクト":102,"プロジェクト‐ノート":103,"Multimédia":-2,"Discussão":1,"Usuário(a)":2,"Usuário(a) Discussão":3,"Wikipédia Discussão":5,"Ficheiro Discussão":7,"MediaWiki Discussão":9,"Predefinição":10,"Predefinição Discussão":11,"Categoria Discussão":15,"Portal Discussão":101,"Anexo":104,"Anexo Discussão":103,"Livro":104,"Livro Discussão":105,"สื่อ":-2,"พิเศษ":-1,"พูดคุย":1,"ผู้ใช้":2,"คุยกับผู้ใช้":3,"วิกิพีเดีย":4,"คุยเรื่องวิกิพีเดีย":5,"ไฟล์":6,"คุยเรื่องไฟล์":7,"มีเดียวิกิ":8,"คุยเรื่องมีเดียวิกิ":9,"แม่แบบ":10,"คุยเรื่องแม่แบบ":11,"วิธีใช้":12,"คุยเรื่องวิธีใช้":13,"หมวดหมู่":14,"คุยเรื่องหมวดหมู่":15,"สถานีย่อย":100,"คุยเรื่องสถานีย่อย":101,"Speciální":-1,"Diskuse":1,"Wikipedista":2,"Diskuse s wikipedistou":3,"Wikipedie":4,"Diskuse k Wikipedii":5,"Soubor":6,"Diskuse k souboru":7,"Diskuse k MediaWiki":9,"Šablona":10,"Diskuse k šabloně":11,"Nápověda":12,"Diskuse k nápovědě":13,"Diskuse ke kategorii":15,"Diskuse k portálu":101,"Rejstřík":102,"Diskuse k rejstříku":103,"Discuție":1,"Utilizator":2,"Discuție Utilizator":3,"Discuție Wikipedia":5,"Fișier":6,"Discuție Fișier":7,"Discuție MediaWiki":9,"Format":10,"Discuție Format":11,"Ajutor":12,"Discuție Ajutor":13,"Discuție Categorie":15,"Discuție Portal":101,"Proiect":102,"Discuție Proiect":103,"Медия":-2,"Специални":-1,"Беседа":1,"Потребител":2,"Потребител беседа":3,"Уикипедия":4,"Уикипедия беседа":5,"Файл беседа":7,"МедияУики":8,"МедияУики беседа":9,"Шаблон беседа":11,"Помощ":12,"Помощ беседа":13,"Категория беседа":15,"Портал беседа":101,"Μέσο":-2,"Ειδικό":-1,"Συζήτηση":1,"Χρήστης":2,"Συζήτηση χρήστη":3,"Βικιπαίδεια συζήτηση":5,"Αρχείο":6,"Συζήτηση αρχείου":7,"Συζήτηση MediaWiki":9,"Πρότυπο":10,"Συζήτηση προτύπου":11,"Βοήθεια":12,"Συζήτηση βοήθειας":13,"Κατηγορία":14,"Συζήτηση κατηγορίας":15,"Πύλη":100,"Συζήτηση πύλης":101,"Nünamakanäd":-2,"Patikos":-1,"Bespik":1,"Geban":2,"Gebanibespik":3,"Vükiped":4,"Bespik dö Vükiped":5,"Ragiv":6,"Ragivibespik":7,"Sitanuns":8,"Bespik dö sitanuns":9,"Samafomot":10,"Samafomotibespik":11,"Yuf":12,"Yufibespik":13,"Klad":14,"Kladibespik":15,"Xibaarukaay":-2,"Jagleel":-1,"Waxtaan":1,"Jëfandikukat":2,"Waxtaani jëfandikukat":3,"Wikipedia waxtaan":5,"Dencukaay":6,"Waxtaani dencukaay":7,"Waxtaani MediaWiki":9,"Royuwaay":10,"Waxtaani royuwaay":11,"Ndimbal":12,"Waxtaani ndimbal":13,"Wàll":14,"Waxtaani 
wàll":15,"ສື່":-2,"ພິເສດ":-1,"ສົນທະນາ":1,"ຜູ້ໃຊ້":2,"ສົນທະນາຂອງຜູ້ໃຊ້":3,"ວິກິພີເດຍ":4,"ສົນທະນາກ່ຽວກັບວິກິພີເດຍ":5,"ຮູບ":6,"ສົນທະນາກ່ຽວກັບຮູບ":7,"ມີເດຍວິກິ":8,"ສົນທະນາກ່ຽວກັບມີເດຍວິກິ":9,"ແມ່ແບບ":10,"ສົນທະນາກ່ຽວກັບແມ່ແບບ":11,"ຊ່ວຍເຫຼືອ":12,"ສົນທະນາກ່ຽວກັບຊ່ວຍເຫຼືອ":13,"ໝວດ":14,"ສົນທະນາກ່ຽວກັບໝວດ":15,"Miðill":-2,"Kerfissíða":-1,"Spjall":1,"Notandi":2,"Notandaspjall":3,"Wikipediaspjall":5,"Mynd":6,"Myndaspjall":7,"Melding":8,"Meldingarspjall":9,"Snið":10,"Sniðaspjall":11,"Hjálp":12,"Hjálparspjall":13,"Flokkur":14,"Flokkaspjall":15,"Gátt":100,"Gáttaspjall":101,"Suradnik":2,"Razgovor sa suradnikom":3,"Wikipedija":4,"Razgovor Wikipedija":5,"Predložak":10,"Razgovor o predlošku":11,"Dodatak":102,"Razgovor o dodatku":103,"Arbennig":-1,"Sgwrs":1,"Defnyddiwr":2,"Sgwrs Defnyddiwr":3,"Wicipedia":4,"Sgwrs Wicipedia":5,"Delwedd":6,"Sgwrs Delwedd":7,"MediaWici":8,"Sgwrs MediaWici":9,"Nodyn":10,"Sgwrs Nodyn":11,"Cymorth":12,"Sgwrs Cymorth":13,"Categori":14,"Sgwrs Categori":15,"Porth":100,"Sgwrs Porth":101,"Spesiaal":-1,"Bespreking":1,"Gebruiker":2,"Gebruikerbespreking":3,"Wikipediabespreking":5,"Lêer":6,"Lêerbespreking":7,"MediaWikibespreking":9,"Sjabloonbespreking":11,"Hulp":12,"Hulpbespreking":13,"Kategoriebespreking":15,"Portaal":100,"Portaalbespreking":101,"মিডিয়া":-2,"বিশেষ":-1,"য়্যারী":1,"আতাকুরা":2,"আতাকুরার য়্যারী":3,"উইকিপিডিয়া":4,"উইকিপিডিয়া য়্যারী":5,"ছবি":6,"ছবি য়্যারী":7,"মিডিয়াউইকি":8,"মিডিয়াউইকির য়্যারী":9,"মডেল":10,"মডেলর য়্যারী":11,"পাংলাক":12,"পাংলাকর য়্যারী":13,"থাক":14,"থাকর য়্যারী":15,"হমিলদুৱার":100,"হমিলদুৱার য়্যারী":101,"Pogovor":1,"Uporabnik":2,"Uporabniški pogovor":3,"Pogovor o Wikipediji":5,"Slika":6,"Pogovor o sliki":7,"Pogovor o MediaWiki":9,"Predloga":10,"Pogovor o predlogi":11,"Pogovor o kategoriji":15,"Pogovor o portalu":101,"Speçiale":-1,"Discûscion":1,"Utente":2,"Discûscioîn ûtente":3,"Discûscioîn Wikipedia":5,"Immaggine":6,"Discûscioîn immaggine":7,"Discûscioîn MediaWiki":9,"Discûscioîn template":11,"Agiûtto":12,"Discûscioîn agiûtto":13,"Categorîa":14,"Discûscioîn categorîa":15,"ميدياويكى":8,"نقاش ميدياويكى":9,"مناقشة بوابة":101,"Usator":2,"Usator Discussion":3,"Wikipedia Discussion":5,"File Discussion":7,"MediaWiki Discussion":9,"Avise":10,"Avise Discussion":11,"Auxilie":12,"Auxilie Discussion":13,"Categorie Discussion":15,"Xüsusi":-1,"Müzakirə":1,"İstifadəçi":2,"İstifadəçi müzakirəsi":3,"Vikipediya müzakirəsi":5,"Şəkil":6,"Şəkil müzakirəsi":7,"MediaWiki müzakirəsi":9,"Şablon müzakirəsi":11,"Kömək":12,"Kömək müzakirəsi":13,"Kateqoriya":14,"Kateqoriya müzakirəsi":15,"Portal müzakirəsi":101,"媒体文件":-2,"特殊":-1,"讨论":1,"用户":2,"用户讨论":3,"Wikipedia讨论":5,"文件":6,"文件讨论":7,"MediaWiki讨论":9,"模板":10,"模板讨论":11,"帮助讨论":13,"分类":14,"分类讨论":15,"Transwiki":100,"Transwiki talk":101,"Speciàle":-1,"Chiàcchiera":1,"Utente chiàcchiera":3,"Wikipedia chiàcchiera":5,"Fiùra":6,"Fiùra chiàcchiera":7,"MediaWiki chiàcchiera":9,"Modello":10,"Modello chiàcchiera":11,"Ajùto":12,"Ajùto chiàcchiera":13,"Categurìa":14,"Categurìa chiàcchiera":15,"Arbennek":-1,"Kescows":1,"Devnydhyer":2,"Kescows Devnydhyer":3,"Kescows Wikipedia":5,"Restren":6,"Kescows Restren":7,"Kescows MediaWiki":9,"Scantlyn":10,"Kescows Scantlyn":11,"Gweres":12,"Kescows Gweres":13,"Class":14,"Kescows Class":15,"Keskows Porth":101,"Отсасян":-1,"Сёрнитанiн":1,"Пырысь":2,"Пырыськӧд сёрнитанiн":3,"Википедия донъялӧм":5,"Файл донъялӧм":7,"МедиаВики":8,"МедиаВики донъялӧм":9,"Шаблон донъялӧм":11,"Къуллугъирал лажин":-1,"Ихтилат":1,"Гьуртту хьума":2,"Гьуртту хьуминнал ихтилат":3,"Википедиялиясса ихтилат":5,"Сурат":6,"Суратраясса 
ихтилат":7,"MediaWikiлиясса ихтилат":9,"Шаблондалиясса ихтилат":11,"Кумаг":12,"Кумаграясса ихтилат":13,"Категориялиясса ихтилат":15,"Specjalnô":-1,"Diskùsëjô":1,"Brëkòwnik":2,"Diskùsëjô brëkòwnika":3,"Wiki":-1,"Diskùsëjô Wiki":5,"Òbrôzk":6,"Diskùsëjô òbrôzków":7,"Diskùsëjô MediaWiki":9,"Szablóna":10,"Diskùsëjô Szablónë":11,"Pòmòc":12,"Diskùsëjô Pòmòcë":13,"Kategòrëjô":14,"Diskùsëjô Kategòrëji":15,"Википедия веревирд авун":5,"Срѣдьства":-2,"Нарочьна":-1,"Бєсѣда":1,"Польꙃєватєл҄ь":2,"Польꙃєватєлꙗ бєсѣда":3,"Википєдїꙗ":4,"Википєдїѩ бєсѣ́да":5,"Дѣло":6,"Дѣла бєсѣда":7,"MediaWiki бєсѣда":9,"Обраꙁьць":10,"Обраꙁьца бєсѣда":11,"Помощи бєсѣда":13,"Катигорїꙗ":14,"Катигорїѩ бєсѣда":15,"Istimewa":-1,"Pembicaraan":1,"Pembicaraan Pengguna":3,"Pembicaraan Wikipedia":5,"Berkas":6,"Pembicaraan Berkas":7,"Pembicaraan MediaWiki":9,"Pembicaraan Templat":11,"Pembicaraan Bantuan":13,"Pembicaraan Kategori":15,"Pembicaraan Portal":101,"Cumbersa":1,"Outelizador":2,"Cumbersa outelizador":3,"Biquipédia":4,"Biquipédia cumbersa":5,"Fexeiro":6,"Cumbersa fexeiro":7,"Biqui":8,"Cumbersa Biqui":9,"Cumbersa Modelo":11,"Cumbersa ajuda":13,"Catadorie":14,"Cumbersa catadorie":15,"Speciel":-1,"Bruger":2,"Brugerdiskussion":3,"Wikipedia-diskussion":5,"Fildiskussion":7,"MediaWiki-diskussion":9,"Skabelon":10,"Skabelondiskussion":11,"Hjælp-diskussion":13,"Kategoridiskussion":15,"Portaldiskussion":101,"Artikeldata":102,"Artikeldatadiskussion":103,"Midya":-2,"Natatangi":-1,"Usapan":1,"Tagagamit":2,"Usapang tagagamit":3,"Usapang Wikipedia":5,"Talaksan":6,"Usapang talaksan":7,"Usapang MediaWiki":9,"Suleras":10,"Usapang suleras":11,"Usapang tulong":13,"Kategorya":14,"Usapang kategorya":15,"Portada":100,"Usapang Portada":101,"Атайын":-1,"Баарлашуу":1,"Колдонуучу":2,"Колдонуучунун баарлашуулары":3,"Wikipedia баарлашуу":5,"Калып":10,"Жардам":12,"ذريعات":-2,"يوزر":2,"يوزر بحث":3,"Wikipedia بحث":5,"عڪس":6,"عڪس بحث":7,"ذريعات وڪي":8,"ذريعات وڪي بحث":9,"سانچو":10,"سنچو بحث":11,"مدد":12,"مدد بحث":13,"زمرو":14,"زمرو بحث":15,"Белхан":-1,"Дийцаре":1,"Декъашхо":2,"Декъашхон дийцаре":3,"Википедийа":4,"Википедийа дийцаре":5,"Хlум":6,"Хlуман дийцаре":7,"MediaWiki дийцаре":9,"Куцкеп":10,"Куцкеп дийцаре":11,"Гlо":12,"Гlон дийцаре":13,"Кадегар":14,"Кадегар дийцаре":15,"Ков":100,"Ков дийцаре":101,"میدیا":-2,"تایبەت":-1,"وتووێژ":1,"بەکارھێنەر":2,"لێدوانی بەکارھێنەر":3,"ویکیپیدیا":4,"لێدوانی ویکیپیدیا":5,"پەڕگە":6,"وتووێژی پەڕگە":7,"میدیاویکی":8,"وتووێژی میدیاویکی":9,"داڕێژە":10,"وتووێژی داڕێژە":11,"یارمەتی":12,"وتووێژی یارمەتی":13,"پۆل":14,"وتووێژی پۆل":15,"دەروازە":100,"لێدوانی دەروازە":101,"Doaimmat":-1,"Ságastallan":1,"Geavaheaddji":2,"Geavaheaddjeságastallan":3,"Wikipedia-ságastallan":5,"Fiila":6,"Fiilaságastallan":7,"MediaWiki-ságastallan":9,"Málle":10,"Málleságastallan":11,"Veahkki":12,"Veahkkeságastallan":13,"Kategoriija":14,"Kategoriijaságastallan":15,"Аһар":-2,"Көдлхнә":-1,"Меткән":1,"Демнч":2,"Демнчна туск меткән":3,"Wikipedia туск меткән":5,"Боомг":6,"Боомгин туск меткән":7,"MediaWiki туск меткән":9,"Кевләр":10,"Зуран туск меткән":11,"Цәәлһлһн":12,"Цәәлһлһин туск меткән":13,"Әәшл":14,"Әәшлин туск меткән":15,"Ispetziale":-1,"Cuntierra":1,"Usuàriu":2,"Cuntierra usuàriu":3,"Cuntierra Wikipedia":5,"Cuntierra file":7,"Cuntierra MediaWiki":9,"Cuntierra template":11,"Agiudu":12,"Cuntierra agiudu":13,"Cuntierra categoria":15,"Шпеціална":-1,"Діскузія":1,"Хоснователь":2,"Діскузія з хоснователём":3,"Діскузія ку Вікіпедії":5,"Діскузія ку файлу":7,"Діскузія ку MediaWiki":9,"Шаблона":10,"Діскузія ку шаблонї":11,"Поміч":12,"Діскузія ку 
помочі":13,"Катеґорія":14,"Діскузія ку катеґорії":15,"Diskusyón":1,"Usador":2,"Messaje de Usador":3,"Vikipedya":4,"Diskusyón de Vikipedya":5,"Diskusyón de Dosya":7,"MedyaViki":8,"Diskusyón de MedyaViki":9,"Xablón":10,"Diskusyón de Xablón":11,"Ayudo":12,"Diskusyón de Ayudo":13,"Katēggoría":14,"Diskusyón de Katēggoría":15,"Alat":-2,"Kusuih":-1,"Marit":1,"Ureuëng Nguy":2,"Marit Ureuëng Nguy":3,"Marit Wikipedia":5,"Beureukaih":6,"Marit Beureukaih":7,"AlatWiki":8,"Marit AlatWiki":9,"Pola":10,"Marit Pola":11,"Beunantu":12,"Marit Beunantu":13,"Kawan":14,"Marit Kawan":15,"Wikisource":4,"Wikisource talk":5,"Page":104,"Page talk":105,"Index":104,"Index talk":107,"Author":108,"Author talk":109,"Башка":-1,"Корхнема":1,"Тиись":2,"Тиись корхнема":3,"Википедиесь":4,"Википедиесь корхнема":5,"Няйф":6,"Няйф корхнема":7,"МедиаВики корхнема":9,"Шаблон корхнема":11,"Лезкс":12,"Лезкс корхнема":13,"Категорие":14,"Категорие корхнема":15,"Discutir":1,"Utilizaire":2,"Discussion Utilizaire":3,"Wikipèdia":4,"Discussion Wikipèdia":5,"Fichièr":6,"Discussion Fichièr":7,"Modèl":10,"Discussion Modèl":11,"Discussion Ajuda":13,"Discussion Categoria":15,"Discussion Portal":101,"Projècte":102,"Discussion Projècte":103,"Speciale":-1,"Diskutim":1,"Përdoruesi":2,"Përdoruesi diskutim":3,"Wikipedia diskutim":5,"Skeda":6,"Skeda diskutim":7,"MediaWiki diskutim":9,"Stampa":6,"Stampa diskutim":11,"Ndihmë":12,"Ndihmë diskutim":13,"Kategoria diskutim":15,"Portal diskutim":101,"Къуллукъ":-1,"Сюзюу":1,"Къошулуучу":2,"Къошулуучуну сюзюу":3,"Википедия сюзюу":5,"Файлны сюзюу":7,"MediaWiki-ни сюзюу":9,"Шаблонну сюзюу":11,"Болушлукъ":12,"Болушлукъну сюзюу":13,"Категорияны сюзюу":15,"Медиум":-2,"Специјална":-1,"Разговор со корисник":3,"Разговор за Википедија":5,"Податотека":6,"Разговор за податотека":7,"МедијаВики":8,"Разговор за МедијаВики":9,"Разговор за шаблон":11,"Помош":12,"Разговор за помош":13,"Разговор за категорија":15,"Разговор за Портал":101,"ᐅᐃᑭᐱᑎᐊ":4,"ᐅᐃᑭᐱᑎᐊ talk":5,"Aŭdvidaĵo":-2,"Specialaĵo":-1,"Diskuto":1,"Uzanto":2,"Uzanto-Diskuto":3,"Vikipedio":4,"Vikipedia diskuto":5,"Dosiero":6,"Dosiero-Diskuto":7,"MediaWiki-Diskuto":9,"Ŝablono":10,"Ŝablono-Diskuto":11,"Helpo-Diskuto":13,"Kategorio":14,"Kategorio-Diskuto":15,"Portalo":100,"Portala diskuto":101,"Projekto":102,"Projekta diskuto":103,"Diskussioun":1,"Benotzer":2,"Benotzer Diskussioun":3,"Wikipedia Diskussioun":5,"Fichier Diskussioun":7,"MediaWiki Diskussioun":9,"Schabloun":10,"Schabloun Diskussioun":11,"Hëllef":12,"Hëllef Diskussioun":13,"Kategorie Diskussioun":15,"Taybet":-1,"Nîqaş":1,"Bikarhêner":2,"Bikarhêner nîqaş":3,"Wîkîpediya":4,"Gotûbêja Wîkîpediyayê":5,"Wêne":6,"Wêne nîqaş":7,"MediaWiki nîqaş":9,"Şablon nîqaş":11,"Alîkarî":12,"Alîkarî nîqaş":13,"Kategorî":14,"Kategorî nîqaş":15,"Gotûbêja portalê":101,"मिडिया":-2,"चर्चा":1,"सदस्य चर्चा":3,"विकिपीडिया चर्चा":5,"चित्र चर्चा":7,"मिडियाविकी":8,"मिडियाविकी चर्चा":9,"साचा":10,"साचा चर्चा":11,"सहाय्य":12,"सहाय्य चर्चा":13,"वर्ग":14,"वर्ग चर्चा":15,"दालन":100,"दालन चर्चा":101,"Specialine":-1,"Lodu":1,"Kävutai":2,"Lodu kävutajas":3,"Vikipedii":4,"Paginad Vikipedii":5,"Lodu failas":7,"Lodu MediaWikiš":9,"Lodu šablonas":11,"Abu":12,"Lodu abus":13,"Kategorii":14,"Lodu kategorijas":15,"മീഡിയ":-2,"പ്രത്യേകം":-1,"സംവാദം":1,"ഉപയോക്താവ്":2,"ഉപയോക്താവിന്റെ സംവാദം":3,"വിക്കിപീഡിയ":4,"വിക്കിപീഡിയ സംവാദം":5,"പ്രമാണം":6,"പ്രമാണത്തിന്റെ സംവാദം":7,"മീഡിയവിക്കി":8,"മീഡിയവിക്കി സംവാദം":9,"ഫലകം":10,"ഫലകത്തിന്റെ സംവാദം":11,"സഹായം":12,"സഹായത്തിന്റെ സംവാദം":13,"വർഗ്ഗം":14,"വർഗ്ഗത്തിന്റെ സംവാദം":15,"കവാടം":100,"കവാടത്തിന്റെ 
സംവാദം":101,"Diskuschoon":1,"Bruker Diskuschoon":3,"Wikipedia Diskuschoon":5,"Bild":6,"Bild Diskuschoon":7,"MediaWiki Diskuschoon":9,"Vörlaag":10,"Vörlaag Diskuschoon":11,"Hülp":12,"Hülp Diskuschoon":13,"Kategorie Diskuschoon":15,"Portal Diskuschoon":101,"Utent":2,"Ciaciarade":3,"Discussion ant sla Wikipedia":5,"Figura":6,"Discussion dla figura":7,"Discussion dla MediaWiki":9,"Stamp":10,"Discussion dlë stamp":11,"Agiut":12,"Discussion ant sl'agiut":13,"Categorìa":14,"Discussion ant sla categorìa":15,"ހާއްޞަ":-1,"ޚިޔާލު":1,"މެމްބަރު":2,"ފައިލް":6,"ފައިލް ޚިޔާލު":7,"މީޑިއާވިކީ":8,"މީޑިޔާވިކި ޚިޔާލު":9,"ފަންވަތް":10,"ފަންވަތް ޚިޔާލު":11,"އެހީ":12,"އެހީ ޚިޔާލު":13,"ޤިސްމު":14,"ޤިސްމު ޚިޔާލު":15,"ނެރު":100,"ނެރު ޚ ޔާލު":101,"Espesiat":-1,"Kombetsasion":1,"Muna'sesetbi":2,"Kombetsasion ni muna'sesetbi":3,"Kombetsasion nu Wikipedia":5,"Litratu":6,"Kombetsasion ni litratu":7,"Kombetsasion ni ayudo":13,"Katigoria":14,"Kombetsasion ni katigoria":15,"Spèciâl":-1,"Utilisator":2,"Discussion utilisator":3,"Vouiquipèdia":4,"Discussion Vouiquipèdia":5,"Fichiér":6,"Discussion fichiér":7,"Modèlo":10,"Discussion modèlo":11,"Éde":12,"Discussion éde":13,"Catègorie":14,"Discussion catègorie":15,"Medija":-2,"Specialus":-1,"Aptarimas":1,"Naudotojas":2,"Naudotojo aptarimas":3,"Vikipedija":4,"Vikipedijos aptarimas":5,"Vaizdas":6,"Vaizdo aptarimas":7,"MediaWiki aptarimas":9,"Šablonas":10,"Šablono aptarimas":11,"Pagalba":12,"Pagalbos aptarimas":13,"Kategorijos aptarimas":15,"Vikisritis":100,"Vikisrities aptarimas":101,"Vikiprojektas":102,"Vikiprojekto aptarimas":103,"Sąrašas":104,"Sąrašo aptarimas":105,"Filpeikar":-2,"Brukar":2,"Brukardiskusjon":3,"Tema":100,"Temadiskusjon":101,"Specialis":-1,"Disputatio":1,"Usor":2,"Disputatio Usoris":3,"Vicipaedia":4,"Disputatio Vicipaediae":5,"Fasciculus":6,"Disputatio Fasciculi":7,"Disputatio MediaWiki":9,"Formula":10,"Disputatio Formulae":11,"Auxilium":12,"Disputatio Auxilii":13,"Disputatio Categoriae":15,"Porta":100,"Disputatio Portae":101,"Махсус":-1,"Бәхәс":1,"Кулланучы":2,"Кулланучы бәхәсе":3,"Википедия бәхәсе":5,"Файл бәхәсе":7,"МедиаВики бәхәсе":9,"Калып бәхәсе":11,"Ярдәм":12,"Ярдәм бәхәсе":13,"Төркем":14,"Төркем бәхәсе":15,"Портал бәхәсе":101,"Razgovor o Wikipedia":5,"Mediawiki razgovor":9,"ፋይል":-2,"ልዩ":-1,"ውይይት":1,"አባል":2,"አባል ውይይት":3,"ውክፔዲያ":4,"ውክፔዲያ ውይይት":5,"ስዕል":6,"ስዕል ውይይት":7,"መልዕክት":8,"መልዕክት ውይይት":9,"መለጠፊያ":10,"መለጠፊያ ውይይት":11,"እርዳታ":12,"እርዳታ ውይይት":13,"መደብ":14,"መደብ ውይይት":15,"በር":100,"በር ውይይት":101,"Buech":102,"Buech Diskussion":103,"Wort":104,"Wort Diskussion":105,"Text":106,"Text Diskussion":107,"Spruch":108,"Spruch Diskussion":109,"Nochricht":110,"Nochricht Diskussion":111,"Amóhùnmáwòrán":-2,"Pàtàkì":-1,"Ọ̀rọ̀":1,"Oníṣe":2,"Ọ̀rọ̀ oníṣe":3,"Ọ̀rọ̀ Wikipedia":5,"Fáìlì":6,"Ọ̀rọ̀ fáìlì":7,"Ọ̀rọ̀ mediaWiki":9,"Àdàkọ":10,"Ọ̀rọ̀ àdàkọ":11,"Ìrànlọ́wọ́":12,"Ọ̀rọ̀ ìrànlọ́wọ́":13,"Ẹ̀ka":14,"Ọ̀rọ̀ ẹ̀ka":15,"Èbúté":100,"Ọ̀rọ̀ èbúté":101,"Ìwé":108,"Ọ̀rọ̀ ìwé":109,"Ятарлă":-1,"Сӳтсе явасси":1,"Хутшăнакан":2,"Хутшăнаканăн канашлу страници":3,"Википеди сӳтсе явмалли":5,"Ӳкерчĕк":6,"Ӳкерчĕке сӳтсе явмалли":7,"MediaWiki сӳтсе явмалли":9,"Шаблона сӳтсе явмалли":11,"Пулăшу":12,"Пулăшăва сӳтсе явмалли":13,"Категорине сӳтсе явмалли":15,"ढाँचा":10,"ढाँचा वार्ता":11,"मद्दत":12,"मद्दत वार्ता":13,"Miðil":-2,"Serstakt":-1,"Kjak":1,"Brúkari":2,"Brúkarakjak":3,"Wikipedia-kjak":5,"Myndakjak":7,"MediaWiki-kjak":9,"Fyrimynd":10,"Fyrimyndakjak":11,"Hjálparkjak":13,"Bólkur":14,"Bólkakjak":15,"Szpecyjalna":-1,"Dyskusyjo":1,"Używacz":2,"Dyskusyjo używacza":3,"Wikipedyjo":4,"Dyskusyjo 
Wikipedyjo":5,"Plik":6,"Dyskusyjo plika":7,"Dyskusyjo MediaWiki":9,"Muster":10,"Dyskusyjo mustra":11,"Půmoc":12,"Dyskusyjo půmocy":13,"Kategoryjo":14,"Dyskusyjo kategoryji":15,"Specjalna":-1,"Dyskusja":1,"Wikipedysta":2,"Dyskusja wikipedysty":3,"Dyskusja Wikipedii":5,"Dyskusja pliku":7,"Dyskusja MediaWiki":9,"Szablon":10,"Dyskusja szablonu":11,"Dyskusja kategorii":15,"Dyskusja portalu":101,"Wikiprojekt":102,"Dyskusja wikiprojektu":103,"Overleg gebruiker":3,"Bestand":6,"Overleg bestand":7,"Overleg portaal":101,"Diskusija":1,"Lietotājs":2,"Lietotāja diskusija":3,"Vikipēdija":4,"Vikipēdijas diskusija":5,"Attēls":6,"Attēla diskusija":7,"MediaWiki diskusija":9,"Veidne":10,"Veidnes diskusija":11,"Palīdzība":12,"Palīdzības diskusija":13,"Kategorijas diskusija":15,"Portāls":100,"Portāla diskusija":101,"Vikiprojekts":102,"Vikiprojekta diskusija":103,"আলাপ":1,"ব্যবহারকারী":2,"ব্যবহারকারী আলাপ":3,"উইকিপিডিয়া আলোচনা":5,"চিত্র":6,"চিত্র আলোচনা":7,"মিডিয়াউইকি আলোচনা":9,"টেমপ্লেট":10,"টেমপ্লেট আলোচনা":11,"সাহায্য":12,"সাহায্য আলোচনা":13,"বিষয়শ্রেণী":14,"বিষয়শ্রেণী আলোচনা":15,"প্রবেশদ্বার":100,"প্রবেশদ্বার আলোচনা":101,"Wikimedia":4,"Wikimedia talk":5,"Mēdiatl":-2,"Nōncuahquīzqui":-1,"Tēixnāmiquiliztli":1,"Tlatequitiltilīlli":2,"Tlatequitiltilīlli tēixnāmiquiliztli":3,"Huiquipedia":4,"Huiquipedia tēixnāmiquiliztli":5,"Īxiptli":6,"Īxiptli tēixnāmiquiliztli":7,"Huiquimedia":8,"Huiquimedia tēixnāmiquiliztli":9,"Nemachiyōtīlli":10,"Nemachiyōtīlli tēixnāmiquiliztli":11,"Tēpalēhuiliztli":12,"Tēpalēhuiliztli tēixnāmiquiliztli":13,"Neneuhcāyōtl":14,"Neneuhcāyōtl tēixnāmiquiliztli":15,"Rakitra":-2,"Manokana":-1,"Dinika":1,"Mpikambana":2,"Dinika amin'ny mpikambana":3,"Dinika amin'ny Wikipedia":5,"Sary":6,"Dinika amin'ny sary":7,"Dinika amin'ny MediaWiki":9,"Endrika":10,"Dinika amin'ny endrika":11,"Fanoroana":12,"Dinika amin'ny fanoroana":13,"Sokajy":14,"Dinika amin'ny sokajy":15,"Medie":-2,"Extra":-1,"Klaaf":1,"Metmaacher":2,"Metmaacher Klaaf":3,"Wikipedia Klaaf":5,"Dateie Klaaf":7,"MediaWiki Klaaf":9,"Schablon":10,"Schablone Klaaf":11,"Hölp":12,"Hölp Klaaf":13,"Saachjrupp":14,"Saachjruppe Klaaf":15,"Xısusi":-1,"Werênayış":1,"Karber":2,"Karber mesac":3,"Wikipedia werênayış":5,"Dosya werênayış":7,"MediaWiki werênayış":9,"Şablon werênayış":11,"Desteg":12,"Desteg werênayış":13,"Kategori werênayış":15,"ಮೀಡಿಯ":-2,"ವಿಶೇಷ":-1,"ಚರ್ಚೆಪುಟ":1,"ಸದಸ್ಯ":2,"ಸದಸ್ಯರ ಚರ್ಚೆಪುಟ":3,"ವಿಕಿಪೀಡಿಯ":4,"ವಿಕಿಪೀಡಿಯ ಚರ್ಚೆ":5,"ಚಿತ್ರ":6,"ಚಿತ್ರ ಚರ್ಚೆಪುಟ":7,"ಮೀಡಿಯವಿಕಿ":8,"ಮೀಡೀಯವಿಕಿ ಚರ್ಚೆ":9,"ಟೆಂಪ್ಲೇಟು":10,"ಟೆಂಪ್ಲೇಟು ಚರ್ಚೆ":11,"ಸಹಾಯ":12,"ಸಹಾಯ ಚರ್ಚೆ":13,"ವರ್ಗ":14,"ವರ್ಗ ಚರ್ಚೆ":15,"మీడియా":-2,"ప్రత్యేక":-1,"చర్చ":1,"వాడుకరి":2,"వాడుకరి చర్చ":3,"వికీపీడియా":4,"వికీపీడియా చర్చ":5,"దస్త్రం":6,"దస్త్రంపై చర్చ":7,"మీడియావికీ":8,"మీడియావికీ చర్చ":9,"మూస":10,"మూస చర్చ":11,"సహాయం":12,"సహాయం చర్చ":13,"వర్గం":14,"వర్గం చర్చ":15,"వేదిక":100,"వేదిక చర్చ":101,"Вижа":-1,"Баҳс":1,"Корбар":2,"Баҳси корбар":3,"Википедиа":4,"Баҳси Википедиа":5,"Акс":6,"Баҳси акс":7,"Медиавики":8,"Баҳси медиавики":9,"Баҳси шаблон":11,"Роҳнамо":12,"Баҳси роҳнамо":13,"Гурӯҳ":14,"Баҳси гурӯҳ":15,"Баҳси портал":101,"Ýörite":-1,"Çekişme":1,"Ulanyjy":2,"Ulanyjy çekişme":3,"Wikipediýa":4,"Wikipediýa çekişme":5,"Faýl":6,"Faýl çekişme":7,"MediaWiki çekişme":9,"Şablon çekişme":11,"Ýardam":12,"Ýardam çekişme":13,"Kategoriýa":14,"Kategoriýa çekişme":15,"Амедиа":-2,"Цастәи":-1,"Ахцәажәара":1,"Алахәыла":2,"Алахәыла ахцәажәара":3,"Авикипедиа":4,"Авикипедиа ахцәажәара":5,"Афаил":6,"Афаил ахцәажәара":7,"Амедиавики":8,"Амедиавики ахцәажәара":9,"Ашаблон":10,"Ашаблон 
ахцәажәара":11,"Ацхыраара":12,"Ацхыраара ахцәажәара":13,"Акатегориа":14,"Акатегориа ахцәажәара":15,"Oerlis":1,"Meidogger":2,"Meidogger oerlis":3,"Wikipedy":4,"Wikipedy oerlis":5,"Ofbyld":6,"Ofbyld oerlis":7,"MediaWiki oerlis":9,"Berjocht":10,"Berjocht oerlis":11,"Hulp oerlis":13,"Kategory":14,"Kategory oerlis":15,"Astamiwa":-1,"Dhiskusi":1,"Panganggo":2,"Dhiskusi Panganggo":3,"Dhiskusi Wikipedia":5,"Gambar":6,"Dhiskusi Gambar":7,"Dhiskusi MediaWiki":9,"Cithakan":10,"Dhiskusi Cithakan":11,"Pitulung":12,"Dhiskusi Pitulung":13,"Dhiskusi Kategori":15,"Сæрмагонд":-1,"Тæрхон":1,"Архайæг":2,"Архайæджы ныхас":3,"Википедийы тæрхон":5,"Файлы тæрхон":7,"MediaWiki-йы тæрхон":9,"Шаблоны тæрхон":11,"Æххуыс":12,"Æххуысы тæрхон":13,"Категорийы тæрхон":15,"Мультымедыя":-2,"Адмысловае":-1,"Размовы":1,"Удзельнік":2,"Размовы з удзельнікам":3,"Вікіпедыя":4,"Размовы пра Вікіпедыя":5,"Выява":6,"Размовы пра выяву":7,"Размовы пра MediaWiki":9,"Размовы пра шаблон":11,"Даведка":12,"Размовы пра даведку":13,"Катэгорыя":14,"Размовы пра катэгорыю":15,"Партал":100,"Размовы пра партал":101,"Descusión":1,"Descusión usuario":3,"Descusión Wikipedia":5,"Imachen":6,"Descusión imachen":7,"Descusión MediaWiki":9,"Descusión plantilla":11,"Aduya":12,"Descusión aduya":13,"Descusión categoría":15,"Descusión Portal":101,"ვიკიპედია განხილვა":5,"პორტალი":100,"პორტალი განხილვა":101,"Тусгай":-1,"Хэлэлцүүлэг":1,"Хэрэглэгч":2,"Хэрэглэгчийн яриа":3,"Wikipedia-н хэлэлцүүлэг":5,"Файлын хэлэлцүүлэг":7,"МедиаВикигийн хэлэлцүүлэг":9,"Загвар":10,"Загварын хэлэлцүүлэг":11,"Тусламж":12,"Тусламжийн хэлэлцүүлэг":13,"Ангилал":14,"Ангиллын хэлэлцүүлэг":15,"Benutser":2,"Benutser Diskussion":3,"Bielde":6,"Bielde Diskussion":7,"Foarloage":10,"Foarloage Diskussion":11,"Hälpe":12,"Hälpe Diskussion":13,"Medja":-2,"Speċjali":-1,"Diskussjoni":1,"Diskussjoni utent":3,"Diskussjoni Wikipedija":5,"Diskussjoni stampa":7,"Diskussjoni MediaWiki":9,"Mudell":10,"Diskussjoni mudell":11,"Għajnuna":12,"Diskussjoni għajnuna":13,"Diskussjoni kategorija":15,"Diskussjoni portal":101,"Meanyn":-2,"Er lheh":-1,"Resooney":1,"Ymmydeyr":2,"Resooney ymmydeyr":3,"Resooney Wikipedia":5,"Coadan":6,"Resooney coadan":7,"Resooney MediaWiki":9,"Clowan":10,"Resooney clowan":11,"Cooney":12,"Resooney cooney":13,"Ronney":14,"Resooney ronney":15,"Spesyal":-1,"Taki":1,"Masyin":2,"Taki fu masyin":3,"Taki fu Wikipedia":5,"Gefre":6,"Taki fu gefre":7,"Taki fu MediaWiki":9,"Ankra":10,"Taki fu ankra":11,"Yepi":12,"Taki fu yepi":13,"Guru":14,"Taki fu guru":15,"Панель":-1,"Вераськон":1,"Викиавтор":2,"Викиавтор сярысь вераськон":3,"Wikipedia сярысь вераськон":5,"Суред":6,"Суред сярысь вераськон":7,"MediaWiki сярысь вераськон":9,"Шаблон сярысь вераськон":11,"Валэктон":12,"Валэктон сярысь вераськон":13,"Категория сярысь вераськон":15,"ۋاسىتە":-2,"ئالاھىدە":-1,"مۇنازىرە":1,"ئىشلەتكۈچى":2,"ئىشلەتكۈچى مۇنازىرىسى":3,"مۇنازىرىسىWikipedia":5,"ھۆججەت":6,"ھۆججەت مۇنازىرىسى":7,"MediaWiki مۇنازىرىسى":9,"قېلىپ":10,"قېلىپ مۇنازىرىسى":11,"ياردەم":12,"ياردەم مۇنازىرىسى":13,"تۈر":14,"تۈر مۇنازىرىسى":15,"ਮੀਡੀਆ":-2,"ਖਾਸ":-1,"ਚਰਚਾ":1,"ਮੈਂਬਰ":2,"ਮੈਂਬਰ ਚਰਚਾ":3,"ਵਿਕਿਪੀਡਿਆ":4,"ਵਿਕਿਪੀਡਿਆ ਚਰਚਾ":5,"ਤਸਵੀਰ":6,"ਤਸਵੀਰ ਚਰਚਾ":7,"ਮੀਡੀਆਵਿਕਿ":8,"ਮੀਡੀਆਵਿਕਿ ਚਰਚਾ":9,"ਨਮੂਨਾ":10,"ਨਮੂਨਾ ਚਰਚਾ":11,"ਮਦਦ":12,"ਮਦਦ ਚਰਚਾ":13,"ਸ਼੍ਰੇਣੀ":14,"ਸ਼੍ਰੇਣੀ ਚਰਚਾ":15,"Meadhan":-2,"Sònraichte":-1,"Deasbaireachd":1,"Cleachdaiche":2,"Deasbaireachd a' chleachdaiche":3,"Uicipeid":4,"An deasbaireachd aig Uicipeid":5,"Faidhle":6,"Deasbaireachd an fhaidhle":7,"Deasbaireachd MediaWiki":9,"Teamplaid":10,"Deasbaireachd na teamplaid":11,"Cobhair":12,"Deasbaireachd na 
cobharach":13,"Roinn-seòrsa":14,"Deasbaireachd na roinn-seòrsa":15,"દ્રશ્ય-શ્રાવ્ય (મિડિયા)":-2,"વિશેષ":-1,"ચર્ચા":1,"સભ્ય":2,"સભ્યની ચર્ચા":3,"વિકિપીડિયા":4,"વિકિપીડિયા ચર્ચા":5,"ચિત્ર":6,"ચિત્રની ચર્ચા":7,"મીડિયાવિકિ":8,"મીડિયાવિકિ ચર્ચા":9,"ઢાંચો":10,"ઢાંચાની ચર્ચા":11,"મદદ":12,"મદદની ચર્ચા":13,"શ્રેણી":14,"શ્રેણીની ચર્ચા":15,"Pinaurog":-1,"Hiruhimangraw":1,"Gumaramit":2,"Hiruhimangaw hiton gumaramit":3,"Hiruhimangraw hiton Wikipedia":5,"Paypay":6,"Hiruhimangraw hiton paypay":7,"MedyaWiki":8,"Hiruhimangraw hiton MedyaWiki":9,"Batakan":10,"Hiruhimangraw hiton batakan":11,"Bulig":12,"Hiruhimangaw hiton bulig":13,"Kaarangay":14,"Hiruhimangraw hiton kaarangay":15,"माध्यम":-2,"खँलाबँला":1,"छ्येलेमि":2,"छ्येलेमि खँलाबँला":3,"विकिपिडिया":4,"विकिपिडिया खँलाबँला":5,"किपा":6,"किपा खँलाबँला":7,"मिडियाविकि":8,"मिडियाविकि खँलाबँला":9,"ग्वाहालि":12,"ग्वाहालि खँलाबँला":13,"पुचः":14,"पुचः खँलाबँला":15,"दबू":100,"दबू खँलाबँला":101,"ମାଧ୍ୟମ":-2,"ବିଶେଷ":-1,"ଆଲୋଚନା":1,"ବ୍ୟବହାରକାରୀ":2,"ବ୍ୟବହାରକାରୀଙ୍କ ଆଲୋଚନା":3,"ଉଇକିପିଡ଼ିଆ":4,"ଉଇକିପିଡ଼ିଆ ଆଲୋଚନା":5,"ଫାଇଲ":6,"ଫାଇଲ ଆଲୋଚନା":7,"ମିଡ଼ିଆଉଇକି":8,"ମିଡ଼ିଆଉଇକି ଆଲୋଚନା":9,"ଛାଞ୍ଚ":10,"ଛାଞ୍ଚ ଆଲୋଚନା":11,"ସହଯୋଗ":12,"ସହଯୋଗ ଆଲୋଚନା":13,"ଶ୍ରେଣୀ":14,"ଶ୍ରେଣୀ ଆଲୋଚନା":15,"មេឌា":-2,"ពិសេស":-1,"ការពិភាក្សា":1,"អ្នកប្រើប្រាស់":2,"ការពិភាក្សារបស់អ្នកប្រើប្រាស់":3,"វិគីភីឌា":4,"ការពិភាក្សាអំពីវិគីភីឌា":5,"ឯកសារ":6,"ការពិភាក្សាអំពីឯកសារ":7,"មេឌាវិគី":8,"ការពិភាក្សាអំពីមេឌាវិគី":9,"ទំព័រគំរូ":10,"ការពិភាក្សាអំពីទំព័រគំរូ":11,"ជំនួយ":12,"ការពិភាក្សាអំពីជំនួយ":13,"ចំណាត់ថ្នាក់ក្រុម":14,"ការពិភាក្សាអំពីចំណាត់ថ្នាក់ក្រុម":15,"Mahsus":-1,"Muzakere":1,"Qullanıcı":2,"Qullanıcı muzakeresi":3,"Vikipediya muzakeresi":5,"Fayl":6,"Fayl muzakeresi":7,"MediaViki":8,"MediaViki muzakeresi":9,"Şablon muzakeresi":11,"Yardım muzakeresi":13,"Kategoriya muzakeresi":15,"Speciâl":-1,"Discussion utent":3,"Vichipedie":4,"Discussion Vichipedie":5,"Figure":6,"Discussion figure":7,"Model":10,"Discussion model":11,"Jutori":12,"Discussion jutori":13,"Discussion categorie":15,"Espesyal":-1,"Diskite":1,"Itilizatè":2,"Diskisyon Itilizatè":3,"Wikipedya":4,"Diskisyon Wikipedya":5,"Fichye":6,"Diskisyon Fichye":7,"Diskisyon MedyaWiki":9,"Diskisyon Modèl":11,"Èd":12,"Diskisyon Èd":13,"Diskisyon Kategori":15,"Discussione":1,"Discussioni utente":3,"Discussioni categoria":15,"Sapaq":-1,"Rimanakuy":1,"Ruraq":2,"Ruraq rimanakuy":3,"Wikipedia rimanakuy":5,"Rikcha":6,"Rikcha rimanakuy":7,"MediaWiki rimanakuy":9,"Plantilla rimanakuy":11,"Yanapa":12,"Yanapa rimanakuy":13,"Katiguriya":14,"Katiguriya rimanakuy":15,"Մեդիա":-2,"Սպասարկող":-1,"Քննարկում":1,"Մասնակից":2,"Մասնակցի քննարկում":3,"Վիքիպեդիա":4,"Վիքիպեդիայի քննարկում":5,"Պատկեր":6,"Պատկերի քննարկում":7,"MediaWiki քննարկում":9,"Կաղապար":10,"Կաղապարի քննարկում":11,"Օգնություն":12,"Օգնության քննարկում":13,"Կատեգորիա":14,"Կատեգորիայի քննարկում":15,"Պորտալ":100,"Պորտալի քննարկում":101,"Specioal":-1,"Discuusje":1,"Discuusje gebruker":3,"Discuusje Wikipedia":5,"Ofbeeldienge":6,"Discuusje ofbeeldienge":7,"Discuusje MediaWiki":9,"Patrôon":10,"Discuusje patrôon":11,"Discuusje ulpe":13,"Discuusje categorie":15,"Multimedia":-2,"Multimedia talk":101,"Arnawlı":-1,"Sa'wbet":1,"Paydalanıwshı":2,"Paydalanıwshı sa'wbeti":3,"Wikipedia sa'wbeti":5,"Su'wret":6,"Su'wret sa'wbeti":7,"MediaWiki sa'wbeti":9,"Shablon":10,"Shablon sa'wbeti":11,"Anıqlama":12,"Anıqlama sa'wbeti":13,"Kategoriya sa'wbeti":15,"Discussion Usator":3,"Discussion File":7,"Patrono":10,"Discussion Patrono":11,"Adjuta":12,"Discussion Adjuta":13,"Appendice":102,"Discussion Appendice":103,"מדיה":-2,"מיוחד":-1,"שיחה":1,"משתמש":2,"שיחת 
משתמש":3,"ויקיפדיה":4,"שיחת ויקיפדיה":5,"קובץ":6,"שיחת קובץ":7,"מדיה ויקי":8,"שיחת מדיה ויקי":9,"תבנית":10,"שיחת תבנית":11,"עזרה":12,"שיחת עזרה":13,"קטגוריה":14,"שיחת קטגוריה":15,"פורטל":100,"שיחת פורטל":101,"ספר":108,"שיחת ספר":109,"Husus":-1,"Obrolan":1,"Pamaké":2,"Obrolan pamaké":3,"Obrolan Wikipedia":5,"Obrolan gambar":7,"MédiaWiki":8,"Obrolan MédiaWiki":9,"Obrolan citakan":11,"Obrolan pitulung":13,"Obrolan kategori":15,"Obrolan portal":101,"Meedia":-2,"Eri":-1,"Arutelu":1,"Kasutaja":2,"Kasutaja arutelu":3,"Vikipeedia":4,"Vikipeedia arutelu":5,"Pilt":6,"Pildi arutelu":7,"MediaWiki arutelu":9,"Mall":10,"Malli arutelu":11,"Juhend":12,"Juhendi arutelu":13,"Kategooria":14,"Kategooria arutelu":15,"Portaali arutelu":101,"زریعہ":-2,"تبادلۂ خیال":1,"صارف":2,"تبادلۂ خیال صارف":3,"منصوبہ":4,"تبادلۂ خیال منصوبہ":5,"تصویر":6,"تبادلۂ خیال تصویر":7,"میڈیاوکی":8,"تبادلۂ خیال میڈیاوکی":9,"سانچہ":10,"تبادلۂ خیال سانچہ":11,"معاونت":12,"تبادلۂ خیال معاونت":13,"زمرہ":14,"تبادلۂ خیال زمرہ":15,"باب":100,"تبادلۂ خیال باب":101,"Donate":4,"Donate talk":5,"Аналлаах":-1,"Ырытыы":1,"Кыттааччы":2,"Кыттааччы ырытыыта":3,"Бикипиэдьийэ":4,"Бикипиэдьийэ ырытыыта":5,"Билэ":6,"Билэ ырытыыта":7,"Халыып":10,"Халыып ырытыыта":11,"Көмө":12,"Көмө ырытыыта":13,"Категория ырытыыта":15,"Specialnje":-1,"Wužiwar":2,"Diskusija z wužiwarjom":3,"Wikipedija diskusija":5,"Dataja":6,"Diskusija k dataji":7,"Předłoha":10,"Diskusija k předłoze":11,"Pomoc diskusija":13,"Diskusija ke kategoriji":15,"Immikkut":-1,"Oqallinneq":1,"Atuisoq":2,"Atuisup oqalliffia":3,"Wikipedia-p oqalliffia":5,"Fiileq":6,"Fiilip oqalliffia":7,"Mediawikip oqalliffia":9,"Ilisserut":10,"Ilisserummi oqallinneq":11,"Ikiuutit":12,"Ikiuutini oqallinneq":13,"Sumut atassuseq":14,"Sumut atassusermi oqallinneq":15,"Specialne":-1,"Wužywaŕ":2,"Diskusija wužywarja":3,"Diskusija wó dataji":7,"Pśedłoga":10,"Diskusija wó pśedłoze":11,"Diskusija wó pomocy":13,"Diskusija wó kategoriji":15,"Alderique":1,"Usuariu":2,"Usuariu alderique":3,"Uiquipedia":4,"Uiquipedia alderique":5,"Archivu":6,"Archivu alderique":7,"MediaWiki alderique":9,"Plantía":10,"Plantía alderique":11,"Aida":12,"Aida alderique":13,"Categoría alderique":15,"Meán":-2,"Speisialta":-1,"Plé":1,"Úsáideoir":2,"Plé úsáideora":3,"Vicipéid":4,"Plé Vicipéide":5,"Íomhá":6,"Plé íomhá":7,"Plé MediaWiki":9,"Teimpléad":10,"Plé teimpléid":11,"Cabhair":12,"Plé cabhrach":13,"Catagóir":14,"Plé catagóire":15,"Naaltsoos baa yáshtiʼ":1,"Choyoołʼįįhí":2,"Choyoołʼįįhí bichʼįʼ yáshtiʼ":3,"Wikiibíídiiya":4,"Wikiibíídiiya baa yáshtiʼ":5,"Eʼelyaaígíí":6,"Eʼelyaaígíí baa yáshtiʼ":7,"MediaWiki baa yáshtiʼ":9,"Bee álnééhí":10,"Bee álnééhí baa yáshtiʼ":11,"Anáʼálwoʼ":12,"Anáʼálwoʼ baa yáshtiʼ":13,"Tʼááłáhági átʼéego":14,"Tʼááłáhági átʼéego baa yáshtiʼ":15,"Dibar":-1,"Kaozeal":1,"Implijer":2,"Kaozeadenn Implijer":3,"Kaozeadenn Wikipedia":5,"Restr":6,"Kaozeadenn Restr":7,"Kaozeadenn MediaWiki":9,"Patrom":10,"Kaozeadenn Patrom":11,"Skoazell":12,"Kaozeadenn Skoazell":13,"Rummad":14,"Kaozeadenn Rummad":15,"Médiá":-2,"Špeciálne":-1,"Diskusia":1,"Redaktor":2,"Diskusia s redaktorom":3,"Diskusia k Wikipédii":5,"Súbor":6,"Diskusia k súboru":7,"Diskusia k MediaWiki":9,"Šablóna":10,"Diskusia k šablóne":11,"Diskusia k pomoci":13,"Diskusia ku kategórii":15,"Diskusia k portálu":101,"Discussiun":1,"Utilisader":2,"Utilisader discussiun":3,"Wikipedia discussiun":5,"Datoteca":6,"Datoteca discussiun":7,"MediaWiki discussiun":9,"Model discussiun":11,"Agid":12,"Agid discussiun":13,"Categoria discussiun":15,"Maxsus":-1,"Munozara":1,"Foydalanuvchi":2,"Foydalanuvchi 
munozarasi":3,"Vikipediya munozarasi":5,"Tasvir":6,"Tasvir munozarasi":7,"MediaWiki munozarasi":9,"Andoza":10,"Andoza munozarasi":11,"Yordam":12,"Yordam munozarasi":13,"Turkum":14,"Turkum munozarasi":15,"Uzalutno":-1,"Vakyarimata":1,"Jeno":2,"Jeno vakyarimata":3,"Vikipidiya":4,"Vikipidiyake vakyarimata":5,"Chitro":6,"Chitro vakyarimata":7,"MediyaViki":8,"MediyaViki vakyarimata":9,"Sikavno":10,"Sikavno vakyarimata":11,"Zhutipen":12,"Zhutipen vakyarimata":13,"Shopni":14,"Shopni vakyarimata":15,"মাধ্যম":-2,"বাৰ্তা":1,"সদস্য":2,"সদস্য বাৰ্তা":3,"ৱিকিপিডিয়া":4,"ৱিকিপিডিয়া বাৰ্তা":5,"চিত্ৰ":6,"চিত্ৰ বাৰ্তা":7,"মেডিয়াৱিকি":8,"মেডিয়াৱিকি বাৰ্তা":9,"সাঁচ":10,"সাঁচ বাৰ্তা":11,"সহায়":12,"সহায় বাৰ্তা":13,"শ্ৰেণী":14,"শ্ৰেণী বাৰ্তা":15,"ৱিকিচ'ৰা":100,"ৱিকিচ'ৰা আলোচনা":101,"Pāpaho":-2,"Papa nui":-1,"Kūkākūkā":1,"Mea hoʻohana":2,"Kūkākūkā o mea hoʻohana":3,"Kūkākūkā o Wikipikia":5,"Waihona":6,"Kūkākūkā o waihona":7,"Kūkākūkā o MediaWiki":9,"Anakuhi":10,"Kūkākūkā o anakuhi":11,"Kōkua":12,"Kūkākūkā o kōkua":13,"Māhele":14,"Kūkākūkā o māhele":15,"Башка тевень":-1,"Кортамо":1,"Теиця":2,"Теицянь кортамось":3,"Википедиясь":4,"Википедиясь кортамось":5,"Артовкс":6,"Артовксто кортамось":7,"MediaWiki-нь кортамось":9,"ЛопаПарцун":10,"ЛопаПарцундо кортамось":11,"Лезкстэ кортамось":13,"Категориядо кортамось":15,"رسنۍ":-2,"ځانګړی":-1,"خبرې اترې":1,"کارن":2,"د کارن خبرې اترې":3,"ويکيپېډيا":4,"د ويکيپېډيا خبرې اترې":5,"دوتنه":6,"د دوتنې خبرې اترې":7,"ميډياويکي":8,"د ميډياويکي خبرې اترې":9,"کينډۍ":10,"د کينډۍ خبرې اترې":11,"لارښود":12,"د لارښود خبرې اترې":13,"وېشنيزه":14,"د وېشنيزې خبرې اترې":15,"Таспа":-2,"Арнайы":-1,"Талқылау":1,"Қатысушы":2,"Қатысушы талқылауы":3,"Уикипедия талқылауы":5,"Сурет":6,"Сурет талқылауы":7,"МедиаУики":8,"МедиаУики талқылауы":9,"Үлгі":10,"Үлгі талқылауы":11,"Анықтама":12,"Анықтама талқылауы":13,"Санат":14,"Санат талқылауы":15,"Портал талқылауы":101,"Specala":-1,"Debato":1,"Uzanto Debato":3,"Wikipedio":4,"Wikipedio Debato":5,"Arkivo":6,"Arkivo Debato":7,"MediaWiki Debato":9,"Shablono":10,"Shablono Debato":11,"Helpo Debato":13,"Kategorio Debato":15,"ஊடகம்":-2,"சிறப்பு":-1,"பேச்சு":1,"பயனர்":2,"பயனர் பேச்சு":3,"விக்கிப்பீடியா":4,"விக்கிப்பீடியா பேச்சு":5,"படிமம்":6,"படிமப் பேச்சு":7,"மீடியாவிக்கி":8,"மீடியாவிக்கி பேச்சு":9,"வார்ப்புரு":10,"வார்ப்புரு பேச்சு":11,"உதவி":12,"உதவி பேச்சு":13,"பகுப்பு":14,"பகுப்பு பேச்சு":15,"வலைவாசல்":100,"வலைவாசல் பேச்சு":101,"Лӱмын ыштыме":-1,"Каҥашымаш":1,"Пайдаланыше":2,"Пайдаланышын каҥашымаш":3,"Википедий":4,"Википедийын каҥашымаш":5,"Файл шотышто каҥашымаш":7,"Кышкар":10,"Кышкар шотышто каҥашымаш":11,"Полшык":12,"Полшык шотышто каҥашымаш":13,"Категорий":14,"Категорий шотышто каҥашымаш":15,"Incubator":4,"Incubator talk":5,"Användare":2,"Användardiskussion":3,"Wikipediadiskussion":5,"Malldiskussion":11,"Hjälpdiskussion":13,"Syndrig":-1,"Gesprec":1,"Biliþ":6,"Biliþgesprec":7,"Bysen":10,"Bysengesprec":11,"Helpgesprec":13,"Flocc":14,"Floccgesprec":15,"Faili":-2,"Maalum":-1,"Majadiliano":1,"Mtumiaji":2,"Majadiliano ya mtumiaji":3,"Majadiliano ya Wikipedia":5,"Picha":6,"Majadiliano ya faili":7,"Majadiliano ya MediaWiki":9,"Kigezo":10,"Majadiliano ya kigezo":11,"Msaada":12,"Majadiliano ya msaada":13,"Jamii":14,"Majadiliano ya jamii":15,"Lango":100,"Majadiliano ya lango":101,"Discussion utente":3,"Discussion file":7,"Discussion modèl":11,"Ajuto":12,"Discussion ajuto":13,"Discussion categoria":15,"Portałe":100,"Discussion portałe":101,"Projeto":102,"Discussion projeto":103,"विसेस":-1,"सम्भासित":1,"अवयव":2,"अवयव सम्भासित":3,"Wikipedia सम्भासित":5,"पटिमा":6,"पटिमा 
सम्भासित":7,"मीडियाविकि सम्भासित":9,"पटिरूप":10,"पटिरूप सम्भासित":11,"अवस्सय":12,"अवस्सय सम्भासित":13,"विभाग":14,"विभाग सम्भासित":15,"Seite":102,"Seite Diskussion":103,"Index Diskussion":105,"Olay":1,"Paragamit":2,"Olay kan paragamit":3,"Olay sa Wikipedia":5,"Ladawan":6,"Olay sa ladawan":7,"Olay sa MediaWiki":9,"Plantilya":10,"Olay sa plantilya":11,"Tabang":12,"Olay sa tabang":13,"Olay sa kategorya":15,"Обсуждение Википедии":5,"Обсуждение портала":101,"Инкубатор":102,"Обсуждение Инкубатора":103,"Проект":104,"Обсуждение проекта":105,"Арбитраж":106,"Обсуждение арбитража":107,"Wikipedia discusión":5,"Portal Discusión":101,"Wikiproyecto":102,"Wikiproyecto Discusión":103,"Anexo Discusión":105,"Portale":100,"Discussioni portale":101,"Progetto":102,"Discussioni progetto":103,"Book":108,"Book talk":109} \ No newline at end of file diff --git a/wikipedia/README.md b/wikipedia/README.md new file mode 100644 index 0000000..1ac528e --- /dev/null +++ b/wikipedia/README.md @@ -0,0 +1,56 @@ + 0 contents-all.txt 34 01-Aug-2012 11:26 9.3K + 0 contents-nq.txt 38 01-Aug-2012 11:26 2.1K + 0 contents-nt.txt 36 01-Aug-2012 11:26 2.5K + 0 contents-tql.txt 37 01-Aug-2012 11:26 2.1K + 0 contents-ttl.txt 35 01-Aug-2012 11:26 2.5K + 0 instance_types_en.nq.bz2 0 29-Jun-2012 13:18 97M Contains triples of the form $object rdf:type $class from the ontology-based extraction. + 2 mappingbased_properties_unredirected_en.nq.bz2 10 29-Jun-2012 03:11 251M High-quality data extracted from Infoboxes using the ontology-based extraction. The predicates in this dataset are in the /ontology/ namespace. Used to be called Mapping Based Properties in previous releases. + 2 specific_mappingbased_properties_en.nq.bz2 30 29-Jun-2012 08:05 11M Infobox data from the ontology-based extraction, using units of measurement more convenient for the resource type, e.g. square kilometres instead of square metres for the area of a city. + 3 labels_en.nq.bz2 16 25-Jul-2012 15:29 208M Titles of all Wikipedia Articles in the corresponding language. + 4 short_abstracts_en.nq.bz2 7 25-Jul-2012 18:29 382M Short Abstracts (max. 500 characters long) of Wikipedia articles + 5 long_abstracts_en.nq.bz2 6 25-Jul-2012 15:33 682M Full abstracts of Wikipedia articles, usually the first section. + 7 geo_coordinates_en.nq.bz2 24 28-Jun-2012 21:25 20M Geographic coordinates extracted from Wikipedia. + 9 homepages_en.nq.bz2 29 29-Jun-2012 13:18 13M Links to homepages of persons, organizations etc. +10 persondata_unredirected_en.nq.bz2 21 29-Jun-2012 04:39 72M +14 article_categories_en.nq.bz2 12 28-Jun-2012 22:23 249M Links from concepts to categories using the SKOS vocabulary +14 category_labels_en.nq.bz2 27 29-Jun-2012 11:00 16M Labels for Categories. +16 external_links_en.nq.bz2 18 28-Jun-2012 21:23 185M Links to external web pages about a concept. +16 page_links_unredirected_en.nq.bz2 2 29-Jun-2012 00:15 1700G Dataset containing internal links between DBpedia instances. The dataset was created from the internal links between Wikipedia articles. The dataset might be useful for structural analysis, data mining or for ranking DBpedia instances using Page Rank or similar algorithms. +16 redirects_transitive_en.nt.bz2 1 12-Jul-2012 11:00 92M Redirects dataset in which multiple redirects have been resolved and redirect cycles have been removed. +17 disambiguations_unredirected_en.nq.bz2 28 29-Jun-2012 14:49 15M Links extracted from Wikipedia disambiguation pages. 
+18 page_ids_en.nq.bz2 15 27-Jul-2012 22:58 216M Dataset linking a DBpedia resource to the page ID of the Wikipedia article the data was extracted from.
+l1 geonames_links.nt.bz2 40 xx xxM Links between geographic places in DBpedia and data about them from GeoNames. Links created by Silk link specifications.
+n1 topical_concepts_unredirected_en.nq.bz2 31 09-Jul-2012 18:37 1.7M
+
+ - 6 images_en.nq.bz2 20 29-Jun-2012 01:20 103M Main image and corresponding thumbnail from the Wikipedia article.
+ - 8 infobox_properties_en.nq.bz2 4 25-Jul-2012 14:49 723M Information that has been extracted from Wikipedia infoboxes. Note that this data is in the less clean /property/ namespace. The Ontology Infobox Properties (/ontology/ namespace) should always be preferred over this data.
+ - 8 infobox_properties_unredirected_en.nq.bz2 5 29-Jun-2012 01:06 722M Information that has been extracted from Wikipedia infoboxes. Note that this data is in the less clean /property/ namespace. The Ontology Infobox Properties (/ontology/ namespace) should always be preferred over this data.
+ - 8 infobox_property_definitions_en.nq.bz2 39 29-Jun-2012 03:38 1.4M All properties / predicates used in infoboxes.
+ - 8 infobox_test_en.nq.bz2 9 28-Jun-2012 22:14 262M
+ - 1 mappingbased_properties_en.nq.bz2 11 25-Jul-2012 15:39 251M High-quality data extracted from infoboxes using the ontology-based extraction. The predicates in this dataset are in the /ontology/ namespace. Used to be called Mapping Based Properties in previous releases.
+ -10 pnd_en.nq.bz2 33 29-Jun-2012 14:49 45K Dataset containing PND (Personennamendatei) identifiers.
+ -12 interlanguage_links_en.nq.bz2 17 25-Jul-2012 15:15 205M Dataset linking a DBpedia resource to the same or a related resource in other languages, extracted from the inter-language links of a Wikipedia article.
+ -15 skos_categories_en.nq.bz2 23 29-Jun-2012 04:17 41M Information about which concept is a category and how categories are related, using the SKOS vocabulary.
+ -16 redirects_en.nq.bz2 19 12-Jul-2012 12:14 119M Dataset containing redirects between articles in Wikipedia.
+ -16 page_links_en.nq.bz2 3 25-Jul-2012 16:25 1700G Dataset containing internal links between DBpedia instances. The dataset was created from the internal links between Wikipedia articles. The dataset might be useful for structural analysis, data mining or for ranking DBpedia instances using PageRank or similar algorithms.
+ -17 disambiguations_en.nq.bz2 26 25-Jul-2012 14:30 16M Links extracted from Wikipedia disambiguation pages. Since Wikipedia has no syntax to distinguish disambiguation links from ordinary links, DBpedia has to use heuristics.
+ -17 iri_same_as_uri_en.nq.bz2 25 25-Jul-2012 17:39 16M owl:sameAs links between the IRI and URI format of DBpedia resources. Only extracted when IRI and URI are actually different.
+ -19 revision_ids_en.nq.bz2 14 27-Jul-2012 23:07 225M Dataset linking a DBpedia resource to the revision ID of the Wikipedia article the data was extracted from. Until DBpedia 3.7, these files had names like 'revisions_en.nt'. Since DBpedia 3.8, they were renamed to 'revision_ids_en.nt' to distinguish them from the new 'revision_uris_en.nt' files.
+ -19 revision_uris_en.nq.bz2 13 27-Jul-2012 23:17 243M Dataset linking a DBpedia resource to the specific Wikipedia article revision used in this DBpedia release.
+ -10 persondata_en.nq.bz2 22 25-Jul-2012 18:26 72M Information about persons (date and place of birth etc.) extracted from the English and German Wikipedia, represented using the FOAF vocabulary.
+ -n1 topical_concepts_en.nq.bz2 32 25-Jul-2012 18:32 1.7M We tokenize all Wikipedia paragraphs linking to DBpedia resources and aggregate them in a Vector Space Model of terms weighted by their co-occurrence with the target resource. We use those vectors to select the strongest related terms and build topic signatures for those entities.
+ -16 wikipedia_links_en.nq.bz2 8 29-Jun-2012 04:05 311M Dataset linking a DBpedia resource to the corresponding article in Wikipedia.
+
+parallel -j3 wget -nc -x http://downloads.dbpedia.org/3.8/en/{} ::: contents-all.txt contents-nq.txt contents-nt.txt contents-tql.txt contents-ttl.txt instance_types_en.nq.bz2 mappingbased_properties_unredirected_en.nq.bz2 specific_mappingbased_properties_en.nq.bz2 labels_en.nq.bz2 short_abstracts_en.nq.bz2 long_abstracts_en.nq.bz2 geo_coordinates_en.nq.bz2 homepages_en.nq.bz2 persondata_unredirected_en.nq.bz2 article_categories_en.nq.bz2 category_labels_en.nq.bz2 external_links_en.nq.bz2 redirects_transitive_en.nt.bz2 wikipedia_links_en.nq.bz2 disambiguations_unredirected_en.nq.bz2 page_ids_en.nq.bz2 topical_concepts_unredirected_en.nq.bz2
+
+parallel -j3 wget -nc -x http://downloads.dbpedia.org/3.8/links/{} ::: geonames_links.nt.bz2 musicbrainz_links.nt.bz2 nytimes_links.nt.bz2 uscensus_links.nt.bz2 wordnet_links.nt.bz2 yago_links.nt.bz2
+
+
+
+
+Links to MusicBrainz (musicbrainz_links.nt.bz2)
+Links to New York Times (nytimes_links.nt.bz2)
+Links to US Census (uscensus_links.nt.bz2)
+Links to WordNet classes (wordnet_links.nt.bz2)
+
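The two parallel invocations above fetch everything as bzip2-compressed N-Quads/N-Triples, and wget -x mirrors the server's directory layout, so the dumps land under downloads.dbpedia.org/3.8/. As a quick sanity check after downloading, a minimal Ruby sketch along these lines (not part of this patch; it assumes bzcat is on the PATH, and the DUMP path is just wherever wget -x put labels_en.nq.bz2 on your machine) streams a dump and prints resource/label pairs without decompressing it to disk:

    #!/usr/bin/env ruby
    # Sketch only: stream a DBpedia dump straight out of bzip2.
    # Assumes `bzcat` is installed; adjust DUMP to your download location.
    DUMP = 'downloads.dbpedia.org/3.8/en/labels_en.nq.bz2'

    # Crude N-Quads pattern: <subject> <predicate> "literal" ... .
    NQ_LITERAL = /\A<([^>]+)>\s+<[^>]+>\s+"((?:[^"\\]|\\.)*)"/

    IO.popen(['bzcat', DUMP]) do |io|
      io.each_line do |line|
        next if line.start_with?('#')             # skip comment lines
        next unless (m = NQ_LITERAL.match(line))  # keep literal objects only
        puts "#{m[1]}\t#{m[2]}"                   # resource URI, label text
      end
    end

The same loop works for any of the .nq.bz2 files listed above; swap the regex for a real N-Quads parser if the graph (fourth) component matters.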