Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: infobrightdb/ibwikistats
base: eea8fe0759
...
head fork: infobrightdb/ibwikistats
compare: 6b2116c22c
Checking mergeability… Don't worry, you can still create the pull request.
  • 3 commits
  • 6 files changed
  • 0 commit comments
  • 1 contributor
View
17 infobright/ddl.sql
@@ -0,0 +1,17 @@
+-- Table that loaddata.pl loads the pagecounts files into: the loader fills
+-- projectcode, pagename, pageviews and bytes from the space-separated input
+-- and sets summarytime to the same value for every row of one load.
+ CREATE TABLE `pagecounts` (
+ `summarytime` datetime DEFAULT NULL,
+ `projectcode` varchar(25) DEFAULT NULL,
+ `pagename` varchar(1024) DEFAULT NULL,
+ `pageviews` bigint(20) DEFAULT NULL,
+ `bytes` bigint(20) DEFAULT NULL
+) ENGINE=BRIGHTHOUSE;
+
+-- Two-column lookup table; presumably loaded from types.txt (short code such
+-- as "b" in c1, project name such as "wikibooks" in wikitype) -- TODO confirm.
+ CREATE TABLE `types` (
+ `c1` varchar(5) DEFAULT NULL,
+ `wikitype` varchar(128) DEFAULT NULL
+) ENGINE=BRIGHTHOUSE;
+
+-- Two-column lookup table; presumably loaded from langlookup.txt (language
+-- code / language name) -- TODO confirm. This is the only table that declares
+-- a utf8 default charset; langlookup.txt contains non-ASCII language names.
+ CREATE TABLE `langlookup` (
+ `languagecode` varchar(25) DEFAULT NULL,
+ `languagename` varchar(255) DEFAULT NULL
+) ENGINE=BRIGHTHOUSE DEFAULT CHARSET=utf8 ;
View
13 infobright/getdata.pl
@@ -0,0 +1,13 @@
+#!/usr/bin/perl
+
+# Downloads one pagecounts dump (named from the year/month/day/hour settings
+# held in ibLib) into the current directory, then prints the gunzip command
+# for the downloaded file. Progress lines are printed with a leading '#'.
+
+use strict;
+use warnings;
+
+use lib qw(./lib);
+use ibLib;
+
+# Name of the compressed dump and the URL it is fetched from.
+my $outfile = makefilename(".gz");
+my $URL = makeurl($outfile);
+
+print "#" . $URL;
+print "\n#getting now...\n";
+
+# Dies with the curl error text if the transfer fails.
+getdata( $URL, $outfile );
+
+print "\n\n";
+print "gunzip $outfile\n";
+
View
281 infobright/langlookup.txt
@@ -0,0 +1,281 @@
+"en";"English"
+"de";"German"
+"fr";"French"
+"it";"Italian"
+"pl";"Polish"
+"es";"Spanish"
+"ja";"Japanese"
+"ru";"Russian"
+"nl";"Dutch"
+"pt";"Portuguese"
+"sv";"Swedish"
+"zh";"Chinese"
+"ca";"Catalan"
+"no";"Norwegian (Bokmål)"
+"uk";"Ukrainian"
+"fi";"Finnish"
+"vi";"Vietnamese"
+"cs";"Czech"
+"hu";"Hungarian"
+"tr";"Turkish"
+"id";"Indonesian"
+"ko";"Korean"
+"ro";"Romanian"
+"fa";"Persian"
+"da";"Danish"
+"ar";"Arabic"
+"eo";"Esperanto"
+"sr";"Serbian"
+"lt";"Lithuanian"
+"sk";"Slovak"
+"ms";"Malay"
+"he";"Hebrew"
+"vo";"Volapük"
+"bg";"Bulgarian"
+"sl";"Slovenian"
+"war";"Waray-Waray"
+"eu";"Basque"
+"hr";"Croatian"
+"hi";"Hindi"
+"et";"Estonian"
+"gl";"Galician"
+"simple";"Simple English"
+"new";"Newar / Nepal Bhasa"
+"nn";"Norwegian (Nynorsk)"
+"th";"Thai"
+"az";"Azerbaijani"
+"el";"Greek"
+"roa-rup";"Aromanian"
+"la";"Latin"
+"ht";"Haitian"
+"tl";"Tagalog"
+"ka";"Georgian"
+"te";"Telugu"
+"mk";"Macedonian"
+"ceb";"Cebuano"
+"nap";"Neapolitan"
+"sh";"Serbo-Croatian"
+"pms";"Piedmontese"
+"br";"Breton"
+"be-x-old";"Belarusian (Taraškievica)"
+"lv";"Latvian"
+"mr";"Marathi"
+"jv";"Javanese"
+"lb";"Luxembourgish"
+"ta";"Tamil"
+"sq";"Albanian"
+"cy";"Welsh"
+"is";"Icelandic"
+"bs";"Bosnian"
+"be";"Belarusian"
+"oc";"Occitan"
+"an";"Aragonese"
+"bpy";"Bishnupriya Manipuri"
+"bn";"Bengali"
+"io";"Ido"
+"sw";"Swahili"
+"kk";"Kazakh"
+"lmo";"Lombard"
+"fy";"West Frisian"
+"gu";"Gujarati"
+"ml";"Malayalam"
+"af";"Afrikaans"
+"nds";"Low Saxon"
+"mg";"Malagasy"
+"ur";"Urdu"
+"scn";"Sicilian"
+"qu";"Quechua"
+"ku";"Kurdish"
+"zh-yue";"Cantonese"
+"su";"Sundanese"
+"ast";"Asturian"
+"ne";"Nepali"
+"hy";"Armenian"
+"pnb";"Western Panjabi"
+"yo";"Yoruba"
+"bat-smg";"Samogitian"
+"ga";"Irish"
+"cv";"Chuvash"
+"wa";"Walloon"
+"kn";"Kannada"
+"am";"Amharic"
+"als";"Alemannic"
+"tg";"Tajik"
+"vec";"Venetian"
+"roa-tara";"Tarantino"
+"zh-min-nan";"Min Nan"
+"yi";"Yiddish"
+"tt";"Tatar"
+"bug";"Buginese"
+"gd";"Scottish Gaelic"
+"os";"Ossetian"
+"uz";"Uzbek"
+"sah";"Sakha"
+"pam";"Kapampangan"
+"arz";"Egyptian Arabic"
+"mi";"Maori"
+"li";"Limburgian"
+"hsb";"Upper Sorbian"
+"sco";"Scots"
+"nah";"Nahuatl"
+"mn";"Mongolian"
+"co";"Corsican"
+"gan";"Gan"
+"glk";"Gilaki"
+"my";"Burmese"
+"ia";"Interlingua"
+"bcl";"Central Bicolano"
+"fo";"Faroese"
+"sa";"Sanskrit"
+"si";"Sinhalese"
+"fiu-vro";"Võro"
+"nds-nl";"Dutch Low Saxon"
+"vls";"West Flemish"
+"tk";"Turkmen"
+"bar";"Bavarian"
+"ckb";"Sorani"
+"mrj";"Hill Mari"
+"gv";"Manx"
+"ilo";"Ilokano"
+"se";"Northern Sami"
+"map-bms";"Banyumasan"
+"dv";"Divehi"
+"nrm";"Norman"
+"pag";"Pangasinan"
+"diq";"Zazaki"
+"hif";"Fiji Hindi"
+"rm";"Romansh"
+"mzn";"Mazandarani"
+"bo";"Tibetan"
+"wuu";"Wu"
+"fur";"Friulian"
+"ug";"Uyghur"
+"lij";"Ligurian"
+"mt";"Maltese"
+"csb";"Kashubian"
+"km";"Khmer"
+"bh";"Bihari"
+"ang";"Anglo-Saxon"
+"lad";"Ladino"
+"nov";"Novial"
+"udm";"Udmurt"
+"sc";"Sardinian"
+"zh-classical";"Classical Chinese"
+"cbk-zam";"Zamboanga Chavacano"
+"ps";"Pashto"
+"mhr";"Meadow Mari"
+"pi";"Pali"
+"kv";"Komi"
+"ksh";"Ripuarian"
+"frp";"Franco-Provençal/Arpitan"
+"rue";"Rusyn"
+"hak";"Hakka"
+"kw";"Cornish"
+"nv";"Navajo"
+"pa";"Punjabi"
+"so";"Somali"
+"szl";"Silesian"
+"xal";"Kalmyk"
+"ie";"Interlingue"
+"rw";"Kinyarwanda"
+"koi";"Komi-Permyak"
+"stq";"Saterland Frisian"
+"haw";"Hawaiian"
+"pdc";"Pennsylvania German"
+"ln";"Lingala"
+"krc";"Karachay-Balkar"
+"to";"Tongan"
+"pcd";"Picard"
+"ext";"Extremaduran"
+"crh";"Crimean Tatar"
+"ky";"Kirghiz"
+"ace";"Acehnese"
+"myv";"Erzya"
+"gn";"Guarani"
+"ba";"Bashkir"
+"eml";"Emilian-Romagnol"
+"ce";"Chechen"
+"arc";"Assyrian Neo-Aramaic"
+"pap";"Papiamentu"
+"ay";"Aymara"
+"kl";"Greenlandic"
+"bjn";"Banjar"
+"frr";"North Frisian"
+"jbo";"Lojban"
+"pfl";"Palatinate German"
+"wo";"Wolof"
+"or";"Oriya"
+"tpi";"Tok Pisin"
+"kab";"Kabyle"
+"ty";"Tahitian"
+"srn";"Sranan"
+"zea";"Zealandic"
+"gag";"Gagauz"
+"dsb";"Lower Sorbian"
+"ab";"Abkhazian"
+"lo";"Lao"
+"ig";"Igbo"
+"mdf";"Moksha"
+"tet";"Tetum"
+"av";"Avar"
+"kg";"Kongo"
+"mwl";"Mirandese"
+"rmy";"Romani"
+"ltg";"Latgalian"
+"cu";"Old Church Slavonic"
+"lbe";"Lak"
+"kaa";"Karakalpak"
+"kbd";"Kabardian Circassian"
+"sm";"Samoan"
+"na";"Nauruan"
+"mo";"Moldovan"
+"got";"Gothic"
+"bm";"Bambara"
+"as";"Assamese"
+"ik";"Inupiak"
+"sd";"Sindhi"
+"bi";"Bislama"
+"ks";"Kashmiri"
+"iu";"Inuktitut"
+"pih";"Norfolk"
+"ss";"Swati"
+"pnt";"Pontic"
+"chr";"Cherokee"
+"cdo";"Min Dong"
+"ee";"Ewe"
+"ha";"Hausa"
+"ti";"Tigrinya"
+"bxr";"Buryat (Russia)"
+"za";"Zhuang"
+"om";"Oromo"
+"zu";"Zulu"
+"ve";"Venda"
+"ts";"Tsonga"
+"rn";"Kirundi"
+"sg";"Sango"
+"dz";"Dzongkha"
+"tum";"Tumbuka"
+"cr";"Cree"
+"ch";"Chamorro"
+"lg";"Luganda"
+"fj";"Fijian"
+"ny";"Chichewa"
+"st";"Sesotho"
+"xh";"Xhosa"
+"ff";"Fula"
+"tn";"Tswana"
+"ki";"Kikuyu"
+"sn";"Shona"
+"ak";"Akan"
+"chy";"Cheyenne"
+"tw";"Twi"
+"ng";"Ndonga"
+"ii";"Sichuan Yi"
+"cho";"Choctaw"
+"mh";"Marshallese"
+"aa";"Afar"
+"kj";"Kuanyama"
+"ho";"Hiri Motu"
+"mus";"Muscogee"
+"kr";"Kanuri"
+"hz";"Herero"
View
36 infobright/loaddata.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl
+
+# Prints (does not execute) the SQL that loads one uncompressed pagecounts
+# file into the `pagecounts` table of database wiki1. The file name and the
+# summarytime value come from the year/month/day/hour settings held in ibLib.
+
+use strict;
+use warnings;
+
+use lib qw(./lib);
+use ibLib;
+
+# Steps noted by the original author:
+# call getdata
+# unzip
+# call loaddata
+
+# No trailing slash here: the statement below joins directory and file name
+# with '/' itself.
+my $datadir = '/qa/datasets/wikistats/working/input';
+
+# Explicit empty suffix: the uncompressed dump has no extension, and passing
+# nothing would make makefilename() concatenate an undefined value.
+# Keep this call before makesummarytime(): makefilename() zero-pads the
+# month/day/hour values that makesummarytime() reads.
+my $filename = makefilename('');
+my $summarytime = makesummarytime();
+
+my $QUERY = <<SQL;
+
+use wiki1;
+
+set \@bh_dataformat='mysql';
+
+load data infile '$datadir/$filename' into table pagecounts
+fields terminated by ' '
+(
+ projectcode ,
+ pagename ,
+ pageviews ,
+ bytes
+)
+set summarytime='$summarytime'
+
+;
+SQL
+
+print $QUERY;
+
+print "\n\n\n";
View
9 infobright/types.txt
@@ -0,0 +1,9 @@
+"";"wikipedia"
+"b";"wikibooks"
+"d";"wiktionary"
+"mw";"mobile"
+"n";"wikinews"
+"q";"wikiquote"
+"s";"wikisource"
+"v";"wikiversity"
+"m";"wikimedia"
View
96 lib/ibLib.pm
@@ -0,0 +1,96 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+use Getopt::Long;
+use Data::Dumper;
+use WWW::Curl::Easy;
+
+## defaults
+## File-scoped settings shared by the subs in this file. Each one can be
+## overridden on the command line of the script that loads this library,
+## because GetOptions() below runs as soon as the file is loaded.
+
+$| = 1;    # autoflush the currently selected output handle
+
+my $DEBUG = 0;
+my $data = "./data";
+my $database = "wiki1";
+my $host = "10.20.30.81";
+my $port = "54321";
+my $user = "wiki";
+# NOTE(review): credentials are hard-coded in source; consider requiring
+# --password or reading it from the environment.
+my $password = "pedia";
+my $year = 2011;
+my $month = 5;
+my $day = 25;
+my $hour = 20;
+
+GetOptions(
+    'data=s' => \$data,
+    'DEBUG=s' => \$DEBUG,
+    'db=s' => \$database,
+    'host=s' => \$host,
+    # --port was missing although $port has a default like the other
+    # connection settings; without it the port could not be overridden.
+    'port=s' => \$port,
+    'user=s' => \$user,
+    'password=s' => \$password,
+    'year=s' => \$year,
+    'month=s' => \$month,
+    'day=s' => \$day,
+    'hour=s' => \$hour,
+
+);
+
+# Build the dump file name "pagecounts-YYYYMMDD-HH0000" followed by $suffix.
+#
+# Parameters:
+#   $suffix - optional string appended to the name (e.g. ".gz"); defaults to
+#             the empty string when omitted.
+# Returns: the file name as a string.
+# Side effect: the file-scoped $month, $day and $hour are rewritten in their
+# two-digit form, which makesummarytime() also reads.
+sub makefilename {
+    my ($suffix) = @_;
+    $suffix = '' unless defined $suffix;
+
+    # sprintf gives the same result on every call; prepending "0" whenever
+    # the value is below 10 turned an already padded "05" into "005" on a
+    # second call.
+    $month = sprintf( "%02d", $month );
+    $day = sprintf( "%02d", $day );
+    $hour = sprintf( "%02d", $hour );
+
+    return "pagecounts-" . $year . $month . $day . "-" . $hour . "0000" . $suffix;
+}
+
+# Build the "YYYY-MM-DD HH:00:00" timestamp for the configured year, month,
+# day and hour.
+#
+# Month, day and hour are zero-padded here, so the result has the same shape
+# whether or not another sub has already padded the shared values.
+# Returns: the timestamp as a string.
+sub makesummarytime {
+    return sprintf( "%s-%02d-%02d %02d:00:00", $year, $month, $day, $hour );
+}
+
+=pod
+data source URL example:
+http://dammit.lt/wikistats/pagecounts-20110601-200000.gz
+
+second set is a rollup query on above, subset of above data
+http://dammit.lt/wikistats/projectcounts-20110531-210000
+
+=cut
+
+# Download $url with WWW::Curl::Easy and store the response in "./$outfile".
+#
+# Parameters:
+#   $url     - address to fetch.
+#   $outfile - file name, relative to the current directory, to write to.
+# Returns: 1 after printing "#file retreived" on success.
+# Dies: when the output file cannot be opened or closed, or when curl reports
+#       a non-zero return code (the message carries curl's error text).
+sub getdata {
+    ## could use a better module, using module installed on current machine
+    my ( $url, $outfile ) = @_;
+    my $target = "./$outfile";
+    my $curl = WWW::Curl::Easy->new;
+
+    # Fail immediately if the target is not writable instead of handing curl
+    # an unopened handle.
+    open( my $mydata, '>', $target )
+        or die "cannot open $target for writing: $!\n";
+
+    # The callers in this project download ".gz" files; keep the bytes as is.
+    binmode($mydata);
+
+    $curl->setopt( CURLOPT_URL, $url );
+
+    # NOTE(review): a reference to the handle variable is passed, exactly as
+    # before; confirm the installed WWW::Curl writes through it rather than
+    # treating it as a scalar buffer.
+    $curl->setopt( CURLOPT_WRITEDATA, \$mydata );
+
+    my $retcode = $curl->perform();
+    if ( $retcode != 0 ) {
+        die "error on curl "
+            . $curl->strerror($retcode) . " "
+            . $curl->errbuf . "\n";
+    }
+
+    # Buffered write errors (e.g. disk full) only surface when closing.
+    close($mydata) or die "cannot close $target: $!\n";
+
+    print "#file retreived\n\n";
+    return 1;
+}
+
+# Return the download URL for $filename: the fixed wikistats base address
+# with the file name appended unchanged.
+sub makeurl {
+    my ($filename) = @_;
+    return "http://dammit.lt/wikistats/" . $filename;
+}
+
+1;

No commit comments for this range

Something went wrong with that request. Please try again.