Commit: example fixes
denizyuret committed Aug 20, 2018
1 parent b2f3a27 commit 78d3a96
Showing 39 changed files with 981 additions and 704 deletions.
File renamed without changes.
1 change: 1 addition & 0 deletions .gitignore
@@ -15,6 +15,7 @@ log
*.job
*.h5
*.jld
*.jld2
test1
test2
*.so
18 changes: 15 additions & 3 deletions Project.toml
@@ -1,12 +1,24 @@
name = "Knet"
uuid = "1902f260-5fb4-5aff-8c31-6271790ab950"
author = ["Deniz Yuret <denizyuret@gmail.com>"]
version = "1.0.0"

[deps]
AutoGrad = "6710c13c-97f1-543f-91c5-74e8f7d95b35"
CUDAapi = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
AutoGrad = "1.0.0"
CUDAapi = "0.5.0"
SpecialFunctions = "0.7.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
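The new [compat] section uses Pkg's caret semantics: AutoGrad = "1.0.0" admits any 1.x release at or above 1.0.0, while CUDAapi = "0.5.0" admits only 0.5.x, because 0.x versions treat the minor number as breaking. A minimal sketch of exercising these bounds from the REPL (illustrative, not part of the commit):

using Pkg
Pkg.add("Knet")      # resolver picks versions satisfying the [compat] bounds above
Pkg.status()         # show what was resolved
Pkg.test("Knet")     # runs the test suite with the [targets] extras, i.e. Test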
16 changes: 9 additions & 7 deletions data/cifar.jl
@@ -1,8 +1,10 @@
using Knet

"Where to download cifar from"
cifarurl = "http://www.cs.toronto.edu/~kriz"

"Where to download cifar to"
cifardir = Pkg.dir("Knet","data","cifar")
cifardir = Knet.dir("data", "cifar")

"cifar10() => (xtrn,ytrn,xtst,ytst,labels)"
function cifar10(;
@@ -13,7 +15,7 @@ function cifar10(;
lbl="batches.meta.txt",
)
global _cifar10_xtrn, _cifar10_ytrn, _cifar10_xtst, _cifar10_ytst, _cifar10_lbls
if !isdefined(:_cifar10_xtrn)
if !(@isdefined _cifar10_xtrn)
_cifar10_xtrn, _cifar10_ytrn, _cifar10_xtst, _cifar10_ytst, _cifar10_lbls = _cifar_read_tgz(tgz,dir,trn,tst,lbl)
end
return _cifar10_xtrn, _cifar10_ytrn, _cifar10_xtst, _cifar10_ytst, _cifar10_lbls
@@ -28,7 +30,7 @@ function cifar100(;
lbl="fine_label_names.txt",
)
global _cifar100_xtrn, _cifar100_ytrn, _cifar100_xtst, _cifar100_ytst, _cifar100_lbls
if !isdefined(:_cifar100_xtrn)
if !(@isdefined _cifar100_xtrn)
_cifar100_xtrn, _cifar100_ytrn, _cifar100_xtst, _cifar100_ytst, _cifar100_lbls = _cifar_read_tgz(tgz,dir,trn,tst,lbl)
end
return _cifar100_xtrn, _cifar100_ytrn, _cifar100_xtst, _cifar100_ytst, _cifar100_lbls
@@ -38,7 +40,7 @@ end
cifarview(x,i)=colorview(RGB,permutedims(x[:,:,:,i],(3,2,1)))

function _cifar_read_tgz(tgz,dir,trn,tst,labels)
info("Reading $tgz...")
@info("Reading $tgz...")
if !isdir(cifardir)
mkpath(cifardir)
end
@@ -63,14 +65,14 @@ function _cifar_read_files(dir,files)
x,y = _cifar_read_file(dir,file)
push!(xs,x); push!(ys,y)
end
return cat(4, xs...), vcat(ys...)
return cat(xs..., dims=4), vcat(ys...)
end

function _cifar_read_file(dir,file)
a = read(joinpath(dir,file))
d = contains(dir,"cifar-100") ? 1 : 0
d = occursin("cifar-100",dir) ? 1 : 0
a = reshape(a, (3073+d, div(length(a),3073+d)))
y = a[1+d,:] + 0x1 # first row (second for cifar100) is Int8 index representation of correct answers
y = a[1+d,:] .+ 0x1 # first row (second for cifar100) is Int8 index representation of correct answers
x = a[2+d:end,:] # rows 2:end (3:end for cifar100) give 32,32,3,N images
# y = full(sparse(y,1:length(y),1f0,10,length(y))) # one-hot vector representation
# maybe convert y to int?
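The cifar.jl hunks are the stock Julia 0.6 → 1.0 migrations that recur throughout this commit: isdefined(:x) becomes the macro @isdefined x, info(...) becomes the logging macro @info, the concatenation dimension moves to a keyword (cat(xs..., dims=4)), contains(haystack, needle) becomes occursin(needle, haystack) with the argument order swapped, and scalar-plus-array addition needs an explicit broadcast dot. A standalone sketch of the same idioms (illustrative, not part of the commit):

a, b = rand(UInt8, 32, 32, 3, 2), rand(UInt8, 32, 32, 3, 2)
x = cat(a, b, dims=4)                           # 0.6: cat(4, a, b)
@info "stacked into size $(size(x))"            # 0.6: info(...)
occursin("cifar-100", "data/cifar-100-binary")  # 0.6: contains(dir, "cifar-100")
y = x[1, 1, 1, :] .+ 0x1                        # broadcasting is explicit in 1.0
if !(@isdefined _cifar_cache)                   # 0.6: isdefined(:_cifar_cache)
    _cifar_cache = y
end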
14 changes: 5 additions & 9 deletions data/fashion-mnist.jl
@@ -1,14 +1,10 @@
for p in ("GZip",)
Pkg.installed(p) == nothing && Pkg.add(p)
end

using GZip
using GZip, Knet

"Where to download fmnist from"
fmnisturl = "https://github.com/zalandoresearch/fashion-mnist/raw/master/data/fashion"

"Where to download fmnist to"
fmnistdir = Pkg.dir("Knet","data","fashion-mnist")
fmnistdir = Knet.dir("data","fashion-mnist")

"""
@@ -36,8 +32,8 @@ descriptions are shown below.

function fmnist()
global _fmnist_xtrn,_fmnist_ytrn,_fmnist_xtst,_fmnist_ytst,_fmnist_lbls
if !isdefined(:_fmnist_xtrn)
info("Loading FMNIST...")
if !(@isdefined _fmnist_xtrn)
@info("Loading FMNIST...")
_fmnist_xtrn = _fmnist_xdata("train-images-idx3-ubyte.gz")
_fmnist_xtst = _fmnist_xdata("t10k-images-idx3-ubyte.gz")
_fmnist_ytrn = _fmnist_ydata("train-labels-idx1-ubyte.gz")
@@ -56,7 +52,7 @@ function _fmnist_xdata(file)
end

function _fmnist_ydata(file)
_fmnist_gzload(file)[9:end] + 0x1
_fmnist_gzload(file)[9:end] .+ 0x1
end

function _fmnist_gzload(file)
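The deleted Pkg.installed(p) == nothing && Pkg.add(p) preamble (also dropped in imagenet.jl, imdb.jl, and treebank.jl below) relied on a one-argument Pkg.installed method that no longer exists in Julia 1.0; dependencies now belong in Project.toml. The loader itself reads gzipped IDX files: label files carry an 8-byte header (magic number plus count) before one byte per label, hence [9:end] to skip the header and .+ 0x1 to shift to 1-based labels. A minimal sketch of that idiom, assuming the file has already been downloaded:

using GZip
function gzload(path)
    fh = GZip.open(path)   # decompresses transparently on read
    data = read(fh)
    close(fh)
    return data
end
labels = gzload("t10k-labels-idx1-ubyte.gz")[9:end] .+ 0x1  # skip 8-byte IDX header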
4 changes: 2 additions & 2 deletions data/gutenberg.jl
@@ -1,11 +1,11 @@
using Random
using Random, Knet

"Where to download gutenberg from"
gutenbergurl = "http://www.gutenberg.org/files"


"Where to download gutenberg to"
gutenbergdir = joinpath(dirname(pathof(Knet)), "..", "data", "gutenberg")
gutenbergdir = Knet.dir("data", "gutenberg")


"Download text from Project Gutenberg and return contents as String."
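Both sides of this hunk solve the same problem: Pkg.dir was removed in Julia 1.0, so a package must locate its data relative to its own loaded module, and Knet.dir presumably wraps the pathof pattern visible on the replaced line. The pattern in isolation:

using Knet
# pathof(Knet) points at src/Knet.jl inside the installed package,
# so the package root is one level above that file's directory:
datadir = abspath(joinpath(dirname(pathof(Knet)), "..", "data", "gutenberg"))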
8 changes: 5 additions & 3 deletions data/housing.jl
@@ -1,3 +1,5 @@
using DelimitedFiles, Statistics, Knet

"""
housing([test]; [url, file])
@@ -18,18 +20,18 @@ deviation.
"""
function housing(test=0.0;
file=Pkg.dir("Knet","data","housing","housing.data"),
file=Knet.dir("data", "housing", "housing.data"),
url="https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")
if !isfile(file)
isdir(dirname(file)) || mkpath(dirname(file))
info("Downloading $url to $file")
@info("Downloading $url to $file")
download(url, file)
end
data = readdlm(file)'
# @show size(data) # (14,506)
x = data[1:13,:]
y = data[14:14,:]
x = (x .- mean(x,2)) ./ std(x,2) # Data normalization
x = (x .- mean(x,dims=2)) ./ std(x,dims=2) # Data normalization
if test == 0
xtrn = xtst = x
ytrn = ytst = y
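The normalization change reflects Statistics leaving Base: mean and std must be imported, and the reduction dimension becomes the dims keyword. With features in rows and samples in columns, dims=2 yields one mean and one standard deviation per feature. A short sketch (illustrative data):

using Statistics
x = rand(13, 506)       # 13 features × 506 samples, as in housing()
μ = mean(x, dims=2)     # 13×1 per-feature means (0.6: mean(x, 2))
σ = std(x, dims=2)      # 13×1 per-feature standard deviations
xn = (x .- μ) ./ σ      # broadcasting preserves the 13×506 shape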
4 changes: 0 additions & 4 deletions data/imagenet.jl
@@ -1,7 +1,3 @@
for p in ("MAT","Images")
Pkg.installed(p) == nothing && Pkg.add(p)
end

using MAT,Images

_mcnurl = "http://www.vlfeat.org/matconvnet/models"
28 changes: 12 additions & 16 deletions data/imdb.jl
@@ -2,11 +2,8 @@
# Also see https://github.com/fchollet/keras/raw/master/examples/imdb_lstm.py
# Also see https://github.com/ilkarman/DeepLearningFrameworks/raw/master/common/utils.py

for p in ("PyCall","JSON","JLD")
Pkg.installed(p) == nothing && Pkg.add(p)
end

using PyCall,JSON,JLD
using PyCall,JSON,JLD2,Random,Knet
@pyimport numpy as np

"""
@@ -28,37 +25,36 @@ https://keras.io/datasets and return (xtrn,ytrn,xtst,ytst,dict) tuple.
"""
function imdb(;
url = "https://s3.amazonaws.com/text-datasets",
dir = Pkg.dir("Knet","data","imdb"),
dir = Knet.dir("data", "imdb"),
data="imdb.npz",
dict="imdb_word_index.json",
jld="imdb.jld",
jld2="imdb.jld2",
maxval=nothing,
maxlen=nothing,
seed=0, oov=true, stoken=true, pad=true
)
global _imdb_xtrn,_imdb_ytrn,_imdb_xtst,_imdb_ytst,_imdb_dict
if !isdefined(:_imdb_xtrn)
if !(@isdefined _imdb_xtrn)
isdir(dir) || mkpath(dir)
jldpath = joinpath(dir,jld)
if !isfile(jldpath)
info("Downloading IMDB...")
jld2path = joinpath(dir,jld2)
if !isfile(jld2path)
@info("Downloading IMDB...")
datapath = joinpath(dir,data)
dictpath = joinpath(dir,dict)
isfile(datapath) || download("$url/$data",datapath)
isfile(dictpath) || download("$url/$dict",dictpath)
@pyimport numpy as np
d = np.load(datapath)
_imdb_xtrn = map(a->np.asarray(a,dtype=np.int32), get(d, "x_train"))
_imdb_ytrn = Array{Int8}(get(d, "y_train") .+ 1)
_imdb_xtst = map(a->np.asarray(a,dtype=np.int32), get(d, "x_test"))
_imdb_ytst = Array{Int8}(get(d, "y_test") .+ 1)
_imdb_dict = Dict{String,Int32}(JSON.parsefile(dictpath))
JLD.@save jldpath _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
JLD2.@save jld2path _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
#rm(datapath)
#rm(dictpath)
end
info("Loading IMDB...")
JLD.@load jldpath _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
@info("Loading IMDB...")
JLD2.@load jld2path _imdb_xtrn _imdb_ytrn _imdb_xtst _imdb_ytst _imdb_dict
end
if seed != 0; srand(seed); end
xs = [_imdb_xtrn;_imdb_xtst]
@@ -85,7 +81,7 @@ function imdb(;
xi = xi[end-maxlen+1:end]
end
if pad && length(xi) < maxlen
xi = append!(repmat([pad_token], maxlen-length(xi)), xi)
xi = append!(repeat([pad_token], maxlen-length(xi)), xi)
end
newx[i] = xi
end
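The cache format moves from JLD to JLD2, whose @save/@load macros are drop-in replacements writing *.jld2 files (matching the new .gitignore entry above); repmat is likewise renamed repeat. A minimal round-trip sketch with hypothetical variables:

using JLD2
xtrn = rand(Int32, 5); ytrn = rand(Int8, 5)
JLD2.@save "cache.jld2" xtrn ytrn   # store variables under their own names
JLD2.@load "cache.jld2" xtrn ytrn   # restore them into the current scope
padded = append!(repeat([0], 3), [1, 2])  # 0.6: repmat([0], 3); result: [0, 0, 0, 1, 2]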
9 changes: 5 additions & 4 deletions data/mikolovptb.jl
@@ -1,5 +1,6 @@
using Knet
mikolovptburl = "http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz"
mikolovptbdir = Pkg.dir("Knet","data","mikolovptb")
mikolovptbdir = Knet.dir("data","mikolovptb")
mikolovptbtrn = "ptb.train.txt"
mikolovptbdev = "ptb.valid.txt"
mikolovptbtst = "ptb.test.txt"
@@ -21,10 +22,10 @@ been lowercased and reduced to a 10K vocabulary size. Return a tuple
"""
function mikolovptb()
global _mptb_trn, _mptb_dev, _mptb_tst, _mptb_vocab
if !isdefined(:_mptb_trn)
if !(@isdefined _mptb_trn)
isdir(mikolovptbdir) || mkpath(mikolovptbdir)
if !isfile(joinpath(mikolovptbdir, "ptb.train.txt"))
info("Downloading $mikolovptburl")
@info("Downloading $mikolovptburl")
tgz = download(mikolovptburl)
run(`tar --strip-components 3 -C $mikolovptbdir -xzf $tgz ./simple-examples/data/ptb.train.txt ./simple-examples/data/ptb.valid.txt ./simple-examples/data/ptb.test.txt`)
end
@@ -43,7 +44,7 @@ function mikolovptb()
push!(data, sentences)
end
_mptb_trn, _mptb_dev, _mptb_tst = data
_mptb_vocab = Array{String}(length(dict))
_mptb_vocab = Array{String}(undef, length(dict))
for (k,v) in dict; _mptb_vocab[v] = k; end
end
return _mptb_trn, _mptb_dev, _mptb_tst, _mptb_vocab
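Uninitialized array construction now requires the explicit undef sentinel, which is what the vocabulary inversion above depends on. The same pattern in isolation:

dict = Dict("the" => 1, "cat" => 2)         # word => id
vocab = Array{String}(undef, length(dict))  # 0.6: Array{String}(length(dict))
for (k, v) in dict; vocab[v] = k; end       # invert to id => word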
9 changes: 2 additions & 7 deletions data/treebank.jl
@@ -1,14 +1,10 @@
for p in ("ZipFile",)
Pkg.installed(p) == nothing && Pkg.add(p)
end

using ZipFile
using ZipFile, Knet

"Where to download dataset from"
const TREEBANK_URL = "https://nlp.stanford.edu/sentiment/trainDevTestTrees_PTB.zip"

"Where to download dataset to"
const TREEBANK_DIR = Pkg.dir("Knet","data","treebank")
const TREEBANK_DIR = Knet.dir("data","treebank")

const TREEBANK_ZIPNAME = "trainDevTestTrees_PTB.zip"
const TREEBANK_SPLITS = ("train", "dev", "test")
@@ -39,7 +35,6 @@ make_data!(trn, w2i, l2i)
```
"""

mutable struct SentimentTree
label
children
10 changes: 5 additions & 5 deletions data/wikiner.jl
@@ -1,6 +1,7 @@
using Knet
const WIKINER_DOWNLOAD_PREFIX =
"https://github.com/neulab/dynet-benchmark/raw/master/data/tags/"
const WIKINER_DIR = Pkg.dir("Knet","data","wikiner")
const WIKINER_DIR = Knet.dir("data","wikiner")
const WIKINER_FILES = ("train.txt","dev.txt")
const UNK = "_UNK_"
const PAD = "<*>"
@@ -35,7 +36,6 @@ data = WikiNERData()
```
"""

mutable struct WikiNERData
trn
dev
@@ -100,7 +100,7 @@ let
end

function parse_line(line)
return map(x->split(x,"|"), split(replace(line,"\n",""), " "))
return map(x->split(x,"|"), split(replace(line,"\n"=>""), " "))
end

function get_words_tags_chars(trn)
@@ -109,15 +109,15 @@ let
for (word,tag) in sample
push!(words, word)
push!(tags, tag)
push!(chars, convert(Array{UInt8,1}, word)...)
push!(chars, convert(Array{UInt8,1}, codeunits(word))...)
end
end
push!(chars, PAD)
return words, tags, chars
end

function filter_words(wordcounts,minoccur)
filtered_words = filter((w,c)-> c >= minoccur, wordcounts)
filtered_words = filter(x-> x[2] >= minoccur, wordcounts)
filtered_words = collect(keys(filtered_words))
!in(UNK, filtered_words) && push!(filtered_words, UNK)
return filtered_words
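Three further 1.0 changes surface in wikiner.jl: replace takes a pattern => replacement Pair, a String no longer converts directly to a byte array (it goes through codeunits), and filter over a Dict passes each entry as a single key-value pair rather than as two arguments. All three together (hypothetical data):

line = "West|NNP Virginia|NNP\n"
tokens = map(x -> split(x, "|"), split(replace(line, "\n" => ""), " "))
bytes = convert(Array{UInt8,1}, codeunits("word"))  # 0.6: convert(Array{UInt8,1}, "word")
wordcounts = Dict("the" => 5, "rare" => 1)
frequent = filter(x -> x[2] >= 2, wordcounts)       # 0.6: filter((w,c) -> c >= 2, wordcounts)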
