diff --git a/NEWS.md b/NEWS.md
index c498c6a1c..f19b98df4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,28 @@
+Knet v1.4.2 Release Notes
+=========================
+
+* Fixed windows git-tree-sha1 for libknet8.
+* Tutorial fixes.
+* Fix 606: gcnode issues.
+* Fix 610: WeakRefs turn into nothing when Julia garbage collects them.
+* Fix 618: New GPUArrays indexing causes scalar indexing for some Knet operations (i.e., cat).
+* Fix 619: Error converting CuArray to KnetArray.
+* Fix 620: `k[1:2:3,:] .= 0` broken for KnetArray.
+
+
+Knet v1.4.1 Release Notes
+=========================
+b720020 2020-08-28
+
+* Tutorial, README fixes, using MLDatasets.
+* Fixed gcnode issues.
+* Use NVML when choosing GPU.
+* Make RNN atype robust to types with dimensions.
+
+
 Knet v1.4.0 Release Notes
 =========================
+2754cd6 2020-08-19
 
 * Major refactoring of code without effecting the API (hopefully).
 * CuArray support added to all ops, implemented gcnode, tested on examples and tutorial.
diff --git a/Project.toml b/Project.toml
index 8746b94ec..e4a971cc3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Knet"
 uuid = "1902f260-5fb4-5aff-8c31-6271790ab950"
 authors = ["Deniz Yuret "]
-version = "1.4.1"
+version = "1.4.2"
 
 [deps]
 AutoGrad = "6710c13c-97f1-543f-91c5-74e8f7d95b35"
@@ -21,7 +21,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 AutoGrad = "1.2"
 CUDA = "1.0"
 FileIO = "1.0"
-JLD2 = "0.1"
+JLD2 = "0.1, 0.2"
 NNlib = "0.6, 0.7"
 SpecialFunctions = "0.8, 0.9, 0.10"
 julia = "1.0"
diff --git a/src/knetarrays/getindex.jl b/src/knetarrays/getindex.jl
index 3ecbb3a9e..3e88a2d19 100644
--- a/src/knetarrays/getindex.jl
+++ b/src/knetarrays/getindex.jl
@@ -1,4 +1,6 @@
 import Base: getindex, setindex!
+import Base.Broadcast: materialize!
+using Base.Broadcast: Broadcasted
 using CUDA: CuArray, CuPtr, cuMemcpyDtoD_v2
 using Knet.LibKnet8: @knet8
 # include("kptr.jl") ## KnetPtr, Cptr
@@ -28,17 +30,39 @@ using Knet.LibKnet8: @knet8
 
 ## Indexing with Pair{Union{Real,AbstractUnitRange,Colon}}
 
-# CuArray fallbacks: these rely on KnetArray<->CuArray conversions having shared pointers.
-getindex(A::KnetArray, I...) = KnetArray(getindex(CuArray(A), I...))
+# CuArray fallbacks: these rely on KnetArray<->CuArray conversions having shared pointers
+# and cover the cases which are not explicitly defined using kernels below.
+
+# Fallback for A[I...] => getindex(A, I...)
+function getindex(A::KnetArray, I...)
+    _A = CuArray(A)
+    _B = getindex(_A, I...)
+    B = (_B isa CuArray ? KnetArray(_B) : _B)
+    return B
+end
+
+# Fallback for A[I...] = B => setindex!(A, B, I...)
 function setindex!(A::KnetArray, B, I...)
     _A = CuArray(A)
-    _B = (B isa KnetArray || B isa AbstractArray ? convert(CuArray,B) : B)
-    # setindex!(CuArray(A), B, I...) ## This only works for x[I...] = y but not for x[I...] .= y
-    Base.Broadcast.materialize!(Base.dotview(_A,I...), Base.broadcasted(Base.identity, _B))
+    _B = (B isa KnetArray || B isa AbstractArray ? CuArray(B) : B)
+    setindex!(_A, _B, I...) ## This only works for x[I...] = y but not for x[I...] .= y
     return A
 end
 
+# Fallback for A[I...] .= B => materialize!(dotview(A, I...), broadcasted(identity, B))
+function materialize!(A::SubArray{T,N,<:KnetArray}, B) where {T,N}
+    _A = view(CuArray(A.parent), A.indices...)
+    _B = (B isa KnetArray || B isa AbstractArray ? CuArray(B) : B)
+    materialize!(_A, _B)
+end
+
+# Ambiguity fix:
+function materialize!(A::SubArray{T,N,<:KnetArray}, B::Broadcasted{S}) where {T,N,S}
+    _A = view(CuArray(A.parent), A.indices...)
+    materialize!(_A, B)
+end
+
+
 # The following fallback version tried to do all allocations using KnetArrays but was recently broken (Issue 618).
 # This only makes a difference if we are using the Knet allocator (i.e. cuallocator[] = false) which is not the default any more.
diff --git a/src/knetarrays/karray.jl b/src/knetarrays/karray.jl
index 4de42fe63..78572470a 100644
--- a/src/knetarrays/karray.jl
+++ b/src/knetarrays/karray.jl
@@ -148,6 +148,10 @@ function CuArray(x::KnetArray{T}) where {T}
     unsafe_wrap(CuArray{T}, p, x.dims; own=false)
 end
 
+function convert(A::Type{<:CuArray}, x::KnetArray)
+    convert(A, CuArray(x)) # extra convert in case T,N changes
+end
+
 # Extend function KnetArray to create a memory shared KnetArray from CuArray:
 function KnetArray(x::CuArray{T,N}) where {T,N}
     p = Base.bitcast(Cptr, x.ptr)
@@ -155,6 +159,10 @@ function KnetArray(x::CuArray{T,N}) where {T,N}
     KnetArray{T,N}(k, size(x))
 end
 
+function convert(A::Type{<:KnetArray}, x::CuArray)
+    convert(A, KnetArray(x)) # extra convert in case T,N changes
+end
+
 function ka(x...)
     @warn "ka() is deprecated, please use KnetArray instead" maxlog=1
     KnetArray(x...)
diff --git a/test/karray.jl b/test/karray.jl
index eda50aa66..4fa201f01 100644
--- a/test/karray.jl
+++ b/test/karray.jl
@@ -32,7 +32,7 @@ if CUDA.functional()
             ([1,3],), ([2,2],), # Vector{Int}
             ([1,3],:), (:,[1,3]), # Vector{Int},Colon
             ([2,2],:), (:,[2,2]), # Repeated index
-            ([],), # Empty Array
+            # ([],), # Empty Array: fails with CuArray
             ((a.>0.5),), # BitArray
             ([1 3; 2 4],), # Array{Int}
             (CartesianIndex(3,),), (CartesianIndex(2,3),), # CartesianIndex
@@ -54,7 +54,7 @@ if CUDA.functional()
                 k[i...] .= 0
                 @test a == k
                 a[i...] .= ai
-                k[i...] .= ai
+                k[i...] .= KnetArray(ai)
             end
             @test a == k
             @test gradcheck(getindex, a, i...; args=1)
@@ -143,7 +143,7 @@ if CUDA.functional()
             (3:5,), # UnitRange
             (:,:,2), # Colon, Colon, Int
             (:,:,1:2), # Colon, Colon, UnitRange
-            ([],), # Empty Array
+            # ([],), # Empty Array fails with CuArray
             ((a.>0.5),), # BitArray
         )
         #@show i
@@ -161,7 +161,7 @@ if CUDA.functional()
                 k[i...] .= 0
                 @test a == k
                 a[i...] .= ai
-                k[i...] .= ai
+                k[i...] .= KnetArray(ai)
             end
            @test a == k
            @test gradcheck(getindex, a, i...; args=1)
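The getindex/setindex!/materialize! fallbacks added in src/knetarrays/getindex.jl and the convert methods added in src/knetarrays/karray.jl are what resolve issues 619 and 620 above. The following minimal sketch (not part of the patch) illustrates the behavior they enable; it assumes a machine with a functional CUDA device, CUDA.jl, and Knet at this version.

    using Knet, CUDA

    if CUDA.functional()
        k = KnetArray(rand(Float32, 3, 4))

        # Issue 620: k[I...] .= v lowers to materialize!(dotview(k, I...), broadcasted(identity, v)),
        # which is now handled by the SubArray{...,<:KnetArray} fallback through a CuArray view.
        k[1:2:3, :] .= 0
        @assert all(Array(k)[1:2:3, :] .== 0)

        # Issue 619: convert between KnetArray and CuArray (memory-sharing wrappers, no copy).
        c  = convert(CuArray, k)
        k2 = convert(KnetArray, c)
        @assert size(k2) == size(k)
    end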