Skip to content
This repository has been archived by the owner on Aug 11, 2020. It is now read-only.

Commit

Permalink
change to unaligned load
Browse files Browse the repository at this point in the history
  • Loading branch information
tqchen committed Oct 17, 2015
1 parent de24af8 commit aa77e19
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion mshadow/extension/implicit_gemm.h
Expand Up @@ -77,7 +77,7 @@ struct Plan<ImplicitGEMMExp<LhsExp, RhsExp, DType>, DType> {
for (index_t j = 0; j < Packet::kSize; ++j) {
rhs_temp[j] = rhs_.Eval(i + j, x);
}
sum = sum + Packet::Load(lhs_temp) * Packet::Load(rhs_temp);
sum = sum + Packet::LoadUnAligned(lhs_temp) * Packet::LoadUnAligned(rhs_temp);
}
DType ret_result = sum.Sum();

Expand Down
4 changes: 4 additions & 0 deletions mshadow/packet/plain-inl.h
Expand Up @@ -30,6 +30,10 @@ struct Packet<DType, kPlain> {
// Build a plain packet from the single value stored at src.
// src must point to a readable DType; exactly one element is read.
MSHADOW_CINLINE static Packet<DType, kPlain> Load(const DType* src) {
  const DType value = *src;
  return Packet<DType, kPlain>(value);
}
// Unaligned-load entry point for the plain packet.
// Reads the single value stored at src and wraps it in a packet; the body
// performs the same one-element read as Load.
MSHADOW_CINLINE static Packet<DType, kPlain> LoadUnAligned(const DType* src) {
  const DType value = *src;
  return Packet<DType, kPlain>(value);
}
// fill it with value s
MSHADOW_CINLINE Packet<DType, kPlain>& operator=(DType s) {
data_ = s;
Expand Down
7 changes: 7 additions & 0 deletions mshadow/packet/sse-inl.h
Expand Up @@ -32,6 +32,10 @@ struct Packet<float, kSSE2> {
// Aligned load: fills the packet with the floats starting at src.
// Uses _mm_load_ps, which requires src to be 16-byte aligned; use
// LoadUnAligned when that cannot be guaranteed.
MSHADOW_CINLINE static Packet<float, kSSE2> Load(const float* src) {
  const __m128 lanes = _mm_load_ps(src);
  return Packet<float, kSSE2>(lanes);
}
// Unaligned load: fills the packet with the floats starting at src.
// Uses _mm_loadu_ps, so src does not need to be 16-byte aligned.
MSHADOW_CINLINE static Packet<float, kSSE2> LoadUnAligned(const float* src) {
  const __m128 lanes = _mm_loadu_ps(src);
  return Packet<float, kSSE2>(lanes);
}
// fill it with value s
MSHADOW_CINLINE Packet<float, kSSE2>& operator=(float s) {
data_ = _mm_set1_ps(s);
Expand Down Expand Up @@ -73,6 +77,9 @@ struct Packet<double, kSSE2> {
// Aligned load: fills the packet with the doubles starting at src.
// Uses _mm_load_pd, which requires src to be 16-byte aligned; use
// LoadUnAligned when that cannot be guaranteed.
MSHADOW_CINLINE static Packet<double, kSSE2> Load(const double* src) {
  const __m128d lanes = _mm_load_pd(src);
  return Packet<double, kSSE2>(lanes);
}
// Unaligned load: fills the packet with the doubles starting at src.
// Uses _mm_loadu_pd, so src does not need to be 16-byte aligned.
MSHADOW_CINLINE static Packet<double, kSSE2> LoadUnAligned(const double* src) {
  const __m128d lanes = _mm_loadu_pd(src);
  return Packet<double, kSSE2>(lanes);
}
// fill it with value s
MSHADOW_CINLINE Packet<double, kSSE2>& operator=(double s) {
data_ = _mm_set1_pd(s);
Expand Down

0 comments on commit aa77e19

Please sign in to comment.