Fix memory overlap issue in copy_ of linear_kernel_output (#2627)

* Fix memory overlap issue in copy_ of linear_kernel_output
* Replace the is_same check with a data_ptr comparison
intel · Mar 6, 2024 · bc32ea4 · bc32ea4
1 parent fac2423
commit bc32ea4
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 1 deletion.
diff --git a/csrc/cpu/aten/Linear.cpp b/csrc/cpu/aten/Linear.cpp
@@ -87,7 +87,7 @@ void linear_kernel_output(
   if (self.dim() != 2) {
     output_ = output_.reshape(output_size);
   }
-  if (!out_is_contiguous || !output.is_same(output_)) {
+  if (!out_is_contiguous || output.data_ptr() != output_.data_ptr()) {
     output.copy_(output_);
   }
 }

diff --git a/tests/cpu/test_jit.py b/tests/cpu/test_jit.py
@@ -761,6 +761,18 @@ def forward(self, x):
         return torch.add(self.linear(x), self.linear1(x1))
 
 
+class LinearAdd2(nn.Module):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(LinearAdd2, self).__init__()
+        seed = 2018
+        torch.manual_seed(seed)
+        self.linear = nn.Linear(in_channels, out_channels, **kwargs)
+
+    def forward(self, x):
+        y = x.clone().unsqueeze(0).permute(2, 1, 0, 3).squeeze(0)
+        return self.linear(x) + y
+
+
 class LinearAddRelu(nn.Module):
     def __init__(self, in_channels, mid_channels, out_channels, inplace, **kwargs):
         super(LinearAddRelu, self).__init__()
@@ -4480,6 +4492,11 @@ def test_output_linear_add(self):
             torch.rand(32, 3),
             kind_in_graph="ipex_prepack::linear_add_run",
         )
+        self._test_dnnl_fp32(
+            LinearAdd2(3, 3, bias=False),
+            torch.rand(3, 1, 3),
+            kind_in_graph="ipex_prepack::linear_add_run",
+        )
         self._test_output_lowp(
             LinearAdd(3, 32, bias=True),
             torch.rand(32, 3),