// ConvPacked.h
#pragma once
#include <ATen/Tensor.h>
#include <c10/util/ArrayRef.h>
#include <array>
#include "ContextConvolution.h"
#include "OpContext.h"
namespace torch_ipex {
namespace cpu {
namespace detail {
namespace convolution {
// Builds a ConvolutionOpContext that owns the (pre-packed) weight, optional
// bias, and all static convolution hyper-parameters, so subsequent runs can
// reuse the packed weight. Weight/bias and the geometry vectors are taken by
// rvalue reference and consumed.
// NOTE(review): `input_size` presumably lets the packing choose a layout
// tuned for the expected input shape — confirm against the implementation.
c10::intrusive_ptr<ConvolutionOpContext> createConvolutionPrePackOpContext(
at::Tensor&& weight,
c10::optional<at::Tensor>&& bias,
std::vector<int64_t>&& stride,
std::vector<int64_t>&& padding,
std::vector<int64_t>&& dilation,
std::vector<int64_t>&& kernel_size,
int64_t groups,
int64_t output_channel,
bool weight_is_channels_last,
std::vector<int64_t>&& input_size);
// Runs the convolution described by `op_context` on `input` and returns the
// result as a new tensor (no fused post-op).
at::Tensor convolution_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused ReLU post-op; same contract as convolution_run.
at::Tensor convolution_relu_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused LeakyReLU post-op.
// `alpha` is the negative-slope coefficient of the LeakyReLU.
at::Tensor convolution_leaky_relu_run(
const at::Tensor& input,
at::Scalar alpha,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused sigmoid post-op.
at::Tensor convolution_sigmoid_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused hardtanh (clamp) post-op; the output is clamped
// to [lower_bound, upper_bound].
at::Tensor convolution_hardtanh_run(
const at::Tensor& input,
at::Scalar lower_bound,
at::Scalar upper_bound,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused ELU post-op.
// `alpha`, `scale`, `input_scale` mirror the parameters of ATen's elu
// (alpha coefficient, output scale, input scale).
at::Tensor convolution_elu_run(
const at::Tensor& input,
at::Scalar alpha,
at::Scalar scale,
at::Scalar input_scale,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused swish (SiLU) post-op.
at::Tensor convolution_swish_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution with a fused GELU post-op.
// `approximate` selects the GELU variant, matching ATen's gelu argument
// (e.g. "none" or "tanh").
at::Tensor convolution_gelu_run(
const at::Tensor& input,
c10::string_view approximate,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Convolution fused with an elementwise add: accumulates the conv output
// into `accumu` (taken by non-const reference, so it is modified in place),
// optionally scaled by `alpha` as in aten::add.
at::Tensor convolution_add_run(
const at::Tensor& input,
at::Tensor& accumu,
const c10::optional<at::Scalar>& alpha,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Same as convolution_add_run, with an additional fused ReLU applied after
// the accumulation.
at::Tensor convolution_add_relu_run(
const at::Tensor& input,
at::Tensor& accumu,
const c10::optional<at::Scalar>& alpha,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
// Runs a three-convolution bottleneck block (one prepacked context per conv)
// writing the result back into `input` and returning it by reference.
// NOTE(review): presumably the ResNet-style bottleneck whose residual input
// shape matches its output (no downsample conv) — confirm in the .cpp.
at::Tensor& convolution_bottleneck_run(
at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context1,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context2,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context3);
// Four-convolution bottleneck overload returning a new tensor.
// NOTE(review): the fourth context presumably holds the downsample/projection
// convolution of the residual path — confirm in the .cpp.
at::Tensor convolution_bottleneck_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context1,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context2,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context3,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context4);
// Creates the low-level ContextConvolution holding the packed weight/bias
// and the convolution descriptor, with `attr` describing the oneDNN post-op
// fusion (relu, sum, etc.) baked into the primitive.
// Fix: parameter name typo `kerel_size` -> `kernel_size`, for consistency
// with createConvolutionPrePackOpContext above. A declaration-only parameter
// rename has no effect on callers or on the definition's linkage.
ContextConvolution create(
const at::Tensor& weight,
const c10::optional<at::Tensor>& bias,
const at::IntArrayRef stride,
const at::IntArrayRef padding,
const at::IntArrayRef dilation,
const at::IntArrayRef kernel_size,
const int64_t groups,
const int64_t output_channel,
const bool weight_is_channels_last,
const std::vector<int64_t>& input_size,
const ideep::attr_t& attr);
// Executes the packed convolution in `context` on `input`, applying the
// post-op chain described by `attr`, and returns a new output tensor.
at::Tensor run(
const ContextConvolution& context,
const at::Tensor& input,
const ideep::attr_t& attr);
// Overload that accumulates into `accumu` (modified in place and returned by
// reference), for fused sum/residual-add style post-ops in `attr`.
at::Tensor& run(
const ContextConvolution& context,
const at::Tensor& input,
at::Tensor& accumu,
const ideep::attr_t& attr);
// Runs the convolution backward pass given the forward `input`,
// `grad_output`, and `output_mask` selecting which of
// {grad_input, grad_weight, grad_bias} to compute (returned in that order).
// Uses the mkldnn weight/bias stored in the context.
std::tuple<at::Tensor, at::Tensor, at::Tensor> run_backward(
ContextConvolution& context,
const at::Tensor& input,
const at::Tensor& grad_output,
std::array<bool, 3> output_mask);
// Returns the n-D ATen weight that shares memory with the mkldnn packed
// weight. This ATen view is used by autograd and the optimizer for updates.
at::Tensor get_at_packed_weight(ContextConvolution& context);
// Packs `tensor` into the same blocked format as the context's mkldnn
// packed weight.
at::Tensor pack(ContextConvolution& context, const at::Tensor& tensor);
// Unpacks `tensor` back to the original (plain) weight format.
at::Tensor unpack(ContextConvolution& context, const at::Tensor& tensor);
} // namespace convolution
} // namespace detail
} // namespace cpu
} // namespace torch_ipex