-
Notifications
You must be signed in to change notification settings - Fork 707
/
step-67.cc
2334 lines (2037 loc) · 106 KB
/
step-67.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* ---------------------------------------------------------------------
*
* Copyright (C) 2020 - 2023 by the deal.II authors
*
* This file is part of the deal.II library.
*
* The deal.II library is free software; you can use it, redistribute
* it, and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
* The full text of the license can be found in the file LICENSE.md at
* the top level directory of deal.II.
*
* ---------------------------------------------------------------------
*
* Author: Martin Kronbichler, 2020
*/
// The include files are similar to the previous matrix-free tutorial programs
// step-37, step-48, and step-59
#include <deal.II/base/conditional_ostream.h>
#include <deal.II/base/function.h>
#include <deal.II/base/logstream.h>
#include <deal.II/base/timer.h>
#include <deal.II/base/time_stepping.h>
#include <deal.II/base/utilities.h>
#include <deal.II/base/vectorization.h>
#include <deal.II/distributed/tria.h>
#include <deal.II/dofs/dof_handler.h>
#include <deal.II/fe/fe_dgq.h>
#include <deal.II/fe/fe_system.h>
#include <deal.II/grid/grid_generator.h>
#include <deal.II/grid/tria.h>
#include <deal.II/lac/affine_constraints.h>
#include <deal.II/lac/la_parallel_vector.h>
#include <deal.II/matrix_free/fe_evaluation.h>
#include <deal.II/matrix_free/matrix_free.h>
#include <deal.II/numerics/data_out.h>
#include <fstream>
#include <iomanip>
#include <iostream>
// The following file includes the CellwiseInverseMassMatrix data structure
// that we will use for the mass matrix inversion, the only new include
// file for this tutorial program:
#include <deal.II/matrix_free/operators.h>
namespace Euler_DG
{
using namespace dealii;
// Similarly to the other matrix-free tutorial programs, we collect all
// parameters that control the execution of the program at the top of the
// file. Besides the dimension and polynomial degree we want to run with, we
// also specify a number of points in the Gaussian quadrature formula we
// want to use for the nonlinear terms in the Euler equations. Furthermore,
// we specify the time interval for the time-dependent problem, and
// implement two different test cases. The first one is an analytical
// solution in 2d, whereas the second is a channel flow around a cylinder as
// described in the introduction. Depending on the test case, we also change
// the final time up to which we run the simulation, and a variable
// `output_tick` that specifies in which intervals we want to write output
// (assuming that the tick is larger than the time step size).
constexpr unsigned int testcase = 0;
constexpr unsigned int dimension = 2;
constexpr unsigned int n_global_refinements = 3;
constexpr unsigned int fe_degree = 5;
constexpr unsigned int n_q_points_1d = fe_degree + 2;
using Number = double;
constexpr double gamma = 1.4;
constexpr double final_time = testcase == 0 ? 10 : 2.0;
constexpr double output_tick = testcase == 0 ? 1 : 0.05;
// Next off are some details of the time integrator, namely a Courant number
// that scales the time step size in terms of the formula $\Delta t =
// \text{Cr} n_\text{stages} \frac{h}{(p+1)^{1.5} (\|\mathbf{u} +
// c)_\text{max}}$, as well as a selection of a few low-storage Runge--Kutta
// methods. We specify the Courant number per stage of the Runge--Kutta
// scheme, as this gives a more realistic expression of the numerical cost
// for schemes of various numbers of stages.
const double courant_number = 0.15 / std::pow(fe_degree, 1.5);
enum LowStorageRungeKuttaScheme
{
stage_3_order_3, /* Kennedy, Carpenter, Lewis, 2000 */
stage_5_order_4, /* Kennedy, Carpenter, Lewis, 2000 */
stage_7_order_4, /* Tselios, Simos, 2007 */
stage_9_order_5, /* Kennedy, Carpenter, Lewis, 2000 */
};
constexpr LowStorageRungeKuttaScheme lsrk_scheme = stage_5_order_4;
// Eventually, we select a detail of the spatial discretization, namely the
// numerical flux (Riemann solver) at the faces between cells. For this
// program, we have implemented a modified variant of the Lax--Friedrichs
// flux and the Harten--Lax--van Leer (HLL) flux.
enum EulerNumericalFlux
{
lax_friedrichs_modified,
harten_lax_vanleer,
};
constexpr EulerNumericalFlux numerical_flux_type = lax_friedrichs_modified;
// @sect3{Equation data}
// We now define a class with the exact solution for the test case 0 and one
// with a background flow field for test case 1 of the channel. Given that
// the Euler equations are a problem with $d+2$ equations in $d$ dimensions,
// we need to tell the Function base class about the correct number of
// components.
template <int dim>
class ExactSolution : public Function<dim>
{
public:
ExactSolution(const double time)
: Function<dim>(dim + 2, time)
{}
virtual double value(const Point<dim> & p,
const unsigned int component = 0) const override;
};
// As far as the actual function implemented is concerned, the analytical
// test case is an isentropic vortex case (see e.g. the book by Hesthaven
// and Warburton, Example 6.1 in Section 6.6 on page 209) which fulfills the
// Euler equations with zero force term on the right hand side. Given that
// definition, we return either the density, the momentum, or the energy
// depending on which component is requested. Note that the original
// definition of the density involves the $\frac{1}{\gamma -1}$-th power of
// some expression. Since `std::pow()` has pretty slow implementations on
// some systems, we replace it by logarithm followed by exponentiation (of
// base 2), which is mathematically equivalent but usually much better
// optimized. This formula might lose accuracy in the last digits
// for very small numbers compared to `std::pow()`, but we are happy with
// it anyway, since small numbers map to data close to 1.
//
// For the channel test case, we simply select a density of 1, a velocity of
// 0.4 in $x$ direction and zero in the other directions, and an energy that
// corresponds to a speed of sound of 1.3 measured against the background
// velocity field, computed from the relation $E = \frac{c^2}{\gamma (\gamma
// -1)} + \frac 12 \rho \|u\|^2$.
template <int dim>
double ExactSolution<dim>::value(const Point<dim> & x,
const unsigned int component) const
{
const double t = this->get_time();
switch (testcase)
{
case 0:
{
Assert(dim == 2, ExcNotImplemented());
const double beta = 5;
Point<dim> x0;
x0[0] = 5.;
const double radius_sqr =
(x - x0).norm_square() - 2. * (x[0] - x0[0]) * t + t * t;
const double factor =
beta / (numbers::PI * 2) * std::exp(1. - radius_sqr);
const double density_log = std::log2(
std::abs(1. - (gamma - 1.) / gamma * 0.25 * factor * factor));
const double density = std::exp2(density_log * (1. / (gamma - 1.)));
const double u = 1. - factor * (x[1] - x0[1]);
const double v = factor * (x[0] - t - x0[0]);
if (component == 0)
return density;
else if (component == 1)
return density * u;
else if (component == 2)
return density * v;
else
{
const double pressure =
std::exp2(density_log * (gamma / (gamma - 1.)));
return pressure / (gamma - 1.) +
0.5 * (density * u * u + density * v * v);
}
}
case 1:
{
if (component == 0)
return 1.;
else if (component == 1)
return 0.4;
else if (component == dim + 1)
return 3.097857142857143;
else
return 0.;
}
default:
Assert(false, ExcNotImplemented());
return 0.;
}
}
// @sect3{Low-storage explicit Runge--Kutta time integrators}
// The next few lines implement a few low-storage variants of Runge--Kutta
// methods. These methods have specific Butcher tableaux with coefficients
// $b_i$ and $a_i$ as shown in the introduction. As usual in Runge--Kutta
// method, we can deduce time steps, $c_i = \sum_{j=1}^{i-2} b_i + a_{i-1}$
// from those coefficients. The main advantage of this kind of scheme is the
// fact that only two vectors are needed per stage, namely the accumulated
// part of the solution $\mathbf{w}$ (that will hold the solution
// $\mathbf{w}^{n+1}$ at the new time $t^{n+1}$ after the last stage), the
// update vector $\mathbf{r}_i$ that gets evaluated during the stages, plus
// one vector $\mathbf{k}_i$ to hold the evaluation of the operator. Such a
// Runge--Kutta setup reduces the memory storage and memory access. As the
// memory bandwidth is often the performance-limiting factor on modern
// hardware when the evaluation of the differential operator is
// well-optimized, performance can be improved over standard time
// integrators. This is true also when taking into account that a
// conventional Runge--Kutta scheme might allow for slightly larger time
// steps as more free parameters allow for better stability properties.
//
// In this tutorial programs, we concentrate on a few variants of
// low-storage schemes defined in the article by Kennedy, Carpenter, and
// Lewis (2000), as well as one variant described by Tselios and Simos
// (2007). There is a large series of other schemes available, which could
// be addressed by additional sets of coefficients or slightly different
// update formulas.
//
// We define a single class for the four integrators, distinguished by the
// enum described above. To each scheme, we then fill the vectors for the
// $b_i$ and $a_i$ to the given variables in the class.
class LowStorageRungeKuttaIntegrator
{
public:
LowStorageRungeKuttaIntegrator(const LowStorageRungeKuttaScheme scheme)
{
TimeStepping::runge_kutta_method lsrk;
// First comes the three-stage scheme of order three by Kennedy et al.
// (2000). While its stability region is significantly smaller than for
// the other schemes, it only involves three stages, so it is very
// competitive in terms of the work per stage.
switch (scheme)
{
case stage_3_order_3:
{
lsrk = TimeStepping::LOW_STORAGE_RK_STAGE3_ORDER3;
break;
}
// The next scheme is a five-stage scheme of order four, again
// defined in the paper by Kennedy et al. (2000).
case stage_5_order_4:
{
lsrk = TimeStepping::LOW_STORAGE_RK_STAGE5_ORDER4;
break;
}
// The following scheme of seven stages and order four has been
// explicitly derived for acoustics problems. It is a balance of
// accuracy for imaginary eigenvalues among fourth order schemes,
// combined with a large stability region. Since DG schemes are
// dissipative among the highest frequencies, this does not
// necessarily translate to the highest possible time step per
// stage. In the context of the present tutorial program, the
// numerical flux plays a crucial role in the dissipation and thus
// also the maximal stable time step size. For the modified
// Lax--Friedrichs flux, this scheme is similar to the
// `stage_5_order_4` scheme in terms of step size per stage if only
// stability is considered, but somewhat less efficient for the HLL
// flux.
case stage_7_order_4:
{
lsrk = TimeStepping::LOW_STORAGE_RK_STAGE7_ORDER4;
break;
}
// The last scheme included here is the nine-stage scheme of order
// five from Kennedy et al. (2000). It is the most accurate among
// the schemes used here, but the higher order of accuracy
// sacrifices some stability, so the step length normalized per
// stage is less than for the fourth order schemes.
case stage_9_order_5:
{
lsrk = TimeStepping::LOW_STORAGE_RK_STAGE9_ORDER5;
break;
}
default:
AssertThrow(false, ExcNotImplemented());
}
TimeStepping::LowStorageRungeKutta<
LinearAlgebra::distributed::Vector<Number>>
rk_integrator(lsrk);
rk_integrator.get_coefficients(ai, bi, ci);
}
unsigned int n_stages() const
{
return bi.size();
}
// The main function of the time integrator is to go through the stages,
// evaluate the operator, prepare the $\mathbf{r}_i$ vector for the next
// evaluation, and update the solution vector $\mathbf{w}$. We hand off
// the work to the `pde_operator` involved in order to be able to merge
// the vector operations of the Runge--Kutta setup with the evaluation of
// the differential operator for better performance, so all we do here is
// to delegate the vectors and coefficients.
//
// We separately call the operator for the first stage because we need
// slightly modified arguments there: We evaluate the solution from
// the old solution $\mathbf{w}^n$ rather than a $\mathbf r_i$ vector, so
// the first argument is `solution`. We here let the stage vector
// $\mathbf{r}_i$ also hold the temporary result of the evaluation, as it
// is not used otherwise. For all subsequent stages, we use the vector
// `vec_ki` as the second vector argument to store the result of the
// operator evaluation. Finally, when we are at the last stage, we must
// skip the computation of the vector $\mathbf{r}_{s+1}$ as there is no
// coefficient $a_s$ available (nor will it be used).
template <typename VectorType, typename Operator>
void perform_time_step(const Operator &pde_operator,
const double current_time,
const double time_step,
VectorType & solution,
VectorType & vec_ri,
VectorType & vec_ki) const
{
AssertDimension(ai.size() + 1, bi.size());
pde_operator.perform_stage(current_time,
bi[0] * time_step,
ai[0] * time_step,
solution,
vec_ri,
solution,
vec_ri);
for (unsigned int stage = 1; stage < bi.size(); ++stage)
{
const double c_i = ci[stage];
pde_operator.perform_stage(current_time + c_i * time_step,
bi[stage] * time_step,
(stage == bi.size() - 1 ?
0 :
ai[stage] * time_step),
vec_ri,
vec_ki,
solution,
vec_ri);
}
}
private:
std::vector<double> bi;
std::vector<double> ai;
std::vector<double> ci;
};
// @sect3{Implementation of point-wise operations of the Euler equations}
// In the following functions, we implement the various problem-specific
// operators pertaining to the Euler equations. Each function acts on the
// vector of conserved variables $[\rho, \rho\mathbf{u}, E]$ that we hold in
// the solution vectors, and computes various derived quantities.
//
// First out is the computation of the velocity, that we derive from the
// momentum variable $\rho \mathbf{u}$ by division by $\rho$. One thing to
// note here is that we decorate all those functions with the keyword
// `DEAL_II_ALWAYS_INLINE`. This is a special macro that maps to a
// compiler-specific keyword that tells the compiler to never create a
// function call for any of those functions, and instead move the
// implementation <a
// href="https://en.wikipedia.org/wiki/Inline_function">inline</a> to where
// they are called. This is critical for performance because we call into some
// of those functions millions or billions of times: For example, we both use
// the velocity for the computation of the flux further down, but also for the
// computation of the pressure, and both of these places are evaluated at
// every quadrature point of every cell. Making sure these functions are
// inlined ensures not only that the processor does not have to execute a jump
// instruction into the function (and the corresponding return jump), but also
// that the compiler can re-use intermediate information from one function's
// context in code that comes after the place where the function was called.
// (We note that compilers are generally quite good at figuring out which
// functions to inline by themselves. Here is a place where compilers may or
// may not have figured it out by themselves but where we know for sure that
// inlining is a win.)
//
// Another trick we apply is a separate variable for the inverse density
// $\frac{1}{\rho}$. This enables the compiler to only perform a single
// division for the flux, despite the division being used at several
// places. As divisions are around ten to twenty times as expensive as
// multiplications or additions, avoiding redundant divisions is crucial for
// performance. We note that taking the inverse first and later multiplying
// with it is not equivalent to a division in floating point arithmetic due
// to roundoff effects, so the compiler is not allowed to exchange one way by
// the other with standard optimization flags. However, it is also not
// particularly difficult to write the code in the right way.
//
// To summarize, the chosen strategy of always inlining and careful
// definition of expensive arithmetic operations allows us to write compact
// code without passing all intermediate results around, despite making sure
// that the code maps to excellent machine code.
template <int dim, typename Number>
inline DEAL_II_ALWAYS_INLINE //
Tensor<1, dim, Number>
euler_velocity(const Tensor<1, dim + 2, Number> &conserved_variables)
{
const Number inverse_density = Number(1.) / conserved_variables[0];
Tensor<1, dim, Number> velocity;
for (unsigned int d = 0; d < dim; ++d)
velocity[d] = conserved_variables[1 + d] * inverse_density;
return velocity;
}
// The next function computes the pressure from the vector of conserved
// variables, using the formula $p = (\gamma - 1) \left(E - \frac 12 \rho
// \mathbf{u}\cdot \mathbf{u}\right)$. As explained above, we use the
// velocity from the `euler_velocity()` function. Note that we need to
// specify the first template argument `dim` here because the compiler is
// not able to deduce it from the arguments of the tensor, whereas the
// second argument (number type) can be automatically deduced.
template <int dim, typename Number>
inline DEAL_II_ALWAYS_INLINE //
Number
euler_pressure(const Tensor<1, dim + 2, Number> &conserved_variables)
{
const Tensor<1, dim, Number> velocity =
euler_velocity<dim>(conserved_variables);
Number rho_u_dot_u = conserved_variables[1] * velocity[0];
for (unsigned int d = 1; d < dim; ++d)
rho_u_dot_u += conserved_variables[1 + d] * velocity[d];
return (gamma - 1.) * (conserved_variables[dim + 1] - 0.5 * rho_u_dot_u);
}
// Here is the definition of the Euler flux function, i.e., the definition
// of the actual equation. Given the velocity and pressure (that the
// compiler optimization will make sure are done only once), this is
// straight-forward given the equation stated in the introduction.
template <int dim, typename Number>
inline DEAL_II_ALWAYS_INLINE //
Tensor<1, dim + 2, Tensor<1, dim, Number>>
euler_flux(const Tensor<1, dim + 2, Number> &conserved_variables)
{
const Tensor<1, dim, Number> velocity =
euler_velocity<dim>(conserved_variables);
const Number pressure = euler_pressure<dim>(conserved_variables);
Tensor<1, dim + 2, Tensor<1, dim, Number>> flux;
for (unsigned int d = 0; d < dim; ++d)
{
flux[0][d] = conserved_variables[1 + d];
for (unsigned int e = 0; e < dim; ++e)
flux[e + 1][d] = conserved_variables[e + 1] * velocity[d];
flux[d + 1][d] += pressure;
flux[dim + 1][d] =
velocity[d] * (conserved_variables[dim + 1] + pressure);
}
return flux;
}
// This next function is a helper to simplify the implementation of the
// numerical flux, implementing the action of a tensor of tensors (with
// non-standard outer dimension of size `dim + 2`, so the standard overloads
// provided by deal.II's tensor classes do not apply here) with another
// tensor of the same inner dimension, i.e., a matrix-vector product.
template <int n_components, int dim, typename Number>
inline DEAL_II_ALWAYS_INLINE //
Tensor<1, n_components, Number>
operator*(const Tensor<1, n_components, Tensor<1, dim, Number>> &matrix,
const Tensor<1, dim, Number> & vector)
{
Tensor<1, n_components, Number> result;
for (unsigned int d = 0; d < n_components; ++d)
result[d] = matrix[d] * vector;
return result;
}
// This function implements the numerical flux (Riemann solver). It gets the
// state from the two sides of an interface and the normal vector, oriented
// from the side of the solution $\mathbf{w}^-$ towards the solution
// $\mathbf{w}^+$. In finite volume methods which rely on piece-wise
// constant data, the numerical flux is the central ingredient as it is the
// only place where the physical information is entered. In DG methods, the
// numerical flux is less central due to the polynomials within the elements
// and the physical flux used there. As a result of higher-degree
// interpolation with consistent values from both sides in the limit of a
// continuous solution, the numerical flux can be seen as a control of the
// jump of the solution from both sides to weakly impose continuity. It is
// important to realize that a numerical flux alone cannot stabilize a
// high-order DG method in the presence of shocks, and thus any DG method
// must be combined with further shock-capturing techniques to handle those
// cases. In this tutorial, we focus on wave-like solutions of the Euler
// equations in the subsonic regime without strong discontinuities where our
// basic scheme is sufficient.
//
// Nonetheless, the numerical flux is decisive in terms of the numerical
// dissipation of the overall scheme and influences the admissible time step
// size with explicit Runge--Kutta methods. We consider two choices, a
// modified Lax--Friedrichs scheme and the widely used Harten--Lax--van Leer
// (HLL) flux. For both variants, we first need to get the velocities and
// pressures from both sides of the interface and evaluate the physical
// Euler flux.
//
// For the local Lax--Friedrichs flux, the definition is $\hat{\mathbf{F}}
// =\frac{\mathbf{F}(\mathbf{w}^-)+\mathbf{F}(\mathbf{w}^+)}{2} +
// \frac{\lambda}{2}\left[\mathbf{w}^--\mathbf{w}^+\right]\otimes
// \mathbf{n^-}$, where the factor $\lambda =
// \max\left(\|\mathbf{u}^-\|+c^-, \|\mathbf{u}^+\|+c^+\right)$ gives the
// maximal wave speed and $c = \sqrt{\gamma p / \rho}$ is the speed of
// sound. Here, we choose two modifications of that expression for reasons
// of computational efficiency, given the small impact of the flux on the
// solution. For the above definition of the factor $\lambda$, we would need
// to take four square roots, two for the two velocity norms and two for the
// speed of sound on either side. The first modification is hence to rather
// use $\sqrt{\|\mathbf{u}\|^2+c^2}$ as an estimate of the maximal speed
// (which is at most a factor of 2 away from the actual maximum, as shown in
// the introduction). This allows us to pull the square root out of the
// maximum and get away with a single square root computation. The second
// modification is to further relax on the parameter $\lambda$---the smaller
// it is, the smaller the dissipation factor (which is multiplied by the
// jump in $\mathbf{w}$, which might result in a smaller or bigger
// dissipation in the end). This allows us to fit the spectrum into the
// stability region of the explicit Runge--Kutta integrator with bigger time
// steps. However, we cannot make dissipation too small because otherwise
// imaginary eigenvalues grow larger. Finally, the current conservative
// formulation is not energy-stable in the limit of $\lambda\to 0$ as it is
// not skew-symmetric, and would need additional measures such as split-form
// DG schemes in that case.
//
// For the HLL flux, we follow the formula from literature, introducing an
// additional weighting of the two states from Lax--Friedrichs by a
// parameter $s$. It is derived from the physical transport directions of
// the Euler equations in terms of the current direction of velocity and
// sound speed. For the velocity, we here choose a simple arithmetic average
// which is sufficient for DG scenarios and moderate jumps in material
// parameters.
//
// Since the numerical flux is multiplied by the normal vector in the weak
// form, we multiply by the result by the normal vector for all terms in the
// equation. In these multiplications, the `operator*` defined above enables
// a compact notation similar to the mathematical definition.
//
// In this and the following functions, we use variable suffixes `_m` and
// `_p` to indicate quantities derived from $\mathbf{w}^-$ and $\mathbf{w}^+$,
// i.e., values "here" and "there" relative to the current cell when looking
// at a neighbor cell.
template <int dim, typename Number>
inline DEAL_II_ALWAYS_INLINE //
Tensor<1, dim + 2, Number>
euler_numerical_flux(const Tensor<1, dim + 2, Number> &u_m,
const Tensor<1, dim + 2, Number> &u_p,
const Tensor<1, dim, Number> & normal)
{
const auto velocity_m = euler_velocity<dim>(u_m);
const auto velocity_p = euler_velocity<dim>(u_p);
const auto pressure_m = euler_pressure<dim>(u_m);
const auto pressure_p = euler_pressure<dim>(u_p);
const auto flux_m = euler_flux<dim>(u_m);
const auto flux_p = euler_flux<dim>(u_p);
switch (numerical_flux_type)
{
case lax_friedrichs_modified:
{
const auto lambda =
0.5 * std::sqrt(std::max(velocity_p.norm_square() +
gamma * pressure_p * (1. / u_p[0]),
velocity_m.norm_square() +
gamma * pressure_m * (1. / u_m[0])));
return 0.5 * (flux_m * normal + flux_p * normal) +
0.5 * lambda * (u_m - u_p);
}
case harten_lax_vanleer:
{
const auto avg_velocity_normal =
0.5 * ((velocity_m + velocity_p) * normal);
const auto avg_c = std::sqrt(std::abs(
0.5 * gamma *
(pressure_p * (1. / u_p[0]) + pressure_m * (1. / u_m[0]))));
const Number s_pos =
std::max(Number(), avg_velocity_normal + avg_c);
const Number s_neg =
std::min(Number(), avg_velocity_normal - avg_c);
const Number inverse_s = Number(1.) / (s_pos - s_neg);
return inverse_s *
((s_pos * (flux_m * normal) - s_neg * (flux_p * normal)) -
s_pos * s_neg * (u_m - u_p));
}
default:
{
Assert(false, ExcNotImplemented());
return {};
}
}
}
// This and the next function are helper functions to provide compact
// evaluation calls as multiple points get batched together via a
// VectorizedArray argument (see the step-37 tutorial for details). This
// function is used for the subsonic outflow boundary conditions where we
// need to set the energy component to a prescribed value. The next one
// requests the solution on all components and is used for inflow boundaries
// where all components of the solution are set.
template <int dim, typename Number>
VectorizedArray<Number>
evaluate_function(const Function<dim> & function,
const Point<dim, VectorizedArray<Number>> &p_vectorized,
const unsigned int component)
{
VectorizedArray<Number> result;
for (unsigned int v = 0; v < VectorizedArray<Number>::size(); ++v)
{
Point<dim> p;
for (unsigned int d = 0; d < dim; ++d)
p[d] = p_vectorized[d][v];
result[v] = function.value(p, component);
}
return result;
}
template <int dim, typename Number, int n_components = dim + 2>
Tensor<1, n_components, VectorizedArray<Number>>
evaluate_function(const Function<dim> & function,
const Point<dim, VectorizedArray<Number>> &p_vectorized)
{
AssertDimension(function.n_components, n_components);
Tensor<1, n_components, VectorizedArray<Number>> result;
for (unsigned int v = 0; v < VectorizedArray<Number>::size(); ++v)
{
Point<dim> p;
for (unsigned int d = 0; d < dim; ++d)
p[d] = p_vectorized[d][v];
for (unsigned int d = 0; d < n_components; ++d)
result[d][v] = function.value(p, d);
}
return result;
}
// @sect3{The EulerOperation class}
// This class implements the evaluators for the Euler problem, in analogy to
// the `LaplaceOperator` class of step-37 or step-59. Since the present
// operator is non-linear and does not require a matrix interface (to be
// handed over to preconditioners), we skip the various `vmult` functions
// otherwise present in matrix-free operators and only implement an `apply`
// function as well as the combination of `apply` with the required vector
// updates for the low-storage Runge--Kutta time integrator mentioned above
// (called `perform_stage`). Furthermore, we have added three additional
// functions involving matrix-free routines, namely one to compute an
// estimate of the time step scaling (that is combined with the Courant
// number for the actual time step size) based on the velocity and speed of
// sound in the elements, one for the projection of solutions (specializing
// VectorTools::project() for the DG case), and one to compute the errors
// against a possible analytical solution or norms against some background
// state.
//
// The rest of the class is similar to other matrix-free tutorials. As
// discussed in the introduction, we provide a few functions to allow a user
// to pass in various forms of boundary conditions on different parts of the
// domain boundary marked by types::boundary_id variables, as well as
// possible body forces.
template <int dim, int degree, int n_points_1d>
class EulerOperator
{
public:
static constexpr unsigned int n_quadrature_points_1d = n_points_1d;
EulerOperator(TimerOutput &timer_output);
void reinit(const Mapping<dim> & mapping,
const DoFHandler<dim> &dof_handler);
void set_inflow_boundary(const types::boundary_id boundary_id,
std::unique_ptr<Function<dim>> inflow_function);
void set_subsonic_outflow_boundary(
const types::boundary_id boundary_id,
std::unique_ptr<Function<dim>> outflow_energy);
void set_wall_boundary(const types::boundary_id boundary_id);
void set_body_force(std::unique_ptr<Function<dim>> body_force);
void apply(const double current_time,
const LinearAlgebra::distributed::Vector<Number> &src,
LinearAlgebra::distributed::Vector<Number> & dst) const;
void
perform_stage(const Number cur_time,
const Number factor_solution,
const Number factor_ai,
const LinearAlgebra::distributed::Vector<Number> ¤t_ri,
LinearAlgebra::distributed::Vector<Number> & vec_ki,
LinearAlgebra::distributed::Vector<Number> & solution,
LinearAlgebra::distributed::Vector<Number> &next_ri) const;
void project(const Function<dim> & function,
LinearAlgebra::distributed::Vector<Number> &solution) const;
std::array<double, 3> compute_errors(
const Function<dim> & function,
const LinearAlgebra::distributed::Vector<Number> &solution) const;
double compute_cell_transport_speed(
const LinearAlgebra::distributed::Vector<Number> &solution) const;
void
initialize_vector(LinearAlgebra::distributed::Vector<Number> &vector) const;
private:
MatrixFree<dim, Number> data;
TimerOutput &timer;
std::map<types::boundary_id, std::unique_ptr<Function<dim>>>
inflow_boundaries;
std::map<types::boundary_id, std::unique_ptr<Function<dim>>>
subsonic_outflow_boundaries;
std::set<types::boundary_id> wall_boundaries;
std::unique_ptr<Function<dim>> body_force;
void local_apply_inverse_mass_matrix(
const MatrixFree<dim, Number> & data,
LinearAlgebra::distributed::Vector<Number> & dst,
const LinearAlgebra::distributed::Vector<Number> &src,
const std::pair<unsigned int, unsigned int> & cell_range) const;
void local_apply_cell(
const MatrixFree<dim, Number> & data,
LinearAlgebra::distributed::Vector<Number> & dst,
const LinearAlgebra::distributed::Vector<Number> &src,
const std::pair<unsigned int, unsigned int> & cell_range) const;
void local_apply_face(
const MatrixFree<dim, Number> & data,
LinearAlgebra::distributed::Vector<Number> & dst,
const LinearAlgebra::distributed::Vector<Number> &src,
const std::pair<unsigned int, unsigned int> & face_range) const;
void local_apply_boundary_face(
const MatrixFree<dim, Number> & data,
LinearAlgebra::distributed::Vector<Number> & dst,
const LinearAlgebra::distributed::Vector<Number> &src,
const std::pair<unsigned int, unsigned int> & face_range) const;
};
template <int dim, int degree, int n_points_1d>
EulerOperator<dim, degree, n_points_1d>::EulerOperator(TimerOutput &timer)
: timer(timer)
{}
// For the initialization of the Euler operator, we set up the MatrixFree
// variable contained in the class. This can be done given a mapping to
// describe possible curved boundaries as well as a DoFHandler object
// describing the degrees of freedom. Since we use a discontinuous Galerkin
// discretization in this tutorial program where no constraints are imposed
// strongly on the solution field, we do not need to pass in an
// AffineConstraints object and rather use a dummy for the
// construction. With respect to quadrature, we want to select two different
// ways of computing the underlying integrals: The first is a flexible one,
// based on a template parameter `n_points_1d` (that will be assigned the
// `n_q_points_1d` value specified at the top of this file). More accurate
// integration is necessary to avoid the aliasing problem due to the
// variable coefficients in the Euler operator. The second less accurate
// quadrature formula is a tight one based on `fe_degree+1` and needed for
// the inverse mass matrix. While that formula provides an exact inverse
// only on affine element shapes and not on deformed elements, it enables
// the fast inversion of the mass matrix by tensor product techniques,
// necessary to ensure optimal computational efficiency overall.
template <int dim, int degree, int n_points_1d>
void EulerOperator<dim, degree, n_points_1d>::reinit(
const Mapping<dim> & mapping,
const DoFHandler<dim> &dof_handler)
{
const std::vector<const DoFHandler<dim> *> dof_handlers = {&dof_handler};
const AffineConstraints<double> dummy;
const std::vector<const AffineConstraints<double> *> constraints = {&dummy};
const std::vector<Quadrature<1>> quadratures = {QGauss<1>(n_q_points_1d),
QGauss<1>(fe_degree + 1)};
typename MatrixFree<dim, Number>::AdditionalData additional_data;
additional_data.mapping_update_flags =
(update_gradients | update_JxW_values | update_quadrature_points |
update_values);
additional_data.mapping_update_flags_inner_faces =
(update_JxW_values | update_quadrature_points | update_normal_vectors |
update_values);
additional_data.mapping_update_flags_boundary_faces =
(update_JxW_values | update_quadrature_points | update_normal_vectors |
update_values);
additional_data.tasks_parallel_scheme =
MatrixFree<dim, Number>::AdditionalData::none;
data.reinit(
mapping, dof_handlers, constraints, quadratures, additional_data);
}
template <int dim, int degree, int n_points_1d>
void EulerOperator<dim, degree, n_points_1d>::initialize_vector(
LinearAlgebra::distributed::Vector<Number> &vector) const
{
data.initialize_dof_vector(vector);
}
// The subsequent four member functions are the ones that must be called from
// outside to specify the various types of boundaries. For an inflow boundary,
// we must specify all components in terms of density $\rho$, momentum $\rho
// \mathbf{u}$ and energy $E$. Given this information, we then store the
// function alongside the respective boundary id in a map member variable of
// this class. Likewise, we proceed for the subsonic outflow boundaries (where
// we request a function as well, which we use to retrieve the energy) and for
// wall (no-penetration) boundaries where we impose zero normal velocity (no
// function necessary, so we only request the boundary id). For the present
// DG code where boundary conditions are solely applied as part of the weak
// form (during time integration), the call to set the boundary conditions
// can appear both before or after the `reinit()` call to this class. This
// is different from continuous finite element codes where the boundary
// conditions determine the content of the AffineConstraints object that is
// sent into MatrixFree for initialization, thus requiring to be set before
// the initialization of the matrix-free data structures.
//
// The checks added in each of the four function are used to
// ensure that boundary conditions are mutually exclusive on the various
// parts of the boundary, i.e., that a user does not accidentally designate a
// boundary as both an inflow and say a subsonic outflow boundary.
template <int dim, int degree, int n_points_1d>
void EulerOperator<dim, degree, n_points_1d>::set_inflow_boundary(
const types::boundary_id boundary_id,
std::unique_ptr<Function<dim>> inflow_function)
{
AssertThrow(subsonic_outflow_boundaries.find(boundary_id) ==
subsonic_outflow_boundaries.end() &&
wall_boundaries.find(boundary_id) == wall_boundaries.end(),
ExcMessage("You already set the boundary with id " +
std::to_string(static_cast<int>(boundary_id)) +
" to another type of boundary before now setting " +
"it as inflow"));
AssertThrow(inflow_function->n_components == dim + 2,
ExcMessage("Expected function with dim+2 components"));
inflow_boundaries[boundary_id] = std::move(inflow_function);
}
template <int dim, int degree, int n_points_1d>
void EulerOperator<dim, degree, n_points_1d>::set_subsonic_outflow_boundary(
const types::boundary_id boundary_id,
std::unique_ptr<Function<dim>> outflow_function)
{
AssertThrow(inflow_boundaries.find(boundary_id) ==
inflow_boundaries.end() &&
wall_boundaries.find(boundary_id) == wall_boundaries.end(),
ExcMessage("You already set the boundary with id " +
std::to_string(static_cast<int>(boundary_id)) +
" to another type of boundary before now setting " +
"it as subsonic outflow"));
AssertThrow(outflow_function->n_components == dim + 2,
ExcMessage("Expected function with dim+2 components"));
subsonic_outflow_boundaries[boundary_id] = std::move(outflow_function);
}
template <int dim, int degree, int n_points_1d>
void EulerOperator<dim, degree, n_points_1d>::set_wall_boundary(
const types::boundary_id boundary_id)
{
AssertThrow(inflow_boundaries.find(boundary_id) ==
inflow_boundaries.end() &&
subsonic_outflow_boundaries.find(boundary_id) ==
subsonic_outflow_boundaries.end(),
ExcMessage("You already set the boundary with id " +
std::to_string(static_cast<int>(boundary_id)) +
" to another type of boundary before now setting " +
"it as wall boundary"));
wall_boundaries.insert(boundary_id);
}
template <int dim, int degree, int n_points_1d>
void EulerOperator<dim, degree, n_points_1d>::set_body_force(
std::unique_ptr<Function<dim>> body_force)
{
AssertDimension(body_force->n_components, dim);
this->body_force = std::move(body_force);
}
// @sect4{Local evaluators}
// Now we proceed to the local evaluators for the Euler problem. The
// evaluators are relatively simple and follow what has been presented in
// step-37, step-48, or step-59. The first notable difference is the fact
// that we use an FEEvaluation with a non-standard number of quadrature
// points. Whereas we previously always set the number of quadrature points
// to equal the polynomial degree plus one (ensuring exact integration on
// affine element shapes), we now set the number quadrature points as a
// separate variable (e.g. the polynomial degree plus two or three halves of
// the polynomial degree) to more accurately handle nonlinear terms. Since
// the evaluator is fed with the appropriate loop lengths via the template
// argument and keeps the number of quadrature points in the whole cell in
// the variable FEEvaluation::n_q_points, we now automatically operate on
// the more accurate formula without further changes.
//
// The second difference is due to the fact that we are now evaluating a
// multi-component system, as opposed to the scalar systems considered
// previously. The matrix-free framework provides several ways to handle the
// multi-component case. The variant shown here utilizes an FEEvaluation
// object with multiple components embedded into it, specified by the fourth
// template argument `dim + 2` for the components in the Euler system. As a
// consequence, the return type of FEEvaluation::get_value() is not a scalar
// any more (that would return a VectorizedArray type, collecting data from
// several elements), but a Tensor of `dim+2` components. The functionality
// is otherwise similar to the scalar case; it is handled by a template
// specialization of a base class, called FEEvaluationAccess. An alternative
// variant would have been to use several FEEvaluation objects, a scalar one
// for the density, a vector-valued one with `dim` components for the
// momentum, and another scalar evaluator for the energy. To ensure that
// those components point to the correct part of the solution, the
// constructor of FEEvaluation takes three optional integer arguments after
// the required MatrixFree field, namely the number of the DoFHandler for
// multi-DoFHandler systems (taking the first by default), the number of the
// quadrature point in case there are multiple Quadrature objects (see more
// below), and as a third argument the component within a vector system. As
// we have a single vector for all components, we would go with the third
// argument, and set it to `0` for the density, `1` for the vector-valued
// momentum, and `dim+1` for the energy slot. FEEvaluation then picks the
// appropriate subrange of the solution vector during
// FEEvaluationBase::read_dof_values() and
// FEEvaluation::distributed_local_to_global() or the more compact
// FEEvaluation::gather_evaluate() and FEEvaluation::integrate_scatter()
// calls.
//
// When it comes to the evaluation of the body force vector, we distinguish
// between two cases for efficiency reasons: In case we have a constant
// function (derived from Functions::ConstantFunction), we can precompute
// the value outside the loop over quadrature points and simply use the
// value everywhere. For a more general function, we instead need to call
// the `evaluate_function()` method we provided above; this path is more
// expensive because we need to access the memory associated with the
// quadrature point data.
//
// The rest follows the other tutorial programs. Since we have implemented
// all physics for the Euler equations in the separate `euler_flux()`
// function, all we have to do here is to call this function
// given the current solution evaluated at quadrature points, returned by
// `phi.get_value(q)`, and tell the FEEvaluation object to queue the flux
// for testing it by the gradients of the shape functions (which is a Tensor
// of outer `dim+2` components, each holding a tensor of `dim` components
// for the $x,y,z$ component of the Euler flux). One final thing worth
// mentioning is the order in which we queue the data for testing by the
// value of the test function, `phi.submit_value()`, in case we are given an
// external function: We must do this after calling `phi.get_value(q)`,
// because `get_value()` (reading the solution) and `submit_value()`
// (queuing the value for multiplication by the test function and summation
// over quadrature points) access the same underlying data field. Here it