diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index fb5deed5f6..1c355366d6 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -3957,6 +3957,13 @@ Currently supported: `RPA`, `LDA`, `PBE`, `HSE`, `HF`. - **Description**: The number of 2-particle states to be solved - **Default**: 0 +### lr_unrestricted +- **Type**: Boolean +- **Description**: Whether to use unrestricted construction for LR-TDDFT (the matrix size will be doubled). + - True: Always use unrestricted LR-TDDFT. + - False: Use unrestricted LR-TDDFT only when the system is open-shell. +- **Default**: False + ### abs_wavelen_range - **Type**: Real Real diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 8bad2ec8bf..c6aa1b0c8d 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -729,7 +729,6 @@ OBJS_TENSOR=tensor.o\ operator_lr_exx.o\ kernel_xc.o\ pot_hxc_lrtd.o\ - hsolver_lrtd.o\ lr_spectrum.o\ hamilt_casida.o\ esolver_lrtd_lcao.o\ diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h index d3c88d5783..8aeea9792f 100644 --- a/source/module_basis/module_ao/parallel_2d.h +++ b/source/module_basis/module_ao/parallel_2d.h @@ -15,6 +15,7 @@ class Parallel_2D ~Parallel_2D() = default; Parallel_2D& operator=(Parallel_2D&& rhs) = default; + Parallel_2D(Parallel_2D&& rhs) = default; /// number of local rows int get_row_size() const diff --git a/source/module_esolver/esolver.cpp b/source/module_esolver/esolver.cpp index 3501cc79b2..b0247fd63e 100644 --- a/source/module_esolver/esolver.cpp +++ b/source/module_esolver/esolver.cpp @@ -187,13 +187,8 @@ ESolver* init_esolver(const Input_para& inp, UnitCell& ucell) else if (esolver_type == "lr_lcao") { // use constructor rather than Init function to initialize reference (instead of pointers) to ucell - if (PARAM.globalv.gamma_only_local){ - return new LR::ESolver_LR(inp, ucell); - } else if (PARAM.inp.nspin < 2) { - return new LR::ESolver_LR, double>(inp, ucell); - } else { - throw std::runtime_error("LR-TDDFT is not implemented for spin polarized case"); -} + if (PARAM.globalv.gamma_only_local) { return new LR::ESolver_LR(inp, ucell); } + else { return new LR::ESolver_LR, double>(inp, ucell); } } else if (esolver_type == "ksdft_lr_lcao") { diff --git a/source/module_io/read_input_item_tddft.cpp b/source/module_io/read_input_item_tddft.cpp index 544fe00ebb..ccd3190c54 100644 --- a/source/module_io/read_input_item_tddft.cpp +++ b/source/module_io/read_input_item_tddft.cpp @@ -327,6 +327,12 @@ void ReadInput::item_lr_tddft() read_sync_bool(input.out_wfc_lr); this->add_item(item); } + { + Input_Item item("lr_unrestricted"); + item.annotation = "Whether to use unrestricted construction for LR-TDDFT"; + read_sync_bool(input.lr_unrestricted); + this->add_item(item); + } { Input_Item item("abs_wavelen_range"); item.annotation = "the range of wavelength(nm) to output the absorption spectrum "; @@ -337,10 +343,6 @@ void ReadInput::item_lr_tddft() para.input.abs_wavelen_range.push_back(std::stod(item.str_values[i])); } }; - item.check_value = [](const Input_Item& item, const Parameter& para) { - auto& awr = para.input.abs_wavelen_range; - if (awr.size() < 2) { ModuleBase::WARNING_QUIT("ReadInput", "abs_wavelen_range must have two values"); } - }; sync_doublevec(input.abs_wavelen_range, 2, 0.0); this->add_item(item); } diff --git a/source/module_io/test/read_input_ptest.cpp b/source/module_io/test/read_input_ptest.cpp index 7953002c3a..a36cb5cd7d 100644 --- a/source/module_io/test/read_input_ptest.cpp +++ b/source/module_io/test/read_input_ptest.cpp @@ -419,6 +419,7 @@ TEST_F(InputParaTest, ParaRead) EXPECT_EQ(param.inp.xc_kernel, "LDA"); EXPECT_EQ(param.inp.lr_solver, "dav"); EXPECT_DOUBLE_EQ(param.inp.lr_thr, 1e-2); + EXPECT_FALSE(param.inp.lr_unrestricted); EXPECT_FALSE(param.inp.out_wfc_lr); EXPECT_EQ(param.inp.abs_wavelen_range.size(), 2); EXPECT_DOUBLE_EQ(param.inp.abs_wavelen_range[0], 0.0); diff --git a/source/module_lr/AX/AX.h b/source/module_lr/AX/AX.h index 360a4c9cec..36a6d5aebd 100644 --- a/source/module_lr/AX/AX.h +++ b/source/module_lr/AX/AX.h @@ -13,13 +13,13 @@ namespace LR const psi::Psi& c, const int& nocc, const int& nvirt, - psi::Psi& AX_istate); + double* AX_istate); void cal_AX_blas( const std::vector& V_istate, const psi::Psi& c, const int& nocc, const int& nvirt, - psi::Psi& AX_istate, + double* AX_istate, const bool add_on = true); #ifdef __MPI void cal_AX_pblas( @@ -27,11 +27,11 @@ namespace LR const Parallel_2D& pmat, const psi::Psi& c, const Parallel_2D& pc, - int naos, - int nocc, - int nvirt, - Parallel_2D& pX, - psi::Psi& AX_istate, + const int& naos, + const int& nocc, + const int& nvirt, + const Parallel_2D& pX, + double* AX_istate, const bool add_on=true); #endif // complex @@ -40,13 +40,13 @@ namespace LR const psi::Psi>& c, const int& nocc, const int& nvirt, - psi::Psi>& AX_istate); + std::complex* AX_istate); void cal_AX_blas( const std::vector& V_istate, const psi::Psi>& c, const int& nocc, const int& nvirt, - psi::Psi>& AX_istate, + std::complex* AX_istate, const bool add_on = true); #ifdef __MPI @@ -55,11 +55,11 @@ namespace LR const Parallel_2D& pmat, const psi::Psi>& c, const Parallel_2D& pc, - int naos, - int nocc, - int nvirt, - Parallel_2D& pX, - psi::Psi>& AX_istate, + const int& naos, + const int& nocc, + const int& nvirt, + const Parallel_2D& pX, + std::complex* AX_istate, const bool add_on = true); #endif } \ No newline at end of file diff --git a/source/module_lr/AX/AX_parallel.cpp b/source/module_lr/AX/AX_parallel.cpp index 2b390e431a..221962416e 100644 --- a/source/module_lr/AX/AX_parallel.cpp +++ b/source/module_lr/AX/AX_parallel.cpp @@ -14,20 +14,17 @@ namespace LR const Parallel_2D& pmat, const psi::Psi& c, const Parallel_2D& pc, - int naos, - int nocc, - int nvirt, - Parallel_2D& pX, - psi::Psi& AX_istate, + const int& naos, + const int& nocc, + const int& nvirt, + const Parallel_2D& pX, + double* AX_istate, const bool add_on) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_pblas"); - assert(pmat.comm() == pc.comm()); - assert(pmat.blacs_ctxt == pc.blacs_ctxt); - - if (pX.comm() != pmat.comm() || pX.blacs_ctxt != pmat.blacs_ctxt) - LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.blacs_ctxt); - else assert(pX.get_local_size() > 0 && AX_istate.get_nbasis() == pX.get_local_size()); + assert(pmat.comm() == pc.comm() && pmat.comm() == pX.comm()); + assert(pmat.blacs_ctxt == pc.blacs_ctxt && pmat.blacs_ctxt == pX.blacs_ctxt); + assert(pX.get_local_size() > 0); const int nks = V_istate.size(); @@ -35,7 +32,7 @@ namespace LR LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.blacs_ctxt); for (int isk = 0;isk < nks;++isk) { - AX_istate.fix_k(isk); + const int ax_start = isk * pX.get_local_size(); c.fix_k(isk); //Vc @@ -60,7 +57,7 @@ namespace LR pdgemm_(&transa, &transb, &nvirt, &nocc, &naos, &alpha, c.get_pointer(), &i1, &ivirt, pc.desc, Vc.data(), &i1, &i1, pVc.desc, - &beta, AX_istate.get_pointer(), &i1, &i1, pX.desc); + &beta, AX_istate + ax_start, &i1, &i1, pX.desc); } } @@ -70,20 +67,17 @@ namespace LR const Parallel_2D& pmat, const psi::Psi>& c, const Parallel_2D& pc, - int naos, - int nocc, - int nvirt, - Parallel_2D& pX, - psi::Psi>& AX_istate, + const int& naos, + const int& nocc, + const int& nvirt, + const Parallel_2D& pX, + std::complex* AX_istate, const bool add_on) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_plas"); - assert(pmat.comm() == pc.comm()); - assert(pmat.blacs_ctxt == pc.blacs_ctxt); - - if (pX.comm() != pmat.comm() || pX.blacs_ctxt != pmat.blacs_ctxt) - LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.blacs_ctxt); - else assert(pX.get_local_size() > 0 && AX_istate.get_nbasis() == pX.get_local_size()); + assert(pmat.comm() == pc.comm() && pmat.comm() == pX.comm()); + assert(pmat.blacs_ctxt == pc.blacs_ctxt && pmat.blacs_ctxt == pX.blacs_ctxt); + assert(pX.get_local_size() > 0); const int nks = V_istate.size(); @@ -91,7 +85,7 @@ namespace LR LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.blacs_ctxt); for (int isk = 0;isk < nks;++isk) { - AX_istate.fix_k(isk); + const int ax_start = isk * pX.get_local_size(); c.fix_k(isk); //Vc @@ -116,7 +110,7 @@ namespace LR pzgemm_(&transa, &transb, &nvirt, &nocc, &naos, &alpha, c.get_pointer(), &i1, &ivirt, pc.desc, Vc.data>(), &i1, &i1, pVc.desc, - &beta, AX_istate.get_pointer(), &i1, &i1, pX.desc); + &beta, AX_istate + ax_start, &i1, &i1, pX.desc); } } } diff --git a/source/module_lr/AX/AX_serial.cpp b/source/module_lr/AX/AX_serial.cpp index 019fd45928..abe990e5ff 100644 --- a/source/module_lr/AX/AX_serial.cpp +++ b/source/module_lr/AX/AX_serial.cpp @@ -9,18 +9,17 @@ namespace LR const psi::Psi& c, const int& nocc, const int& nvirt, - psi::Psi& AX_istate) + double* AX_istate) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_forloop"); const int nks = V_istate.size(); int naos = c.get_nbasis(); - AX_istate.fix_k(0); - ModuleBase::GlobalFunc::ZEROS(AX_istate.get_pointer(), nks * nocc * nvirt); + ModuleBase::GlobalFunc::ZEROS(AX_istate, nks * nocc * nvirt); for (int isk = 0;isk < nks;++isk) { c.fix_k(isk); - AX_istate.fix_k(isk); + const int ax_start = isk * nocc * nvirt; for (int i = 0;i < nocc;++i) { for (int a = 0;a < nvirt;++a) @@ -29,7 +28,7 @@ namespace LR { for (int mu = 0;mu < naos;++mu) { - AX_istate(i * nvirt + a) += c(nocc + a, mu) * V_istate[isk].data()[nu * naos + mu] * c(i, nu); + AX_istate[ax_start + i * nvirt + a] += c(nocc + a, mu) * V_istate[isk].data()[nu * naos + mu] * c(i, nu); } } } @@ -41,18 +40,17 @@ namespace LR const psi::Psi>& c, const int& nocc, const int& nvirt, - psi::Psi>& AX_istate) + std::complex* AX_istate) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_forloop"); const int nks = V_istate.size(); int naos = c.get_nbasis(); - AX_istate.fix_k(0); - ModuleBase::GlobalFunc::ZEROS(AX_istate.get_pointer(), nks * nocc * nvirt); + ModuleBase::GlobalFunc::ZEROS(AX_istate, nks * nocc * nvirt); for (int isk = 0;isk < nks;++isk) { c.fix_k(isk); - AX_istate.fix_k(isk); + const int ax_start = isk * nocc * nvirt; for (int i = 0;i < nocc;++i) { for (int a = 0;a < nvirt;++a) @@ -61,7 +59,7 @@ namespace LR { for (int mu = 0;mu < naos;++mu) { - AX_istate(i * nvirt + a) += std::conj(c(nocc + a, mu)) * V_istate[isk].data>()[nu * naos + mu] * c(i, nu); + AX_istate[ax_start + i * nvirt + a] += std::conj(c(nocc + a, mu)) * V_istate[isk].data>()[nu * naos + mu] * c(i, nu); } } } @@ -74,7 +72,7 @@ namespace LR const psi::Psi& c, const int& nocc, const int& nvirt, - psi::Psi& AX_istate, + double* AX_istate, const bool add_on) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_blas"); @@ -84,7 +82,7 @@ namespace LR for (int isk = 0;isk < nks;++isk) { c.fix_k(isk); - AX_istate.fix_k(isk); + const int ax_start = isk * nocc * nvirt; // Vc[naos*nocc] container::Tensor Vc(DAT::DT_DOUBLE, DEV::CpuDevice, { nocc, naos });// (Vc)^T @@ -101,7 +99,7 @@ namespace LR //AX_istate=c^TVc (nvirt major) dgemm_(&transa, &transb, &nvirt, &nocc, &naos, &alpha, c.get_pointer(nocc), &naos, Vc.data(), &naos, &beta, - AX_istate.get_pointer(), &nvirt); + AX_istate + ax_start, &nvirt); } } void cal_AX_blas( @@ -109,7 +107,7 @@ namespace LR const psi::Psi>& c, const int& nocc, const int& nvirt, - psi::Psi>& AX_istate, + std::complex* AX_istate, const bool add_on) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_blas"); @@ -119,7 +117,7 @@ namespace LR for (int isk = 0;isk < nks;++isk) { c.fix_k(isk); - AX_istate.fix_k(isk); + const int ax_start = isk * nocc * nvirt; // Vc[naos*nocc] (V is hermitian) container::Tensor Vc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { nocc, naos });// (Vc)^T @@ -136,7 +134,7 @@ namespace LR //AX_istate=c^\dagger Vc (nvirt major) zgemm_(&transa, &transb, &nvirt, &nocc, &naos, &alpha, c.get_pointer(nocc), &naos, Vc.data>(), &naos, &beta, - AX_istate.get_pointer(), &nvirt); + AX_istate + ax_start, &nvirt); } } } \ No newline at end of file diff --git a/source/module_lr/AX/test/AX_test.cpp b/source/module_lr/AX/test/AX_test.cpp index 92ed30f7e9..d7b4b5115e 100644 --- a/source/module_lr/AX/test/AX_test.cpp +++ b/source/module_lr/AX/test/AX_test.cpp @@ -4,6 +4,7 @@ #include "module_lr/utils/lr_util.h" +#define rand01 (static_cast(rand()) / static_cast(RAND_MAX) - 0.5 ) struct matsize { int nks = 1; @@ -23,7 +24,7 @@ class AXTest : public testing::Test std::vector sizes{ // {2, 3, 2, 1}, {2, 13, 7, 4}, - {2, 14, 8, 5} + // {2, 14, 8, 5} }; int nstate = 2; std::ofstream ofs_running; @@ -41,18 +42,12 @@ class AXTest : public testing::Test } #endif - void set_ones(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = 1.0; -}}; - void set_int(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = static_cast(i + 1); -}}; - void set_int(std::complex* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex(i + 1, -i - 1); -}}; - void set_rand(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = double(rand()) / double(RAND_MAX) * 10.0 - 5.0; -}}; - void set_rand(std::complex* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex(rand(), rand()) / double(RAND_MAX) * 10.0 - 5.0; -}}; - void check_eq(double* data1, double* data2, int size) { for (int i = 0;i < size;++i) { EXPECT_NEAR(data1[i], data2[i], 1e-10); -}}; + void set_ones(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = 1.0; } }; + void set_int(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = static_cast(i + 1); } }; + void set_int(std::complex* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex(i + 1, -i - 1); } }; + void set_rand(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = rand01 * 10; } }; + void set_rand(std::complex* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex(rand01 * 10, rand01 * 10); } }; + void check_eq(double* data1, double* data2, int size) { for (int i = 0;i < size;++i) { EXPECT_NEAR(data1[i], data2[i], 1e-10); } }; void check_eq(std::complex* data1, std::complex* data2, int size) { for (int i = 0;i < size;++i) @@ -75,17 +70,12 @@ TEST_F(AXTest, DoubleSerial) { psi::Psi c(s.nks, s.nocc + s.nvirt, s.naos); std::vector V(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); - set_rand(c.get_pointer(), size_c); - for (auto& v : V) {set_rand(v.data(), size_v); -} - AX_for.fix_b(istate); - AX_blas.fix_b(istate); - LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, AX_for); - LR::cal_AX_blas(V, c, s.nocc, s.nvirt, AX_blas, false); + set_rand(&c(0, 0, 0), size_c); + for (auto& v : V) { set_rand(v.data(), size_v); } + LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, &AX_for(istate, 0, 0)); + LR::cal_AX_blas(V, c, s.nocc, s.nvirt, &AX_blas(istate, 0, 0), false); } - AX_for.fix_b(0); - AX_blas.fix_b(0); - check_eq(AX_for.get_pointer(), AX_blas.get_pointer(), nstate * s.nks * s.nocc * s.nvirt); + check_eq(&AX_for(0, 0, 0), &AX_blas(0, 0, 0), nstate * s.nks * s.nocc * s.nvirt); } } @@ -101,17 +91,12 @@ TEST_F(AXTest, ComplexSerial) { psi::Psi> c(s.nks, s.nocc + s.nvirt, s.naos); std::vector V(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); - set_rand(c.get_pointer(), size_c); - for (auto& v : V) {set_rand(v.data>(), size_v); -} - AX_for.fix_b(istate); - AX_blas.fix_b(istate); - LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, AX_for); - LR::cal_AX_blas(V, c, s.nocc, s.nvirt, AX_blas, false); + set_rand(&c(0, 0, 0), size_c); + for (auto& v : V) { set_rand(v.data>(), size_v); } + LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, &AX_for(istate, 0, 0)); + LR::cal_AX_blas(V, c, s.nocc, s.nvirt, &AX_blas(istate, 0, 0), false); } - AX_for.fix_b(0); - AX_blas.fix_b(0); - check_eq(AX_for.get_pointer(), AX_blas.get_pointer(), nstate * s.nks * s.nocc * s.nvirt); + check_eq(&AX_for(0, 0, 0), &AX_blas(0, 0, 0), nstate * s.nks * s.nocc * s.nvirt); } } #ifdef __MPI @@ -135,25 +120,20 @@ TEST_F(AXTest, DoubleParallel) EXPECT_GE(s.nvirt, px.dim0); EXPECT_GE(s.nocc, px.dim1); EXPECT_GE(s.naos, pc.dim0); - psi::Psi AX_pblas_loc(s.nks, nstate, px.get_local_size()); + psi::Psi AX_pblas_loc(s.nks, nstate, px.get_local_size(), nullptr, false); psi::Psi AX_gather(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); for (int istate = 0;istate < nstate;++istate) { for (int isk = 0;isk < s.nks;++isk) { set_rand(V.at(isk).data(), pV.get_local_size()); - c.fix_k(isk); - set_rand(c.get_pointer(), pc.get_local_size()); + set_rand(&c(isk, 0, 0), pc.get_local_size()); } - AX_pblas_loc.fix_b(istate); - AX_gather.fix_b(istate); - LR::cal_AX_pblas(V, pV, c, pc, s.naos, s.nocc, s.nvirt, px, AX_pblas_loc, false); + LR::cal_AX_pblas(V, pV, c, pc, s.naos, s.nocc, s.nvirt, px, &AX_pblas_loc(istate, 0, 0), false); // gather AX and output for (int isk = 0;isk < s.nks;++isk) { - AX_pblas_loc.fix_k(isk); - AX_gather.fix_k(isk); - LR_Util::gather_2d_to_full(px, AX_pblas_loc.get_pointer(), AX_gather.get_pointer(), false/*pblas: row first*/, s.nvirt, s.nocc); + LR_Util::gather_2d_to_full(px, &AX_pblas_loc(istate, isk, 0), &AX_gather(istate, isk, 0), false/*pblas: row first*/, s.nvirt, s.nocc); } // compare to global AX std::vector V_full(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); @@ -161,17 +141,13 @@ TEST_F(AXTest, DoubleParallel) for (int isk = 0;isk < s.nks;++isk) { LR_Util::gather_2d_to_full(pV, V.at(isk).data(), V_full.at(isk).data(), false, s.naos, s.naos); - c.fix_k(isk); - c_full.fix_k(isk); - LR_Util::gather_2d_to_full(pc, c.get_pointer(), c_full.get_pointer(), false, s.naos, s.nocc + s.nvirt); + LR_Util::gather_2d_to_full(pc, &c(isk, 0, 0), &c_full(isk, 0, 0), false, s.naos, s.nocc + s.nvirt); } if (my_rank == 0) { psi::Psi AX_full_istate(s.nks, 1, s.nocc * s.nvirt, nullptr, false); - LR::cal_AX_blas(V_full, c_full, s.nocc, s.nvirt, AX_full_istate, false); - AX_full_istate.fix_b(0); - AX_gather.fix_b(istate); - check_eq(AX_full_istate.get_pointer(), AX_gather.get_pointer(), s.nks * s.nocc * s.nvirt); + LR::cal_AX_blas(V_full, c_full, s.nocc, s.nvirt, &AX_full_istate(0, 0, 0), false); + check_eq(&AX_full_istate(0, 0, 0), &AX_gather(istate, 0, 0), s.nks * s.nocc * s.nvirt); } } } @@ -191,26 +167,21 @@ TEST_F(AXTest, ComplexParallel) Parallel_2D px; LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.blacs_ctxt); - psi::Psi> AX_pblas_loc(s.nks, nstate, px.get_local_size()); + psi::Psi> AX_pblas_loc(s.nks, nstate, px.get_local_size(), nullptr, false); psi::Psi> AX_gather(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); for (int istate = 0;istate < nstate;++istate) { for (int isk = 0;isk < s.nks;++isk) { set_rand(V.at(isk).data>(), pV.get_local_size()); - c.fix_k(isk); - set_rand(c.get_pointer(), pc.get_local_size()); + set_rand(&c(isk, 0, 0), pc.get_local_size()); } - AX_pblas_loc.fix_b(istate); - AX_gather.fix_b(istate); - LR::cal_AX_pblas(V, pV, c, pc, s.naos, s.nocc, s.nvirt, px, AX_pblas_loc, false); + LR::cal_AX_pblas(V, pV, c, pc, s.naos, s.nocc, s.nvirt, px, &AX_pblas_loc(istate, 0, 0), false); // gather AX and output for (int isk = 0;isk < s.nks;++isk) { - AX_pblas_loc.fix_k(isk); - AX_gather.fix_k(isk); - LR_Util::gather_2d_to_full(px, AX_pblas_loc.get_pointer(), AX_gather.get_pointer(), false/*pblas: row first*/, s.nvirt, s.nocc); + LR_Util::gather_2d_to_full(px, &AX_pblas_loc(istate, isk, 0), &AX_gather(istate, isk, 0), false/*pblas: row first*/, s.nvirt, s.nocc); } // compare to global AX std::vector V_full(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); @@ -218,17 +189,13 @@ TEST_F(AXTest, ComplexParallel) for (int isk = 0;isk < s.nks;++isk) { LR_Util::gather_2d_to_full(pV, V.at(isk).data>(), V_full.at(isk).data>(), false, s.naos, s.naos); - c.fix_k(isk); - c_full.fix_k(isk); - LR_Util::gather_2d_to_full(pc, c.get_pointer(), c_full.get_pointer(), false, s.naos, s.nocc + s.nvirt); + LR_Util::gather_2d_to_full(pc, &c(isk, 0, 0), &c_full(isk, 0, 0), false, s.naos, s.nocc + s.nvirt); } if (my_rank == 0) { psi::Psi> AX_full_istate(s.nks, 1, s.nocc * s.nvirt, nullptr, false); - LR::cal_AX_blas(V_full, c_full, s.nocc, s.nvirt, AX_full_istate, false); - AX_full_istate.fix_b(0); - AX_gather.fix_b(istate); - check_eq(AX_full_istate.get_pointer(), AX_gather.get_pointer(), s.nks * s.nocc * s.nvirt); + LR::cal_AX_blas(V_full, c_full, s.nocc, s.nvirt, &AX_full_istate(0, 0, 0), false); + check_eq(&AX_full_istate(0, 0, 0), &AX_gather(istate, 0, 0), s.nks * s.nocc * s.nvirt); } } } diff --git a/source/module_lr/CMakeLists.txt b/source/module_lr/CMakeLists.txt index 12743eef8f..4b816f09b3 100644 --- a/source/module_lr/CMakeLists.txt +++ b/source/module_lr/CMakeLists.txt @@ -15,7 +15,6 @@ if(ENABLE_LCAO) operator_casida/operator_lr_hxc.cpp operator_casida/operator_lr_exx.cpp potentials/pot_hxc_lrtd.cpp - hsolver_lrtd.cpp lr_spectrum.cpp hamilt_casida.cpp esolver_lrtd_lcao.cpp) diff --git a/source/module_lr/dm_trans/dm_trans.h b/source/module_lr/dm_trans/dm_trans.h index a3fdee2938..283d22149c 100644 --- a/source/module_lr/dm_trans/dm_trans.h +++ b/source/module_lr/dm_trans/dm_trans.h @@ -14,14 +14,14 @@ namespace LR /// \f[ \tilde{\rho}_{\mu_j\mu_b}=\sum_{jb}c_{j,\mu_j}X_{jb}c^*_{b,\mu_b} \f] template std::vector cal_dm_trans_pblas( - const psi::Psi& X_istate, + const T* X_istate, const Parallel_2D& px, const psi::Psi& c, const Parallel_2D& pc, const int naos, const int nocc, const int nvirt, - Parallel_2D& pmat, + const Parallel_2D& pmat, const bool renorm_k = true, const int nspin = 1); #endif @@ -29,7 +29,7 @@ namespace LR /// @brief calculate the 2d-block transition density matrix in AO basis using ?gemm template std::vector cal_dm_trans_blas( - const psi::Psi& X_istate, + const T* X_istate, const psi::Psi& c, const int& nocc, const int& nvirt, const bool renorm_k = true, @@ -39,7 +39,7 @@ namespace LR /// @brief calculate the 2d-block transition density matrix in AO basis using for loop (for test) template std::vector cal_dm_trans_forloop_serial( - const psi::Psi& X_istate, + const T* X_istate, const psi::Psi& c, const int& nocc, const int& nvirt, const bool renorm_k = true, diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp index 68123bf74c..a21a95c8ee 100644 --- a/source/module_lr/dm_trans/dm_trans_parallel.cpp +++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp @@ -10,36 +10,30 @@ namespace LR // c: nao*nbands in para2d, nbands*nao in psi (row-para and constructed: nao) // X: nvirt*nocc in para2d, nocc*nvirt in psi (row-para and constructed: nvirt) template <> -std::vector cal_dm_trans_pblas(const psi::Psi& X_istate, - const Parallel_2D& px, - const psi::Psi& c, - const Parallel_2D& pc, - const int naos, - const int nocc, - const int nvirt, - Parallel_2D& pmat, - const bool renorm_k, - const int nspin) +std::vector cal_dm_trans_pblas(const double* X_istate, + const Parallel_2D& px, + const psi::Psi& c, + const Parallel_2D& pc, + const int naos, + const int nocc, + const int nvirt, + const Parallel_2D& pmat, + const bool renorm_k, + const int nspin) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas"); - assert(px.comm() == pc.comm()); - assert(px.blacs_ctxt == pc.blacs_ctxt); + assert(px.comm() == pc.comm() && px.comm() == pmat.comm()); + assert(px.blacs_ctxt == pc.blacs_ctxt && px.blacs_ctxt == pmat.blacs_ctxt); + assert(pmat.get_local_size() > 0); - if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) { - LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt); - } else { - assert(pmat.get_local_size() > 0); -} - - const int nks = X_istate.get_nk(); + const int nks = c.get_nk(); - std::vector dm_trans( - nks, - container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, {pmat.get_col_size(), pmat.get_row_size()})); + std::vector dm_trans(nks, + container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { pmat.get_col_size(), pmat.get_row_size() })); for (int isk = 0; isk < nks; ++isk) { c.fix_k(isk); - X_istate.fix_k(isk); + const int x_start = isk * px.get_local_size(); int i1 = 1; int ivirt = nocc + 1; char transa = 'N'; @@ -54,79 +48,43 @@ std::vector cal_dm_trans_pblas(const psi::Psi& X_ista DEV::CpuDevice, {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas Xc.zero(); - pdgemm_(&transa, - &transb, - &naos, - &nvirt, - &nocc, - &alpha, - c.get_pointer(), - &i1, - &i1, - pc.desc, - X_istate.get_pointer(), - &i1, - &i1, - px.desc, - &beta, - Xc.data(), - &i1, - &i1, - pXc.desc); + pdgemm_(&transa, &transb, &naos, &nvirt, &nocc, + &alpha, c.get_pointer(), &i1, &i1, pc.desc, + X_istate + x_start, &i1, &i1, px.desc, + &beta, Xc.data(), &i1, &i1, pXc.desc); // 2. C_virt*[X*C_occ^T] - pdgemm_(&transa, - &transb, - &naos, - &naos, - &nvirt, - &alpha, - c.get_pointer(), - &i1, - &ivirt, - pc.desc, - Xc.data(), - &i1, - &i1, - pXc.desc, - &beta, - dm_trans[isk].data(), - &i1, - &i1, - pmat.desc); + pdgemm_(&transa, &transb, &naos, &naos, &nvirt, + &alpha, c.get_pointer(), &i1, &ivirt, pc.desc, + Xc.data(), &i1, &i1, pXc.desc, + &beta, dm_trans[isk].data(), &i1, &i1, pmat.desc); } return dm_trans; } template <> -std::vector cal_dm_trans_pblas(const psi::Psi>& X_istate, - const Parallel_2D& px, - const psi::Psi>& c, - const Parallel_2D& pc, - const int naos, - const int nocc, - const int nvirt, - Parallel_2D& pmat, - const bool renorm_k, - const int nspin) +std::vector cal_dm_trans_pblas(const std::complex* X_istate, + const Parallel_2D& px, + const psi::Psi>& c, + const Parallel_2D& pc, + const int naos, + const int nocc, + const int nvirt, + const Parallel_2D& pmat, + const bool renorm_k, + const int nspin) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas"); - assert(px.comm() == pc.comm()); - assert(px.blacs_ctxt == pc.blacs_ctxt); - - if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) { - LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt); - } else { - assert(pmat.get_local_size() > 0); -} - const int nks = X_istate.get_nk(); + assert(px.comm() == pc.comm() && px.comm() == pmat.comm()); + assert(px.blacs_ctxt == pc.blacs_ctxt && px.blacs_ctxt == pmat.blacs_ctxt); + assert(pmat.get_local_size() > 0); + const int nks = c.get_nk(); - std::vector dm_trans( - nks, + std::vector dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, {pmat.get_col_size(), pmat.get_row_size()})); for (int isk = 0; isk < nks; ++isk) { c.fix_k(isk); - X_istate.fix_k(isk); + const int x_start = isk * px.get_local_size(); int i1 = 1; int ivirt = nocc + 1; @@ -141,7 +99,7 @@ std::vector cal_dm_trans_pblas(const psi::Psi beta(0.0, 0.0); // pzgemm_(&transa, &transb, &naos, &nvirt, &nocc, // &alpha, c.get_pointer(), &i1, &i1, pc.desc, - // X_istate.get_pointer(), &i1, &i1, px.desc, + // X_istate + x_start, &i1, &i1, px.desc, // &beta, Xc.data>(), &i1, &i1, pXc.desc); // // 2. C_virt*[X*C_occ^\dagger] @@ -163,48 +121,18 @@ std::vector cal_dm_trans_pblas(const psi::Psi alpha(1.0, 0.0); const std::complex beta(0.0, 0.0); - pzgemm_(&transa, - &transb, - &nvirt, - &naos, - &nocc, - &alpha, - X_istate.get_pointer(), - &i1, - &i1, - px.desc, - c.get_pointer(), - &i1, - &i1, - pc.desc, - &beta, - Xc.data>(), - &i1, - &i1, - pXc.desc); + pzgemm_(&transa, &transb, &nvirt, &naos, &nocc, &alpha, + X_istate + x_start, &i1, &i1, px.desc, + c.get_pointer(), &i1, &i1, pc.desc, + &beta, Xc.data>(), &i1, &i1, pXc.desc); // 2. [X*C_occ^\dagger]^TC_virt^T alpha.real(renorm_k ? 1.0 / static_cast(nks) : 1.0); transa = transb = 'T'; - pzgemm_(&transa, - &transb, - &naos, - &naos, - &nvirt, - &alpha, - Xc.data>(), - &i1, - &i1, - pXc.desc, - c.get_pointer(), - &i1, - &ivirt, - pc.desc, - &beta, - dm_trans[isk].data>(), - &i1, - &i1, - pmat.desc); + pzgemm_(&transa, &transb, &naos, &naos, &nvirt, + &alpha, Xc.data>(), &i1, &i1, pXc.desc, + c.get_pointer(), &i1, &ivirt, pc.desc, + &beta, dm_trans[isk].data>(), &i1, &i1, pmat.desc); } return dm_trans; } diff --git a/source/module_lr/dm_trans/dm_trans_serial.cpp b/source/module_lr/dm_trans/dm_trans_serial.cpp index cbdd63a6c8..70dc6e6542 100644 --- a/source/module_lr/dm_trans/dm_trans_serial.cpp +++ b/source/module_lr/dm_trans/dm_trans_serial.cpp @@ -6,7 +6,7 @@ namespace LR { template<> std::vector cal_dm_trans_forloop_serial( - const psi::Psi& X_istate, + const double* X_istate, const psi::Psi& c, const int& nocc, const int& nvirt, @@ -15,16 +15,15 @@ namespace LR { // cxc_out_test(X_istate, c); ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_forloop"); - const int nks = X_istate.get_nk(); - assert(nocc * nvirt == X_istate.get_nbasis()); - int naos = c.get_nbasis(); + const int nks = c.get_nk(); + const int naos = c.get_nbasis(); std::vector dm_trans(nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { naos, naos })); for (auto& dm : dm_trans)ModuleBase::GlobalFunc::ZEROS(dm.data(), naos * naos); // loop for AOs for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - X_istate.fix_k(isk); + const int x_start = isk * nocc * nvirt; for (size_t mu = 0;mu < naos;++mu) { for (size_t nu = 0;nu < naos;++nu) @@ -33,7 +32,7 @@ namespace LR for (size_t j = 0;j < nocc;++j) { for (size_t b = 0; b < nvirt;++b) - dm_trans[isk].data()[mu * naos + nu] += c(j, mu) * X_istate(j * nvirt + b) * c(nocc + b, nu); + dm_trans[isk].data()[mu * naos + nu] += c(j, mu) * X_istate[x_start + j * nvirt + b] * c(nocc + b, nu); } } } @@ -42,7 +41,7 @@ namespace LR } template<> std::vector cal_dm_trans_forloop_serial( - const psi::Psi>& X_istate, + const std::complex* X_istate, const psi::Psi>& c, const int& nocc, const int& nvirt, @@ -51,8 +50,7 @@ namespace LR { // cxc_out_test(X_istate, c); ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_forloop"); - const int nks = X_istate.get_nk(); - assert(nocc * nvirt == X_istate.get_nbasis()); + const int nks = c.get_nk(); int naos = c.get_nbasis(); std::vector dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, naos })); for (auto& dm : dm_trans)ModuleBase::GlobalFunc::ZEROS(dm.data>(), naos * naos); @@ -60,7 +58,7 @@ namespace LR for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - X_istate.fix_k(isk); + const int x_start = isk * nocc * nvirt; for (size_t mu = 0;mu < naos;++mu) { for (size_t nu = 0;nu < naos;++nu) @@ -70,7 +68,7 @@ namespace LR { for (size_t b = 0; b < nvirt;++b) dm_trans[isk].data>()[nu * naos + mu] += - std::conj(c(j, mu)) * X_istate(j * nvirt + b) * c(nocc + b, nu) / static_cast(nks / nspin); + std::conj(c(j, mu)) * X_istate[x_start + j * nvirt + b] * c(nocc + b, nu) / static_cast(nks / nspin); } } } @@ -80,7 +78,7 @@ namespace LR template<> std::vector cal_dm_trans_blas( - const psi::Psi& X_istate, + const double* X_istate, const psi::Psi& c, const int& nocc, const int& nvirt, @@ -88,14 +86,13 @@ namespace LR const int nspin) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_blas"); - const int nks = X_istate.get_nk(); - assert(nocc * nvirt == X_istate.get_nbasis()); + const int nks = c.get_nk(); int naos = c.get_nbasis(); std::vector dm_trans(nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { naos, naos })); for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - X_istate.fix_k(isk); + const int x_start = isk * nocc * nvirt; // 1. [X*C_occ^T]^T=C_occ*X^T char transa = 'N'; char transb = 'T'; @@ -103,7 +100,7 @@ namespace LR const double beta = 0.0; container::Tensor Xc(DAT::DT_DOUBLE, DEV::CpuDevice, { nvirt, naos }); dgemm_(&transa, &transb, &naos, &nvirt, &nocc, &alpha, - c.get_pointer(), &naos, X_istate.get_pointer(), &nvirt, + c.get_pointer(), &naos, X_istate + x_start, &nvirt, &beta, Xc.data(), &naos); // 2. C_virt*[X*C_occ^T] dgemm_(&transa, &transb, &naos, &naos, &nvirt, &alpha, @@ -115,7 +112,7 @@ namespace LR template<> std::vector cal_dm_trans_blas( - const psi::Psi>& X_istate, + const std::complex* X_istate, const psi::Psi>& c, const int& nocc, const int& nvirt, @@ -123,14 +120,13 @@ namespace LR const int nspin) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_blas"); - const int nks = X_istate.get_nk(); - assert(nocc * nvirt == X_istate.get_nbasis()); + const int nks = c.get_nk(); int naos = c.get_nbasis(); std::vector dm_trans(nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, naos })); for (size_t isk = 0;isk < nks;++isk) { c.fix_k(isk); - X_istate.fix_k(isk); + const int x_start = isk * nocc * nvirt; char transa = 'N'; char transb = 'C'; @@ -154,7 +150,7 @@ namespace LR // 1. X*C_occ^\dagger container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { naos, nvirt }); zgemm_(&transa, &transb, &nvirt, &naos, &nocc, &alpha, - X_istate.get_pointer(), &nvirt, c.get_pointer(), &naos, + X_istate + x_start, &nvirt, c.get_pointer(), &naos, &beta, Xc.data>(), &nvirt); // 2. [X*C_occ^\dagger]^TC_virt^T transa = transb = 'T'; diff --git a/source/module_lr/dm_trans/test/dm_trans_test.cpp b/source/module_lr/dm_trans/test/dm_trans_test.cpp index 4e0a034f20..212db64cc8 100644 --- a/source/module_lr/dm_trans/test/dm_trans_test.cpp +++ b/source/module_lr/dm_trans/test/dm_trans_test.cpp @@ -68,8 +68,8 @@ TEST_F(DMTransTest, DoubleSerial) psi::Psi c(s.nks, s.nocc + s.nvirt, s.naos); set_rand(c.get_pointer(), size_c); X.fix_b(istate); - const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X, c, s.nocc, s.nvirt); - const std::vector& dm_blas = LR::cal_dm_trans_blas(X, c, s.nocc, s.nvirt); + const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X.get_pointer(), c, s.nocc, s.nvirt); + const std::vector& dm_blas = LR::cal_dm_trans_blas(X.get_pointer(), c, s.nocc, s.nvirt); for (int isk = 0;isk < s.nks;++isk) check_eq(dm_for[isk].data(), dm_blas[isk].data(), s.naos * s.naos); } @@ -87,8 +87,8 @@ TEST_F(DMTransTest, ComplexSerial) psi::Psi> c(s.nks, s.nocc + s.nvirt, s.naos); set_rand(c.get_pointer(), size_c); X.fix_b(istate); - const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X, c, s.nocc, s.nvirt); - const std::vector& dm_blas = LR::cal_dm_trans_blas(X, c, s.nocc, s.nvirt); + const std::vector& dm_for = LR::cal_dm_trans_forloop_serial(X.get_pointer(), c, s.nocc, s.nvirt); + const std::vector& dm_blas = LR::cal_dm_trans_blas(X.get_pointer(), c, s.nocc, s.nvirt); for (int isk = 0;isk < s.nks;++isk) check_eq(dm_for[isk].data>(), dm_blas[isk].data>(), s.naos * s.naos); } @@ -109,6 +109,7 @@ TEST_F(DMTransTest, DoubleParallel) LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt); psi::Psi c(s.nks, pc.get_col_size(), pc.get_row_size()); Parallel_2D pmat; + LR_Util::setup_2d_division(pmat, s.nb, s.naos, s.naos, px.blacs_ctxt); EXPECT_EQ(px.dim0, pc.dim0); EXPECT_EQ(px.dim1, pc.dim1); @@ -137,7 +138,7 @@ TEST_F(DMTransTest, DoubleParallel) X.fix_b(istate); X_full.fix_b(istate); - std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X, px, c, pc, s.naos, s.nocc, s.nvirt, pmat); + std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X.get_pointer(), px, c, pc, s.naos, s.nocc, s.nvirt, pmat); // gather dm and output std::vector dm_gather(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); @@ -154,7 +155,7 @@ TEST_F(DMTransTest, DoubleParallel) } if (my_rank == 0) { - const std::vector& dm_full = LR::cal_dm_trans_blas(X_full, c_full, s.nocc, s.nvirt); + const std::vector& dm_full = LR::cal_dm_trans_blas(X_full.get_pointer(), c_full, s.nocc, s.nvirt); for (int isk = 0;isk < s.nks;++isk) check_eq(dm_full[isk].data(), dm_gather[isk].data(), s.naos * s.naos); } } @@ -173,6 +174,7 @@ TEST_F(DMTransTest, ComplexParallel) LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt); psi::Psi> c(s.nks, pc.get_col_size(), pc.get_row_size()); Parallel_2D pmat; + LR_Util::setup_2d_division(pmat, s.nb, s.naos, s.naos, px.blacs_ctxt); set_rand(X.get_pointer(), nstate * s.nks * px.get_local_size()); //set X and X_full psi::Psi> X_full(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); // allocate X_full @@ -195,7 +197,7 @@ TEST_F(DMTransTest, ComplexParallel) X.fix_b(istate); X_full.fix_b(istate); - std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X, px, c, pc, s.naos, s.nocc, s.nvirt, pmat); + std::vector dm_pblas_loc = LR::cal_dm_trans_pblas(X.get_pointer(), px, c, pc, s.naos, s.nocc, s.nvirt, pmat); // gather dm and output std::vector dm_gather(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); @@ -212,7 +214,7 @@ TEST_F(DMTransTest, ComplexParallel) } if (my_rank == 0) { - std::vector dm_full = LR::cal_dm_trans_blas(X_full, c_full, s.nocc, s.nvirt); + std::vector dm_full = LR::cal_dm_trans_blas(X_full.get_pointer(), c_full, s.nocc, s.nvirt); for (int isk = 0;isk < s.nks;++isk) check_eq(dm_full[isk].data>(), dm_gather[isk].data>(), s.naos * s.naos); } } diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp index f21a86d2c1..2f4f3b4b51 100644 --- a/source/module_lr/esolver_lrtd_lcao.cpp +++ b/source/module_lr/esolver_lrtd_lcao.cpp @@ -2,8 +2,9 @@ #include "utils/gint_move.hpp" #include "utils/lr_util.h" #include "hamilt_casida.h" +#include "hamilt_ulr.hpp" #include "module_lr/potentials/pot_hxc_lrtd.h" -#include "module_lr/hsolver_lrtd.h" +#include "module_lr/hsolver_lrtd.hpp" #include "module_lr/lr_spectrum.h" #include #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h" @@ -67,6 +68,21 @@ inline void redirect_log(const bool& out_alllog) } } +inline int cal_nupdown_form_occ(const ModuleBase::matrix& wg) +{ // only for nspin=2 + const int& nk = wg.nr / 2; + auto occ_sum_k = [&](const int& is, const int& ib)->double { double o = 0.0; for (int ik = 0;ik < nk;++ik) { o += wg(is * nk + ik, ib); } return o;}; + int nupdown = 0; + for (int ib = 0;ib < wg.nc;++ib) + { + const int nu = static_cast(std::lround(occ_sum_k(0, ib))); + const int nd = static_cast(std::lround(occ_sum_k(1, ib))); + if ((nu + nd) == 0) { break; } + nupdown += nu - nd; + } + return nupdown; +} + template void LR::ESolver_LR::parameter_check()const { @@ -87,32 +103,25 @@ template void LR::ESolver_LR::set_dimension() { this->nspin = PARAM.inp.nspin; - if (nspin == 2) { std::cout << "** Assuming the spin-up and spin-down states are degenerate. **" << std::endl; -} this->nstates = input.lr_nstates; this->nbasis = PARAM.globalv.nlocal; // calculate the number of occupied and unoccupied states // which determines the basis size of the excited states this->nocc_max = LR_Util::cal_nocc(LR_Util::cal_nelec(ucell)); - this->nocc = std::max(1, std::min(input.nocc, this->nocc_max)); - this->nvirt = PARAM.inp.nbands - this->nocc_max; //nbands-nocc - if (input.nvirt > this->nvirt) { - GlobalV::ofs_warning << "ESolver_LR: input nvirt is too large to cover by nbands, set nvirt = nbands - nocc = " << this->nvirt << std::endl; - } else if (input.nvirt > 0) { this->nvirt = input.nvirt; -} - this->nbands = this->nocc + this->nvirt; - this->npairs = this->nocc * this->nvirt; + this->nocc_in = std::max(1, std::min(input.nocc, this->nocc_max)); + this->nvirt_in = PARAM.inp.nbands - this->nocc_max; //nbands-nocc + if (input.nvirt > this->nvirt_in) { GlobalV::ofs_warning << "ESolver_LR: input nvirt is too large to cover by nbands, set nvirt = nbands - nocc = " << this->nvirt_in << std::endl; } + else if (input.nvirt > 0) { this->nvirt_in = input.nvirt; } + this->nbands = this->nocc_in + this->nvirt_in; this->nk = this->kv.get_nks() / this->nspin; - if (this->nstates > this->nocc * this->nvirt * this->nk) { - throw std::invalid_argument("ESolver_LR: nstates > nocc*nvirt*nk"); -} - + this->nocc.resize(nspin, nocc_in); + this->nvirt.resize(nspin, nvirt_in); + for (int is = 0;is < nspin;++is) { this->npairs.push_back(nocc[is] * nvirt[is]); } GlobalV::ofs_running << "Setting LR-TDDFT parameters: " << std::endl; - GlobalV::ofs_running << "number of occupied bands: " << this->nocc << std::endl; - GlobalV::ofs_running << "number of virtual bands: " << this->nvirt << std::endl; + GlobalV::ofs_running << "number of occupied bands: " << nocc_in << std::endl; + GlobalV::ofs_running << "number of virtual bands: " << nvirt_in << std::endl; GlobalV::ofs_running << "number of Atom orbitals (LCAO-basis size): " << this->nbasis << std::endl; GlobalV::ofs_running << "number of KS bands: " << this->eig_ks.nc << std::endl; - GlobalV::ofs_running << "number of electron-hole pairs (2-particle basis size): " << this->npairs << std::endl; GlobalV::ofs_running << "number of excited states to be solved: " << this->nstates << std::endl; if (input.ri_hartree_benchmark == "aims" && !input.aims_nbasis.empty()) { @@ -121,6 +130,23 @@ void LR::ESolver_LR::set_dimension() } } +template +void LR::ESolver_LR::reset_dim_spin2() +{ + if (nspin != 2) { return; } + if (nupdown == 0) { std::cout << "** Assuming the spin-up and spin-down states are degenerate. **" << std::endl; } + else + { + this->openshell = true; + nupdown > 0 ? ((nocc[1] -= nupdown) && (nvirt[1] += nupdown)) : ((nocc[0] += nupdown) && (nvirt[0] -= nupdown)); + npairs = { nocc[0] * nvirt[0], nocc[1] * nvirt[1] }; + std::cout << "** Solve the spin-up and spin-down states separately for open-shell system. **" << std::endl; + } + for (int is : {0, 1}) { if (npairs[is] <= 0) { throw std::invalid_argument(std::string("ESolver_LR: npairs (nocc*nvirt) <= 0 for spin") + std::string(is == 0 ? "up" : "down")); } } + if (nstates > (npairs[0] + npairs[1]) * nk) { throw std::invalid_argument("ESolver_LR: nstates > nocc*nvirt*nk"); } + if (input.lr_unrestricted) { this->openshell = true; } +} + template LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol, const Input_para& inp, UnitCell& ucell) @@ -171,7 +197,7 @@ LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol { this->psi_ks = new psi::Psi(this->kv.get_nks(), this->paraC_.get_col_size(), this->paraC_.get_row_size()); this->eig_ks.create(this->kv.get_nks(), this->nbands); - const int start_band = this->nocc_max - this->nocc; + const int start_band = this->nocc_max - std::max(nocc[0], nocc[1]); for (int ik = 0;ik < this->kv.get_nks();++ik) { Cpxgemr2d(this->nbasis, this->nbands, &(*ks_sol.psi)(ik, 0, 0), 1, start_band + 1, ks_sol.pv.desc_wfc, @@ -182,12 +208,18 @@ LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol #else move_gs(); #endif + if (nspin == 2) + { + this->nupdown = cal_nupdown_form_occ(ks_sol.pelec->wg); + reset_dim_spin2(); + } //grid integration this->gt_ = std::move(ks_sol.GridT); if (std::is_same::value) { this->gint_g_ = std::move(ks_sol.GG); } else { this->gint_k_ = std::move(ks_sol.GK); } this->set_gint(); + this->gint_->reset_DMRGint(1); // move pw basis delete this->pw_rho; // newed in ESolver_FP::ESolver_FP @@ -275,6 +307,11 @@ LR::ESolver_LR::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu this->paraMat_.ncol_bands, this->paraMat_.get_row_size()); this->read_ks_wfc(); + if (nspin == 2) + { + this->nupdown = cal_nupdown_form_occ(this->pelec->wg); + reset_dim_spin2(); + } LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nbands #ifdef __MPI @@ -313,7 +350,6 @@ LR::ESolver_LR::ESolver_LR(const Input_para& inp, UnitCell& ucell) : inpu PARAM.inp.test_atom_input); this->set_gint(); this->gint_->gridt = &this->gt_; - this->gint_->reset_DMRGint(1); // (3) Periodic condition search for each grid. double dr_uniform = 0.001; @@ -399,35 +435,63 @@ void LR::ESolver_LR::runner(int istep, UnitCell& cell) this->setup_eigenvectors_X(); this->pelec->ekb.create(nspin, this->nstates); + auto efile = [&](const std::string& label)->std::string {return PARAM.globalv.global_out_dir + "Excitation_Energy_" + label + ".dat";}; + auto vfile = [&](const std::string& label)->std::string {return PARAM.globalv.global_out_dir + "Excitation_Amplitude_" + label + "_" + std::to_string(GlobalV::MY_RANK) + ".dat";}; if (this->input.lr_solver != "spectrum") { + auto write_states = [&](const std::string& label, const Real* e, const T* v, const int& dim, const int& nst, const int& prec = 8)->void + { + if (GlobalV::MY_RANK == 0) { assert(nst == LR_Util::write_value(efile(label), prec, e, nst)); } + assert(nst * dim == LR_Util::write_value(vfile(label), prec, v, nst, dim)); + }; // allocate and initialize A matrix and density matrix - std::vector spin_type = { "Spin Singlet", "Spin Triplet" }; - for (int is = 0;is < nspin;++is) + if (openshell) { - if (nspin == 2) { std::cout << "Calculating " << spin_type[is] << " excitations" << std::endl; } - hamilt::Hamilt* phamilt = new HamiltCasidaLR(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, this->psi_ks, this->eig_ks, + std::cout << "Solving spin-conserving excitation for open-shell system." << std::endl; + HamiltULR hulr(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, *this->psi_ks, this->eig_ks, #ifdef __EXX this->exx_lri, this->exx_info.info_global.hybrid_alpha, #endif - this->gint_, this->pot[is], this->kv, & this->paraX_, & this->paraC_, & this->paraMat_, - spin_type[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector({}))); - // solve the Casida equation - HSolverLR hsol(nk, this->npairs, is, this->input.out_wfc_lr); - hsol.set_diagethr(hsol.diag_ethr, 0, 0, std::max(1e-13, this->input.lr_thr)); - hsol.solve(phamilt, *this->X[is], this->pelec, this->input.lr_solver/*, - !std::set({ "hf", "hse" }).count(this->xc_kernel)*/); //whether the kernel is Hermitian - delete phamilt; + this->gint_, this->pot, this->kv, this->paraX_, this->paraC_, this->paraMat_); + LR::HSolver::solve(hulr, this->X[0].template data(), nloc_per_band, nstates, this->pelec->ekb.c, this->input.lr_solver, this->input.lr_thr); + if (input.out_wfc_lr) { write_states("openshell", this->pelec->ekb.c, this->X[0].template data(), nloc_per_band, nstates); } + } + else + { + auto spin_types = std::vector({ "singlet", "triplet" }); + for (int is = 0;is < nspin;++is) + { + std::cout << "Calculating " << spin_types[is] << " excitations" << std::endl; + HamiltLR hlr(xc_kernel, nspin, this->nbasis, this->nocc, this->nvirt, this->ucell, orb_cutoff_, GlobalC::GridD, *this->psi_ks, this->eig_ks, +#ifdef __EXX + this->exx_lri, this->exx_info.info_global.hybrid_alpha, +#endif + this->gint_, this->pot[is], this->kv, this->paraX_, this->paraC_, this->paraMat_, + spin_types[is], input.ri_hartree_benchmark, (input.ri_hartree_benchmark == "aims" ? input.aims_nbasis : std::vector({}))); + // solve the Casida equation + LR::HSolver::solve(hlr, this->X[is].template data(), nloc_per_band, nstates, + this->pelec->ekb.c + is * nstates, this->input.lr_solver, this->input.lr_thr/*, + !std::set({ "hf", "hse" }).count(this->xc_kernel)*/); //whether the kernel is Hermitian + if (input.out_wfc_lr) { write_states(spin_types[is], this->pelec->ekb.c + is * nstates, this->X[is].template data(), nloc_per_band, nstates); } + } } } else // read the eigenvalues { - std::ifstream ifs(PARAM.globalv.global_readin_dir + "Excitation_Energy.dat"); - std::cout << "reading the excitation energies from file: \n"; - for (int is = 0;is < nspin;++is) + auto read_states = [&](const std::string& label, Real* e, T* v, const int& dim, const int& nst)->void + { + if (GlobalV::MY_RANK == 0) { assert(nst == LR_Util::read_value(efile(label), e, nst)); } + assert(nst * dim == LR_Util::read_value(vfile(label), v, nst, dim)); + }; + std::cout << "reading the excitation amplitudes from file: \n"; + if (openshell) + { + read_states("openshell", this->pelec->ekb.c, this->X[0].template data(), nloc_per_band, nstates); + } + else { - for (int i = 0;i < this->nstates;++i) { ifs >> this->pelec->ekb(is, i); } - for (int i = 0;i < this->nstates;++i) { std::cout << this->pelec->ekb(is, i) << " "; } + auto spin_types = std::vector({ "singlet", "triplet" }); + for (int is = 0;is < nspin;++is) { read_states(spin_types[is], this->pelec->ekb.c + is * nstates, this->X[is].template data(), nloc_per_band, nstates); } } } return; @@ -441,99 +505,92 @@ void LR::ESolver_LR::after_all_runners() //cal spectrum std::vector freq(100); std::vector abs_wavelen_range({ 20, 200 });//default range - if (input.abs_wavelen_range.size() == 2 && std::abs(input.abs_wavelen_range[1] - input.abs_wavelen_range[0]) > 0.02) + if (input.abs_wavelen_range.size() >= 2 && std::abs(input.abs_wavelen_range[1] - input.abs_wavelen_range[0]) > 0.02) { abs_wavelen_range = input.abs_wavelen_range; } double lambda_diff = std::abs(abs_wavelen_range[1] - abs_wavelen_range[0]); double lambda_min = std::min(abs_wavelen_range[1], abs_wavelen_range[0]); for (int i = 0;i < freq.size();++i) { freq[i] = 91.126664 / (lambda_min + 0.01 * static_cast(i + 1) * lambda_diff); } - for (int is = 0;is < this->nspin;++is) + auto spin_types = (nspin == 2 && !openshell) ? std::vector({ "singlet", "triplet" }) : std::vector({ "updown" }); + for (int is = 0;is < this->X.size();++is) { - LR_Spectrum spectrum(&this->pelec->ekb.c[is * this->nstates], *this->X[is], - this->nspin, this->nbasis, this->nocc, this->nvirt, this->gint_, *this->pw_rho, *this->psi_ks, - this->ucell, this->kv, this->paraX_, this->paraC_, this->paraMat_); - spectrum.oscillator_strength(); - spectrum.transition_analysis(is); - spectrum.optical_absorption(freq, input.abs_broadening, is); + LR_Spectrum spectrum(nspin, this->nbasis, this->nocc, this->nvirt, this->gint_, *this->pw_rho, *this->psi_ks, + this->ucell, this->kv, GlobalC::GridD, this->orb_cutoff_, + this->paraX_, this->paraC_, this->paraMat_, + &this->pelec->ekb.c[is * nstates], this->X[is].template data(), nstates, openshell); + spectrum.transition_analysis(spin_types[is]); + spectrum.optical_absorption(freq, input.abs_broadening, spin_types[is]); } } - template void LR::ESolver_LR::setup_eigenvectors_X() { ModuleBase::TITLE("ESolver_LR", "setup_eigenvectors_X"); - LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc + for (int is = 0;is < nspin;++is) + { + Parallel_2D px; + LR_Util::setup_2d_division(px, /*nb2d=*/1, this->nvirt[is], this->nocc[is] #ifdef __MPI - , this->paraC_.blacs_ctxt + , this->paraC_.blacs_ctxt #endif - );//nvirt - row, nocc - col - this->X.resize(this->nspin); - const std::vector spin_types = { "Spin Singlet", "Spin Triplet" }; - // if spectrum-only, read the LR-eigenstates from file and return - if (this->input.lr_solver == "spectrum") - { - std::cout << "reading the excitation amplitudes from file: \n"; - for (int is = 0; is < this->nspin; ++is) - { - this->X[is] = std::make_shared>(LR_Util::read_psi_bandfirst( - PARAM.globalv.global_readin_dir + "Excitation_Amplitude_" + spin_types[is], GlobalV::MY_RANK)); - } - } - else - { - for (int is = 0; is < this->nspin; ++is) - { - this->X[is] = std::make_shared>(this->nk, - this->nstates, - this->paraX_.get_local_size(), - nullptr, - false); // band(state)-first - this->X[is]->zero_out(); - } - set_X_initial_guess(); + );//nvirt - row, nocc - col + this->paraX_.emplace_back(std::move(px)); } + this->nloc_per_band = nk * (openshell ? paraX_[0].get_local_size() + paraX_[1].get_local_size() : paraX_[0].get_local_size()); + + this->X.resize(openshell ? 1 : nspin, LR_Util::newTensor({ nstates, nloc_per_band })); + for (auto& x : X) { x.zero(); } + + auto spin_types = (nspin == 2 && !openshell) ? std::vector({ "singlet", "triplet" }) : std::vector({ "updown" }); + // if spectrum-only, read the LR-eigenstates from file and return + if (this->input.lr_solver != "spectrum") { set_X_initial_guess(); } } template void LR::ESolver_LR::set_X_initial_guess() { // set the initial guess of X - // if (E_{lumo}-E_{homo-1} < E_{lumo+1}-E{homo}), mode = 0, else 1(smaller first) - bool ix_mode = false; //default - if (this->eig_ks.nc > nocc + 1 && nocc >= 2 && eig_ks(0, nocc) - eig_ks(0, nocc - 2) - 1e-5 > eig_ks(0, nocc + 1) - eig_ks(0, nocc - 1)) { ix_mode = true; } - GlobalV::ofs_running << "setting the initial guess of X: " << std::endl; - if (nocc >= 2 && eig_ks.nc > nocc) { GlobalV::ofs_running << "E_{lumo}-E_{homo-1}=" << eig_ks(0, nocc) - eig_ks(0, nocc - 2) << std::endl; } - if (nocc >= 1 && eig_ks.nc > nocc + 1) { GlobalV::ofs_running << "E_{lumo+1}-E{homo}=" << eig_ks(0, nocc + 1) - eig_ks(0, nocc - 1) << std::endl; } - GlobalV::ofs_running << "mode of X-index: " << ix_mode << std::endl; - - /// global index map between (i,c) and ix - ModuleBase::matrix ioiv2ix; - std::vector> ix2ioiv; - std::pair>> indexmap = - LR_Util::set_ix_map_diagonal(ix_mode, nocc, nvirt); - - ioiv2ix = std::move(std::get<0>(indexmap)); - ix2ioiv = std::move(std::get<1>(indexmap)); - - // use unit vectors as the initial guess - // for (int i = 0; i < std::min(this->nstates * PARAM.inp.pw_diag_ndim, nocc * nvirt); i++) for (int is = 0;is < this->nspin;++is) { - for (int s = 0; s < nstates; ++s) + const int& no = this->nocc[is]; + const int& nv = this->nvirt[is]; + const int& np = this->npairs[is]; + const Parallel_2D& px = this->paraX_[is]; + + // if (E_{lumo}-E_{homo-1} < E_{lumo+1}-E{homo}), mode = 0, else 1(smaller first) + bool ix_mode = false; //default + if (this->eig_ks.nc > no + 1 && no >= 2 && eig_ks(is, no) - eig_ks(is, no - 2) - 1e-5 > eig_ks(is, no + 1) - eig_ks(is, no - 1)) { ix_mode = true; } + GlobalV::ofs_running << "setting the initial guess of X of spin" << is << std::endl; + if (no >= 2 && eig_ks.nc > no) { GlobalV::ofs_running << "E_{lumo}-E_{homo-1}=" << eig_ks(is, no) - eig_ks(is, no - 2) << std::endl; } + if (no >= 1 && eig_ks.nc > no + 1) { GlobalV::ofs_running << "E_{lumo+1}-E{homo}=" << eig_ks(is, no + 1) - eig_ks(is, no - 1) << std::endl; } + GlobalV::ofs_running << "mode of X-index: " << ix_mode << std::endl; + + /// global index map between (i,c) and ix + ModuleBase::matrix ioiv2ix; + std::vector> ix2ioiv; + std::pair>> indexmap = + LR_Util::set_ix_map_diagonal(ix_mode, no, nv); + + ioiv2ix = std::move(std::get<0>(indexmap)); + ix2ioiv = std::move(std::get<1>(indexmap)); + + for (int ib = 0; ib < nstates; ++ib) { - this->X[is]->fix_b(s); - int ipair = s % (npairs); - int occ_global = std::get<0>(ix2ioiv[ipair]); // occ - int virt_global = std::get<1>(ix2ioiv[ipair]); // virt - int ik = s / (npairs); - if (this->paraX_.in_this_processor(virt_global, occ_global)) - (*X[is])(ik, this->paraX_.global2local_col(occ_global) * this->paraX_.get_row_size() - + this->paraX_.global2local_row(virt_global)) - = (static_cast(1.0) / static_cast(nk)); + const int ipair = ib % np; + const int occ_global = std::get<0>(ix2ioiv[ipair]); // occ + const int virt_global = std::get<1>(ix2ioiv[ipair]); // virt + const int ik = ib / np; + const int xstart_b = ib * nloc_per_band; //start index of band ib + const int xstart_bs = (openshell && is == 1) ? xstart_b + nk * paraX_[0].get_local_size() : xstart_b; // start index of band ib, spin is + const int is_in_x = openshell ? 0 : is; // if openshell, spin-up and spin-down are put together + if (px.in_this_processor(virt_global, occ_global)) + { + const int ipair_loc = px.global2local_col(occ_global) * px.get_row_size() + px.global2local_row(virt_global); + X[is_in_x].data()[xstart_bs + ipair_loc] = (static_cast(1.0) / static_cast(nk)); + } } - this->X[is]->fix_b(0); //recover the pointer } } @@ -544,12 +601,13 @@ void LR::ESolver_LR::init_pot(const Charge& chg_gs) if (this->input.ri_hartree_benchmark != "none") { return; } //no need to initialize potential for Hxc kernel in the RI-benchmark routine switch (nspin) { + using ST = PotHxcLR::SpinType; case 1: - this->pot[0] = std::make_shared(xc_kernel, this->pw_rho, &ucell, &chg_gs, PotHxcLR::SpinType::S1); + this->pot[0] = std::make_shared(xc_kernel, this->pw_rho, &ucell, &chg_gs, ST::S1); break; case 2: - this->pot[0] = std::make_shared(xc_kernel, this->pw_rho, &ucell, &chg_gs, PotHxcLR::SpinType::S2_singlet); - this->pot[1] = std::make_shared(xc_kernel, this->pw_rho, &ucell, &chg_gs, PotHxcLR::SpinType::S2_triplet); + this->pot[0] = std::make_shared(xc_kernel, this->pw_rho, &ucell, &chg_gs, openshell ? ST::S2_updown : ST::S2_singlet); + this->pot[1] = std::make_shared(xc_kernel, this->pw_rho, &ucell, &chg_gs, openshell ? ST::S2_updown : ST::S2_triplet); break; default: throw std::invalid_argument("ESolver_LR: nspin must be 1 or 2"); @@ -567,8 +625,8 @@ void LR::ESolver_LR::read_ks_wfc() { #ifdef __EXX int ncore = 0; - std::vector eig_ks_vec = RI_Benchmark::read_aims_ebands(PARAM.globalv.global_readin_dir + "band_out", nocc, nvirt, ncore); - std::cout << "ncore=" << ncore << ", nocc=" << nocc << ", nvirt=" << nvirt << ", nbands=" << this->nbands << std::endl; + std::vector eig_ks_vec = RI_Benchmark::read_aims_ebands(PARAM.globalv.global_readin_dir + "band_out", nocc_in, nvirt_in, ncore); + std::cout << "ncore=" << ncore << ", nocc=" << nocc_in << ", nvirt=" << nvirt_in << ", nbands=" << this->nbands << std::endl; std::cout << "eig_ks_vec.size()=" << eig_ks_vec.size() << std::endl; if(eig_ks_vec.size() != this->nbands) {ModuleBase::WARNING_QUIT("ESolver_LR", "read_aims_ebands failed.");}; for (int i = 0;i < nbands;++i) { this->pelec->ekb(0, i) = eig_ks_vec[i]; } @@ -578,7 +636,7 @@ void LR::ESolver_LR::read_ks_wfc() #endif } else if (!ModuleIO::read_wfc_nao(PARAM.globalv.global_readin_dir, this->paraMat_, *this->psi_ks, this->pelec, - /*skip_bands=*/this->nocc_max - this->nocc)) { + /*skip_bands=*/this->nocc_max - this->nocc_in)) { ModuleBase::WARNING_QUIT("ESolver_LR", "read ground-state wavefunction failed."); } this->eig_ks = std::move(this->pelec->ekb); diff --git a/source/module_lr/esolver_lrtd_lcao.h b/source/module_lr/esolver_lrtd_lcao.h index f9db98d765..62e27e4fa8 100644 --- a/source/module_lr/esolver_lrtd_lcao.h +++ b/source/module_lr/esolver_lrtd_lcao.h @@ -65,20 +65,28 @@ namespace LR /// @brief ground state bands, read from the file, or moved from ESolver_FP::pelec.ekb ModuleBase::matrix eig_ks;///< energy of ground state - /// @brief Excited state info. size: nstates * nks * (nocc(local) * nvirt (local)) - std::vector>> X; - - int nocc = 1; + /// @brief Excited state wavefunction (locc, lvirt are local size of nocc and nvirt in each process) + /// size of X: [neq][{nstate, nloc_per_band}], namely: + /// - [nspin][{nstates, nk* (locc* lvirt}] for close- shell, + /// - [1][{nstates, nk * (locc[0] * lvirt[0]) + nk * (locc[1] * lvirt[1])}] for open-shell + std::vector X; + int nloc_per_band = 1; + + std::vector nocc; ///< number of occupied orbitals for each spin used in the calculation + int nocc_in = 1; ///< nocc read from input (adjusted by nelec): max(spin-up, spindown) int nocc_max = 1; ///< nelec/2 - int nvirt = 1; + std::vector nvirt; ///< number of virtual orbitals for each spin used in the calculation + int nvirt_in = 1; ///< nvirt read from input (adjusted by nelec): min(spin-up, spindown) int nbands = 2; int nbasis = 2; /// n_occ*nvirt, the basis size of electron-hole pair representation - int npairs = 1; + std::vector npairs; /// how many 2-particle states to be solved int nstates = 1; int nspin = 1; int nk = 1; + int nupdown = 0; + bool openshell = false; std::string xc_kernel; Grid_Technique gt_; @@ -90,7 +98,7 @@ namespace LR /// @brief variables for parallel distribution of KS orbitals Parallel_2D paraC_; /// @brief variables for parallel distribution of excited states - Parallel_2D paraX_; + std::vector paraX_; /// @brief variables for parallel distribution of matrix in AO representation Parallel_Orbitals paraMat_; @@ -111,6 +119,8 @@ namespace LR void parameter_check() const; /// @brief set nocc, nvirt, nbasis, npairs and nstates void set_dimension(); + /// reset nocc, nvirt, npairs after read ground-state wavefunction when nspin=2 + void reset_dim_spin2(); #ifdef __EXX /// Tdata of Exx_LRI is same as T, for the reason, see operator_lr_exx.h diff --git a/source/module_lr/hamilt_casida.cpp b/source/module_lr/hamilt_casida.cpp index 08a00bb09e..a29429d5be 100644 --- a/source/module_lr/hamilt_casida.cpp +++ b/source/module_lr/hamilt_casida.cpp @@ -1,58 +1,57 @@ #include "hamilt_casida.h" +#include "module_lr/utils/lr_util_print.h" namespace LR { template - std::vector HamiltCasidaLR::matrix() + std::vector HamiltLR::matrix()const { - ModuleBase::TITLE("HamiltCasidaLR", "matrix"); - int npairs = this->nocc * this->nvirt; + ModuleBase::TITLE("HamiltLR", "matrix"); + const int no = this->nocc[0]; + const int nv = this->nvirt[0]; + const auto& px = this->pX[0]; + const int ldim = nk * px.get_local_size(); + int npairs = no * nv; std::vector Amat_full(this->nk * npairs * this->nk * npairs, 0.0); for (int ik = 0;ik < this->nk;++ik) - for (int j = 0;j < nocc;++j) - for (int b = 0;b < nvirt;++b) + for (int j = 0;j < no;++j) + for (int b = 0;b < nv;++b) {//calculate A^{ai} for each bj - int bj = j * nvirt + b; - int kbj = ik * npairs + bj; - psi::Psi X_bj(1, 1, this->nk * this->pX->get_local_size()); // k1-first, like in iterative solver + int bj = j * nv + b; //global + int kbj = ik * npairs + bj; //global + psi::Psi X_bj(1, 1, this->nk * px.get_local_size()); // k1-first, like in iterative solver X_bj.zero_out(); - // X_bj(0, 0, lj * this->pX->get_row_size() + lb) = this->one(); - int lj = this->pX->global2local_col(j); - int lb = this->pX->global2local_row(b); - if (this->pX->in_this_processor(b, j)) X_bj(0, 0, ik * this->pX->get_local_size() + lj * this->pX->get_row_size() + lb) = this->one(); - psi::Psi A_aibj(1, 1, this->nk * this->pX->get_local_size()); // k1-first + // X_bj(0, 0, lj * px.get_row_size() + lb) = this->one(); + int lj = px.global2local_col(j); + int lb = px.global2local_row(b); + if (px.in_this_processor(b, j)) { X_bj(0, 0, ik * px.get_local_size() + lj * px.get_row_size() + lb) = this->one(); } + psi::Psi A_aibj(1, 1, this->nk * px.get_local_size()); // k1-first A_aibj.zero_out(); + this->cal_dm_trans(0, X_bj.get_pointer()); hamilt::Operator* node(this->ops); while (node != nullptr) { // act() on and return the k1-first type of psi - node->act(X_bj, A_aibj, 1); + node->act(1, ldim, /*npol=*/1, X_bj.get_pointer(), A_aibj.get_pointer()); node = (hamilt::Operator*)(node->next_op); } // reduce ai for a fixed bj A_aibj.fix_kb(0, 0); #ifdef __MPI for (int ik_ai = 0;ik_ai < this->nk;++ik_ai) - LR_Util::gather_2d_to_full(*this->pX, &A_aibj.get_pointer()[ik_ai * this->pX->get_local_size()], + LR_Util::gather_2d_to_full(px, &A_aibj.get_pointer()[ik_ai * px.get_local_size()], Amat_full.data() + kbj * this->nk * npairs /*col, bj*/ + ik_ai * npairs/*row, ai*/, - false, this->nvirt, this->nocc); + false, nv, no); #endif } // output Amat - std::cout << "Amat_full:" << std::endl; - for (int i = 0;i < this->nk * npairs;++i) - { - for (int j = 0;j < this->nk * npairs;++j) - { - std::cout << Amat_full[i * this->nk * npairs + j] << " "; - } - std::cout << std::endl; - } + std::cout << "Full A matrix: (elements < 1e-10 is set to 0)" << std::endl; + LR_Util::print_value(Amat_full.data(), nk * npairs, nk * npairs); return Amat_full; } - template<> double HamiltCasidaLR::one() { return 1.0; } - template<> std::complex HamiltCasidaLR>::one() { return std::complex(1.0, 0.0); } + template<> double HamiltLR::one() const { return 1.0; } + template<> std::complex HamiltLR>::one() const { return std::complex(1.0, 0.0); } - template class HamiltCasidaLR; - template class HamiltCasidaLR>; + template class HamiltLR; + template class HamiltLR>; } \ No newline at end of file diff --git a/source/module_lr/hamilt_casida.h b/source/module_lr/hamilt_casida.h index 27d637f7b9..ab40c82611 100644 --- a/source/module_lr/hamilt_casida.h +++ b/source/module_lr/hamilt_casida.h @@ -1,9 +1,11 @@ #pragma once +#include #include "module_hamilt_general/hamilt.h" #include "module_elecstate/module_dm/density_matrix.h" #include "module_lr/operator_casida/operator_lr_diag.h" #include "module_lr/operator_casida/operator_lr_hxc.h" #include "module_basis/module_ao/parallel_orbitals.h" +#include "module_lr/dm_trans/dm_trans.h" #ifdef __EXX #include "module_lr/operator_casida/operator_lr_exx.h" #include "module_lr/ri_benchmark/operator_ri_hartree.h" @@ -12,19 +14,19 @@ namespace LR { template - class HamiltCasidaLR : public hamilt::Hamilt + class HamiltLR { public: template - HamiltCasidaLR(std::string& xc_kernel, + HamiltLR(std::string& xc_kernel, const int& nspin, const int& naos, - const int& nocc, - const int& nvirt, + const std::vector& nocc, + const std::vector& nvirt, const UnitCell& ucell_in, const std::vector& orb_cutoff, Grid_Driver& gd_in, - const psi::Psi* psi_ks_in, + const psi::Psi& psi_ks_in, const ModuleBase::matrix& eig_ks, #ifdef __EXX std::weak_ptr> exx_lri_in, @@ -33,20 +35,22 @@ namespace LR TGint* gint_in, std::weak_ptr pot_in, const K_Vectors& kv_in, - Parallel_2D* pX_in, - Parallel_2D* pc_in, - Parallel_Orbitals* pmat_in, + const std::vector& pX_in, + const Parallel_2D& pc_in, + const Parallel_Orbitals& pmat_in, const std::string& spin_type, const std::string& ri_hartree_benchmark = "none", - const std::vector& aims_nbasis = {}) : nocc(nocc), nvirt(nvirt), pX(pX_in), nk(kv_in.get_nks() / nspin) + const std::vector& aims_nbasis = {}) : nspin(nspin), nocc(nocc), nvirt(nvirt), pX(pX_in), nk(kv_in.get_nks() / nspin) { - ModuleBase::TITLE("HamiltCasidaLR", "HamiltCasidaLR"); + ModuleBase::TITLE("HamiltLR", "HamiltLR"); if (ri_hartree_benchmark != "aims") { assert(aims_nbasis.empty()); } - this->classname = "HamiltCasidaLR"; - this->DM_trans.resize(1); - this->DM_trans[0] = LR_Util::make_unique>(pmat_in, 1, kv_in.kvec_d, nk); + // always use nspin=1 for transition density matrix + this->DM_trans = LR_Util::make_unique>(&pmat_in, 1, kv_in.kvec_d, nk); + if (ri_hartree_benchmark == "none") { LR_Util::initialize_DMR(*this->DM_trans, pmat_in, ucell_in, gd_in, orb_cutoff); } + // this->DM_trans->init_DMR(&gd_in, &ucell_in); // too large due to not restricted by orb_cutoff + // add the diag operator (the first one) - this->ops = new OperatorLRDiag(eig_ks, pX_in, nk, nocc, nvirt); + this->ops = new OperatorLRDiag(eig_ks.c, pX[0], nk, nocc[0], nvirt[0]); //add Hxc operator #ifdef __EXX using TAC = std::pair>; @@ -63,7 +67,7 @@ namespace LR if (ri_hartree_benchmark != "none") { #ifdef __EXX - if (spin_type == "Spin Singlet") + if (spin_type == "singlet") { if (ri_hartree_benchmark == "aims") { @@ -77,11 +81,11 @@ namespace LR } if (!std::set({ "rpa", "hf" }).count(xc_kernel)) { throw std::runtime_error("ri_hartree_benchmark is only supported for xc_kernel rpa and hf"); } RI_Benchmark::OperatorRIHartree* ri_hartree_op - = new RI_Benchmark::OperatorRIHartree(ucell_in, naos, nocc, nvirt, *psi_ks_in, + = new RI_Benchmark::OperatorRIHartree(ucell_in, naos, nocc[0], nvirt[0], psi_ks_in, Cs_read, Vs_read, ri_hartree_benchmark == "aims", aims_nbasis); this->ops->add(ri_hartree_op); } - else if (spin_type == "Spin Triplet") {std::cout<<"f_Hxc based on grid integral is not needed."<reset_Cs(Cs_read); exx_lri_in.lock()->reset_Vs(Vs_read); } // std::cout << "exx_alpha=" << exx_alpha << std::endl; // the default value of exx_alpha is 0.25 when dft_functional is pbe or hse - hamilt::Operator* lr_exx = new OperatorLREXX(nspin, naos, nocc, nvirt, ucell_in, psi_ks_in, - this->DM_trans, exx_lri_in, kv_in, pX_in, pc_in, pmat_in, + hamilt::Operator* lr_exx = new OperatorLREXX(nspin, naos, nocc[0], nvirt[0], ucell_in, psi_ks_in, + this->DM_trans, exx_lri_in, kv_in, pX_in[0], pc_in, pmat_in, xc_kernel == "hf" ? 1.0 : exx_alpha, //alpha - ri_hartree_benchmark != "none"/*whether to cal_dm_trans first here*/, aims_nbasis); this->ops->add(lr_exx); } #endif + + this->cal_dm_trans = [&, this](const int& is, const T* X)->void + { + const auto psi_ks_is = LR_Util::get_psi_spin(psi_ks_in, is, nk); +#ifdef __MPI + std::vector dm_trans_2d = cal_dm_trans_pblas(X, pX[is], psi_ks_is, pc_in, naos, nocc[is], nvirt[is], pmat_in); + if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat_in); +#else + std::vector dm_trans_2d = cal_dm_trans_blas(X, psi_ks_is, nocc[is], nvirt[is]); + if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); +#endif + // LR_Util::print_tensor(dm_trans_2d[0], "dm_trans_2d[0]", &pmat_in); + // tensor to vector, then set DMK + for (int ik = 0;ik < nk;++ik) { this->DM_trans->set_DMK_pointer(ik, dm_trans_2d[ik].data()); } + }; } - ~HamiltCasidaLR() + ~HamiltLR() { delete this->ops; } + + std::vector matrix()const; + + void hPsi(const T* psi_in, T* hpsi, const int ld_psi, const int& nband) const { - if (this->ops != nullptr) + assert(ld_psi == nk * pX[0].get_local_size()); + for (int ib = 0;ib < nband;++ib) { - delete this->ops; + const int offset = ib * ld_psi; + this->cal_dm_trans(0, psi_in + offset); // calculate transition density matrix here + hamilt::Operator* node(this->ops); + while (node != nullptr) + { + node->act(/*nband=*/1, ld_psi, /*npol=*/1, psi_in + offset, hpsi + offset); + node = (hamilt::Operator*)(node->next_op); + } } - }; + } - virtual std::vector matrix() override; + void global2local(T* lvec, const T* gvec, const int& nband) const + { + const int npairs = nocc[0] * nvirt[0]; + for (int ib = 0;ib < nband;++ib) + { + const int loffset_b = ib * nk * pX[0].get_local_size(); + const int goffset_b = ib * nk * npairs; + for (int ik = 0;ik < nk;++ik) + { + const int loffset = loffset_b + ik * pX[0].get_local_size(); + const int goffset = goffset_b + ik * npairs; + for (int lo = 0;lo < pX[0].get_col_size();++lo) + { + const int go = pX[0].local2global_col(lo); + for (int lv = 0;lv < pX[0].get_row_size();++lv) + { + const int gv = pX[0].local2global_row(lv); + lvec[loffset + lo * pX[0].get_row_size() + lv] = gvec[goffset + go * nvirt[0] + gv]; + } + } + } + } + } private: - int nocc; - int nvirt; - int nk; - Parallel_2D* pX = nullptr; - T one(); + const std::vector& nocc; + const std::vector& nvirt; + const int nspin = 1; + const int nk = 1; + const bool tdm_sym = false; ///< whether to symmetrize the transition density matrix + const std::vector& pX; + T one()const; /// transition density matrix in AO representation - /// Hxc only: size=1, calculate on the same address for each bands - /// Hxc+Exx: size=nbands, store the result of each bands for common use - std::vector>> DM_trans; + /// calculate on the same address for each bands, and commonly used by all the operators + std::unique_ptr> DM_trans; + + /// first node operator, add operations from each operators + hamilt::Operator* ops = nullptr; + + std::function cal_dm_trans; }; } diff --git a/source/module_lr/hamilt_ulr.hpp b/source/module_lr/hamilt_ulr.hpp new file mode 100644 index 0000000000..77ee62ce73 --- /dev/null +++ b/source/module_lr/hamilt_ulr.hpp @@ -0,0 +1,230 @@ +#pragma once +#include "module_hamilt_general/hamilt.h" +#include "module_elecstate/module_dm/density_matrix.h" +#include "module_lr/operator_casida/operator_lr_diag.h" +#include "module_lr/operator_casida/operator_lr_hxc.h" +#include "module_lr/utils/lr_util_print.h" +#ifdef __EXX +#include "module_lr/operator_casida/operator_lr_exx.h" +#endif +namespace LR +{ + /// Unristricted TDDFT (TDA) for open-shell systems + /// The A matrix is diveded by 4 blocks: uu, ud, du, dd + template + class HamiltULR + { + public: + template + HamiltULR(std::string& xc_kernel, + const int& nspin, + const int& naos, + const std::vector& nocc, ///< {up, down} + const std::vector& nvirt, ///< {up, down} + const UnitCell& ucell_in, + const std::vector& orb_cutoff, + Grid_Driver& gd_in, + const psi::Psi& psi_ks_in, + const ModuleBase::matrix& eig_ks, +#ifdef __EXX + std::weak_ptr> exx_lri_in, + const double& exx_alpha, +#endif + TGint* gint_in, + std::vector>& pot_in, + const K_Vectors& kv_in, + const std::vector& pX_in, ///< {up, down} + const Parallel_2D& pc_in, + const Parallel_Orbitals& pmat_in) :nocc(nocc), nvirt(nvirt), pX(pX_in), nk(kv_in.get_nks() / nspin), + ldim(nk* pX[0].get_local_size() + nk * pX[1].get_local_size()), + gdim(nk* std::inner_product(nocc.begin(), nocc.end(), nvirt.begin(), 0)) + { + ModuleBase::TITLE("HamiltULR", "HamiltULR"); + this->DM_trans = LR_Util::make_unique>(&pmat_in, 1, kv_in.kvec_d, nk); + LR_Util::initialize_DMR(*this->DM_trans, pmat_in, ucell_in, gd_in, orb_cutoff); + // this->DM_trans->init_DMR(&gd_in, &ucell_in); // too large due to not restricted by orb_cutoff + this->ops.resize(4); + + this->ops[0] = new OperatorLRDiag(eig_ks.c, pX_in[0], nk, nocc[0], nvirt[0]); + this->ops[3] = new OperatorLRDiag(eig_ks.c + nk * (nocc[0] + nvirt[0]), pX_in[1], nk, nocc[1], nvirt[1]); + + auto newHxc = [&](const int& sl, const int& sr) { return new OperatorLRHxc(nspin, naos, nocc, nvirt, psi_ks_in, + this->DM_trans, gint_in, pot_in[sl], ucell_in, orb_cutoff, gd_in, kv_in, pX_in, pc_in, pmat_in, { sl,sr }); }; + this->ops[0]->add(newHxc(0, 0)); + this->ops[1] = newHxc(0, 1); + this->ops[2] = newHxc(1, 0); + this->ops[3]->add(newHxc(1, 1)); + +#ifdef __EXX + if (xc_kernel == "hf" || xc_kernel == "hse") + { + std::vector> psi_ks_spin = { LR_Util::get_psi_spin(psi_ks_in, 0, nk), LR_Util::get_psi_spin(psi_ks_in, 1, nk) }; + for (int is : {0, 1}) + { + this->ops[(is << 1) + is]->add(new OperatorLREXX(nspin, naos, nocc[is], nvirt[is], ucell_in, psi_ks_spin[is], + this->DM_trans, exx_lri_in, kv_in, pX_in[is], pc_in, pmat_in, + xc_kernel == "hf" ? 1.0 : exx_alpha)); + } + } +#endif + + this->cal_dm_trans = [&, this](const int& is, const T* X)->void + { + const auto psi_ks_is = LR_Util::get_psi_spin(psi_ks_in, is, nk); + // LR_Util::print_value(X, pX_in[is].get_local_size()); +#ifdef __MPI + std::vector dm_trans_2d = cal_dm_trans_pblas(X, pX[is], psi_ks_is, pc_in, naos, nocc[is], nvirt[is], pmat_in); + if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat_in); +#else + std::vector dm_trans_2d = cal_dm_trans_blas(X, psi_ks_is, nocc[is], nvirt[is]); + if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); +#endif + // LR_Util::print_tensor(dm_trans_2d[0], "DMtrans(k=0)", &pmat_in); + // tensor to vector, then set DMK + for (int ik = 0;ik < nk;++ik) { this->DM_trans->set_DMK_pointer(ik, dm_trans_2d[ik].data()); } + }; + } + ~HamiltULR() + { + for (auto& op : ops) { delete op; } + } + void hPsi(const T* psi_in, T* hpsi, const int ld_psi, const int& nband) const + { + ModuleBase::TITLE("HamiltULR", "hPsi"); + assert(ld_psi == this->ldim); + const std::vector xdim_is = { nk * pX[0].get_local_size(), nk * pX[1].get_local_size() }; + /// band-wise act (also works for close-shell, but not efficient) + for (int ib = 0;ib < nband;++ib) + { + const int offset_band = ib * ld_psi; + for (int is_bj : {0, 1}) + { + const int offset_bj = offset_band + is_bj * xdim_is[0]; + cal_dm_trans(is_bj, psi_in + offset_bj); // calculate transition density matrix here + for (int is_ai : {0, 1}) + { + const int offset_ai = offset_band + is_ai * xdim_is[0]; + hamilt::Operator* node(this->ops[(is_ai << 1) + is_bj]); + while (node != nullptr) + { + node->act(/*nband=*/1, xdim_is[is_bj], /*npol=*/1, psi_in + offset_bj, hpsi + offset_ai); + node = (hamilt::Operator*)(node->next_op); + } + } + } + } + } + std::vector matrix()const + { + ModuleBase::TITLE("HamiltULR", "matrix"); + const std::vector npairs = { this->nocc[0] * this->nvirt[0], this->nocc[1] * this->nvirt[1] }; + const std::vector ldim_is = { nk * pX[0].get_local_size(), nk * pX[1].get_local_size() }; + const std::vector gdim_is = { nk * npairs[0], nk * npairs[1] }; + std::vector Amat_full(gdim * gdim); + for (int is_bj : {0, 1}) + { + const int no = this->nocc[is_bj]; + const int nv = this->nvirt[is_bj]; + const auto& px = this->pX[is_bj]; + const int loffset_bj = is_bj * ldim_is[0]; + const int goffset_bj = is_bj * gdim_is[0]; + for (int ik_bj = 0;ik_bj < nk;++ik_bj) + { + for (int j = 0;j < no;++j) + { + for (int b = 0;b < nv;++b) + { + const int gcol = goffset_bj + ik_bj * npairs[is_bj] + j * nv + b;//global + std::vector X_bj(this->ldim, T(0)); + const int lj = px.global2local_col(j); + const int lb = px.global2local_row(b); + const int lcol = loffset_bj + ik_bj * px.get_local_size() + lj * px.get_row_size() + lb;//local + if (px.in_this_processor(b, j)) { X_bj[lcol] = T(1); } + this->cal_dm_trans(is_bj, X_bj.data() + loffset_bj); + std::vector Aloc_col(this->ldim, T(0)); // a col of A matrix (local) + for (int is_ai : {0, 1}) + { + const int goffset_ai = is_ai * gdim_is[0]; + const int loffset_ai = is_ai * ldim_is[0]; + const auto& pax = this->pX[is_ai]; + hamilt::Operator* node(this->ops[(is_ai << 1) + is_bj]); + while (node != nullptr) + { + node->act(1, ldim_is[is_bj], /*npol=*/1, X_bj.data() + loffset_bj, Aloc_col.data() + loffset_ai); + node = (hamilt::Operator*)(node->next_op); + } +#ifdef __MPI + for (int ik_ai = 0;ik_ai < this->nk;++ik_ai) + { + LR_Util::gather_2d_to_full(pax, Aloc_col.data() + loffset_ai + ik_ai * pax.get_local_size(), + Amat_full.data() + gcol * gdim /*col, bj*/ + goffset_ai + ik_ai * npairs[is_ai]/*row, ai*/, + false, nv, no); + } +#else + std::memcpy(Amat_full.data() + gcol * gdim + goffset_ai, Aloc_col.data() + goffset_ai, gdim_is[is_ai] * sizeof(T)); +#endif + } + } + } + } + } + std::cout << "Full A matrix:" << std::endl; + LR_Util::print_value(Amat_full.data(), gdim, gdim); + return Amat_full; + } + + /// copy global data (eigenvectors) to local memory + void global2local(T* lvec, const T* gvec, const int& nband) const + { + const std::vector npairs = { this->nocc[0] * this->nvirt[0], this->nocc[1] * this->nvirt[1] }; + const std::vector ldim_is = { nk * pX[0].get_local_size(), nk * pX[1].get_local_size() }; + const std::vector gdim_is = { nk * npairs[0], nk * npairs[1] }; + for (int ib = 0;ib < nband;++ib) + { + const int loffset_b = ib * this->ldim; + const int goffset_b = ib * this->gdim; + for (int is : {0, 1}) + { + const int loffset_bs = loffset_b + is * ldim_is[0]; + const int goffset_bs = goffset_b + is * gdim_is[0]; + for (int ik = 0;ik < nk;++ik) + { + const int loffset = loffset_bs + ik * pX[is].get_local_size(); + const int goffset = goffset_bs + ik * npairs[is]; + for (int lo = 0;lo < pX[is].get_col_size();++lo) + { + const int go = pX[is].local2global_col(lo); + for (int lv = 0;lv < pX[is].get_row_size();++lv) + { + const int gv = pX[is].local2global_row(lv); + lvec[loffset + lo * pX[is].get_row_size() + lv] = gvec[goffset + go * nvirt[is] + gv]; + } + } + } + } + } + } + + private: + const std::vector& nocc; + const std::vector& nvirt; + + const std::vector& pX; + + + const int nk = 1; + const int ldim = 1; + const int gdim = 1; + + /// 4 operator lists: uu, ud, du, dd + std::vector*> ops; + + /// transition density matrix in AO representation + /// Hxc only: size=1, calculate on the same address for each bands + /// Hxc+Exx: size=nbands, store the result of each bands for common use + std::unique_ptr> DM_trans; + + std::function cal_dm_trans; + const bool tdm_sym = false; ///< whether to symmetrize the transition density matrix + }; +} \ No newline at end of file diff --git a/source/module_lr/hsolver_lrtd.cpp b/source/module_lr/hsolver_lrtd.cpp deleted file mode 100644 index c8f2b9d5b6..0000000000 --- a/source/module_lr/hsolver_lrtd.cpp +++ /dev/null @@ -1,203 +0,0 @@ -#include "hsolver_lrtd.h" -#include "module_parameter/parameter.h" -#include "module_hsolver/diago_david.h" -#include "module_hsolver/diago_dav_subspace.h" -#include "module_hsolver/diago_cg.h" -#include "module_lr/utils/lr_util.h" -#include "module_lr/utils/lr_util_print.h" - -namespace LR -{ - inline double square(double x) { return x * x; }; - inline double square(std::complex x) { return x.real() * x.real() + x.imag() * x.imag(); }; - template - inline void print_eigs(const std::vector& eigs, const std::string& label = "", const double factor = 1.0) - { - std::cout << label << std::endl; - for (auto& e : eigs) {std::cout << e * factor << " "; -} - std::cout << std::endl; - } - template - void HSolverLR::solve(hamilt::Hamilt* pHamilt, - psi::Psi& psi, - elecstate::ElecState* pes, - const std::string method_in, - const bool hermitian) - { - ModuleBase::TITLE("HSolverLR", "solve"); - assert(psi.get_nk() == nk); - const std::vector spin_types = { "Spin Singlet", "Spin Triplet" }; - // note: if not TDA, the eigenvalues will be complex - // then we will need a new constructor of DiagoDavid - - // 1. allocate precondition and eigenvalue - std::vector precondition(psi.get_nk() * psi.get_nbasis()); - std::vector eigenvalue(psi.get_nbands()); //nstates - // 2. select the method - this->method = method_in; -#ifdef __MPI - const hsolver::diag_comm_info comm_info = { POOL_WORLD, GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL }; -#else - const hsolver::diag_comm_info comm_info = { GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL }; -#endif - - if (this->method == "lapack") - { - std::vector Amat_full = pHamilt->matrix(); - eigenvalue.resize(nk * npairs); - if (hermitian) { LR_Util::diag_lapack(nk * npairs, Amat_full.data(), eigenvalue.data()); } - else - { - std::vector> eig_complex(nk * npairs); - LR_Util::diag_lapack_nh(nk * npairs, Amat_full.data(), eig_complex.data()); - print_eigs(eig_complex, "Right eigenvalues: of the non-Hermitian matrix: (Ry)"); - for (int i = 0; i < nk * npairs; i++) { eigenvalue[i] = eig_complex[i].real(); } - } - psi.fix_kb(0, 0); - // copy eigenvectors - for (int i = 0;i < psi.size();++i) { psi.get_pointer()[i] = Amat_full[i]; -} - } - else - { - // 3. set precondition and diagethr - for (int i = 0; i < psi.get_nk() * psi.get_nbasis(); ++i) { - precondition[i] = static_cast(1.0); -} - - // wrap band-first psi as k1-first psi_k1_dav - psi::Psi psi_k1_dav = LR_Util::bfirst_to_k1_wrapper(psi); - assert(psi_k1_dav.get_nbands() == psi.get_nbands()); - assert(psi_k1_dav.get_nbasis() == psi.get_nbasis() * psi.get_nk()); - - const int david_maxiter = hsolver::DiagoIterAssist::PW_DIAG_NMAX; - - if (this->method == "dav") - { - // Allow 5 tries at most. If ntry > ntry_max = 5, exit diag loop. - const int ntry_max = 5; - // In non-self consistent calculation, do until totally converged. Else allow 5 eigenvecs to be NOT - // converged. - const int notconv_max = ("nscf" == PARAM.inp.calculation) ? 0 : 5; - // do diag and add davidson iteration counts up to avg_iter - - auto hpsi_func = [pHamilt]( - T* psi_in, - T* hpsi_out, - const int ld_psi, - const int nvec) - { - auto psi_iter_wrapper = psi::Psi(psi_in, 1, nvec, ld_psi, nullptr); - psi::Range bands_range(true, 0, 0, nvec-1); - using hpsi_info = typename hamilt::Operator::hpsi_info; - hpsi_info info(&psi_iter_wrapper, bands_range, hpsi_out); - pHamilt->ops->hPsi(info); - }; - auto spsi_func = [pHamilt](const T* psi_in, T* spsi_out, - const int ld_psi, const int nbands) - { - // sPsi determines S=I or not by PARAM.globalv.use_uspp inside - pHamilt->sPsi(psi_in, spsi_out, ld_psi, ld_psi, nbands); - }; - - const int& dim = psi_k1_dav.get_nbasis(); //equals to leading dimension here - const int& nband = psi_k1_dav.get_nbands(); - hsolver::DiagoDavid david(precondition.data(), nband, dim, PARAM.inp.pw_diag_ndim, PARAM.inp.use_paw, comm_info); - hsolver::DiagoIterAssist::avg_iter += static_cast(david.diag(hpsi_func, spsi_func, - dim, psi_k1_dav.get_pointer(), eigenvalue.data(), this->diag_ethr, david_maxiter, ntry_max, 0)); - } - else if (this->method == "dav_subspace") //need refactor - { - hsolver::Diago_DavSubspace dav_subspace(precondition, - psi_k1_dav.get_nbands(), - psi_k1_dav.get_nbasis(), - PARAM.inp.pw_diag_ndim, - this->diag_ethr, - david_maxiter, - false, //always do the subspace diag (check the implementation) - comm_info); - - auto hpsi_func = [pHamilt]( - T* psi_in, - T* hpsi_out, - const int ld_psi, - const int nvec) - { - auto psi_iter_wrapper = psi::Psi(psi_in, 1, nvec, ld_psi, nullptr); - psi::Range bands_range(true, 0, 0, nvec-1); - using hpsi_info = typename hamilt::Operator::hpsi_info; - hpsi_info info(&psi_iter_wrapper, bands_range, hpsi_out); - pHamilt->ops->hPsi(info); - }; - auto subspace_func = [pHamilt](T* psi_out, - T* psi_in, - Real* eigenvalue_in_hsolver, - const int nband_in, - const int nbasis_max_in) { - // Convert "pointer data stucture" to a psi::Psi object - auto psi_in_wrapper = psi::Psi(psi_in, 1, nband_in, nbasis_max_in, nullptr); - auto psi_out_wrapper = psi::Psi(psi_out, 1, nband_in, nbasis_max_in, nullptr); - - hsolver::DiagoIterAssist::diagH_subspace(pHamilt, - psi_in_wrapper, - psi_out_wrapper, - eigenvalue_in_hsolver, - nband_in); - }; - - std::vector ethr_band(psi_k1_dav.get_nbands(), this->diag_ethr); - hsolver::DiagoIterAssist::avg_iter - += static_cast(dav_subspace.diag( - hpsi_func, psi_k1_dav.get_pointer(), - psi_k1_dav.get_nbasis(), - eigenvalue.data(), - ethr_band.data(), - false /*scf*/)); - } - else {throw std::runtime_error("HSolverLR::solve: method not implemented");} - } - - // 5. copy eigenvalue to pes - for (int ist = 0;ist < psi.get_nbands();++ist) { pes->ekb(ispin_solve, ist) = eigenvalue[ist];} - - - // 6. output eigenvalues and eigenvectors - print_eigs(eigenvalue, "eigenvalues: (Ry)"); - print_eigs(eigenvalue, "eigenvalues: (eV)", ModuleBase::Ry_to_eV); - if (out_wfc_lr) - { - if (GlobalV::MY_RANK == 0) - { - std::ofstream ofs(PARAM.globalv.global_out_dir + "Excitation_Energy_" + spin_types[ispin_solve] + ".dat"); - ofs << std::setprecision(8) << std::scientific; - for (auto& e : eigenvalue) {ofs << e << " ";} - ofs.close(); - } - LR_Util::write_psi_bandfirst(psi, PARAM.globalv.global_out_dir + "Excitation_Amplitude_" + spin_types[ispin_solve], GlobalV::MY_RANK); - } - - // normalization is already satisfied - // std::cout << "check normalization of eigenvectors:" << std::endl; - // for (int ist = 0;ist < psi.get_nbands();++ist) - // { - // double norm2 = 0; - // for (int ik = 0;ik < psi.get_nk();++ik) - // { - // for (int ib = 0;ib < psi.get_nbasis();++ib) - // { - // norm2 += square(psi(ist, ik, ib)); - // // std::cout << "norm2_now=" << norm2 << std::endl; - // } - // } - // std::cout << "state " << ist << ", norm2=" << norm2 << std::endl; - // } - - // output iters - std::cout << "Average iterative diagonalization steps: " << hsolver::DiagoIterAssist::avg_iter - << " ; where current threshold is: " << hsolver::DiagoIterAssist::PW_DIAG_THR << " . " << std::endl; - // castmem_2d_2h_op()(cpu_ctx, cpu_ctx, pes->ekb.c, eigenvalues.data(), pes->ekb.nr * pes->ekb.nc); - } - template class HSolverLR; - template class HSolverLR>; -}; \ No newline at end of file diff --git a/source/module_lr/hsolver_lrtd.h b/source/module_lr/hsolver_lrtd.h deleted file mode 100644 index 7248a5a4f1..0000000000 --- a/source/module_lr/hsolver_lrtd.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#include "module_hsolver/hsolver.h" -#include "module_hsolver/diago_iter_assist.h" -#include "module_psi/psi.h" -namespace LR -{ - template - class HSolverLR - { - using Real = typename GetTypeReal::type; - const int& nk; - const int& npairs; - const int& ispin_solve; - const bool out_wfc_lr = false; - public: - HSolverLR(const int& nk_in, const int& npairs_in, const int& ispin_solve_in = 0, const bool& out_wfc_lr_in = false) - :nk(nk_in), npairs(npairs_in), out_wfc_lr(out_wfc_lr_in), ispin_solve(ispin_solve_in) {}; - Real set_diagethr(Real diag_ethr_in, const int istep, const int iter, const Real ethr) - { - this->diag_ethr = ethr; - return ethr; - } - void solve(hamilt::Hamilt* pHamilt, - psi::Psi& psi, - elecstate::ElecState* pes, - const std::string method_in, - const bool hermitian = true); - - Real diag_ethr = 0.0; // threshold for diagonalization - - private: - std::string method = "none"; - }; -}; \ No newline at end of file diff --git a/source/module_lr/hsolver_lrtd.hpp b/source/module_lr/hsolver_lrtd.hpp new file mode 100644 index 0000000000..107621fa08 --- /dev/null +++ b/source/module_lr/hsolver_lrtd.hpp @@ -0,0 +1,139 @@ +#pragma once +#include "module_parameter/parameter.h" +#include "module_hsolver/diago_david.h" +#include "module_hsolver/diago_dav_subspace.h" +#include "module_hsolver/diago_cg.h" +#include "module_hsolver/diago_iter_assist.h" +#include "module_lr/utils/lr_util.h" +#include "module_lr/utils/lr_util_print.h" + +namespace LR +{ + template using Real = typename GetTypeReal::type; + + namespace HSolver + { + template + inline void print_eigs(const std::vector& eigs, const std::string& label = "", const double factor = 1.0) + { + std::cout << label << std::endl; + for (auto& e : eigs) { std::cout << e * factor << " "; } + std::cout << std::endl; + } + + /// eigensolver for common Hamilt + template + void solve(const THamilt& hm, + T* psi, + const int& dim, ///< local leading dimension (or nbasis) + const int& nband, ///< nstates in LR-TDDFT, not (nocc+nvirt) + double* eig, + const std::string method, + const Real& diag_ethr, ///< threshold for diagonalization + const bool hermitian = true) + { + ModuleBase::TITLE("HSolverLR", "solve"); + const std::vector spin_types = { "singlet", "triplet" }; + // note: if not TDA, the eigenvalues will be complex + // then we will need a new constructor of DiagoDavid + + // 1. allocate precondition and eigenvalue + std::vector> precondition(dim); + std::vector> eigenvalue(nband); //nstates + // 2. select the method +#ifdef __MPI + const hsolver::diag_comm_info comm_info = { POOL_WORLD, GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL }; +#else + const hsolver::diag_comm_info comm_info = { GlobalV::RANK_IN_POOL, GlobalV::NPROC_IN_POOL }; +#endif + + if (method == "lapack") + { + std::vector Amat_full = hm.matrix(); + const int gdim = std::sqrt(Amat_full.size()); + eigenvalue.resize(gdim); + if (hermitian) { LR_Util::diag_lapack(gdim, Amat_full.data(), eigenvalue.data()); } + else + { + std::vector> eig_complex(gdim); + LR_Util::diag_lapack_nh(gdim, Amat_full.data(), eig_complex.data()); + print_eigs(eig_complex, "Right eigenvalues: of the non-Hermitian matrix: (Ry)"); + for (int i = 0; i < gdim; i++) { eigenvalue[i] = eig_complex[i].real(); } + } + // copy eigenvectors + hm.global2local(psi, Amat_full.data(), nband); + } + else + { + // 3. set precondition and diagethr + for (int i = 0; i < dim; ++i) { precondition[i] = static_cast>(1.0); } + + const int david_maxiter = hsolver::DiagoIterAssist::PW_DIAG_NMAX; + + auto hpsi_func = [&hm](T* psi_in, T* hpsi, const int ld_psi, const int nvec) {hm.hPsi(psi_in, hpsi, ld_psi, nvec);}; + auto spsi_func = [&hm](const T* psi_in, T* spsi, const int ld_psi, const int nvec) + { std::memcpy(spsi, psi_in, sizeof(T) * ld_psi * nvec); }; + + if (method == "dav") + { + // Allow 5 tries at most. If ntry > ntry_max = 5, exit diag loop. + const int ntry_max = 5; + // In non-self consistent calculation, do until totally converged. Else allow 5 eigenvecs to be NOT + // converged. + const int notconv_max = ("nscf" == PARAM.inp.calculation) ? 0 : 5; + // do diag and add davidson iteration counts up to avg_iter + hsolver::DiagoDavid david(precondition.data(), nband, dim, PARAM.inp.pw_diag_ndim, PARAM.inp.use_paw, comm_info); + hsolver::DiagoIterAssist::avg_iter += static_cast(david.diag(hpsi_func, spsi_func, + dim, psi, eigenvalue.data(), diag_ethr, david_maxiter, ntry_max, 0)); + } + else if (method == "dav_subspace") //need refactor + { + hsolver::Diago_DavSubspace dav_subspace(precondition, + nband, + dim, + PARAM.inp.pw_diag_ndim, + diag_ethr, + david_maxiter, + false, //always do the subspace diag (check the implementation) + comm_info); + std::vector ethr_band(nband, diag_ethr); + hsolver::DiagoIterAssist::avg_iter + += static_cast(dav_subspace.diag( + hpsi_func, psi, + dim, + eigenvalue.data(), + ethr_band.data(), + false /*scf*/)); + } + else { throw std::runtime_error("HSolverLR::solve: method not implemented"); } + } + + // 5. copy eigenvalues + for (int ist = 0;ist < nband;++ist) { eig[ist] = eigenvalue[ist]; } + + // 6. output eigenvalues and eigenvectors + print_eigs(eigenvalue, "eigenvalues: (Ry)"); + print_eigs(eigenvalue, "eigenvalues: (eV)", ModuleBase::Ry_to_eV); + + // normalization is already satisfied + // std::cout << "check normalization of eigenvectors:" << std::endl; + // for (int ist = 0;ist < nband;++ist) + // { + // double norm2 = 0; + // for (int ik = 0;ik < psi.get_nk();++ik) + // { + // for (int ib = 0;ib < psi.get_nbasis();++ib) + // { + // norm2 += std::norm(psi(ist, ik, ib)); + // // std::cout << "norm2_now=" << norm2 << std::endl; + // } + // } + // std::cout << "state " << ist << ", norm2=" << norm2 << std::endl; + // } + + // output iters + std::cout << "Average iterative diagonalization steps: " << hsolver::DiagoIterAssist::avg_iter + << " ; where current threshold is: " << hsolver::DiagoIterAssist::PW_DIAG_THR << " . " << std::endl; + } + } +} \ No newline at end of file diff --git a/source/module_lr/lr_spectrum.cpp b/source/module_lr/lr_spectrum.cpp index d889817cfb..5cd9acfdb3 100644 --- a/source/module_lr/lr_spectrum.cpp +++ b/source/module_lr/lr_spectrum.cpp @@ -7,11 +7,9 @@ #include "module_lr/utils/lr_util_hcontainer.h" #include "module_lr/utils/lr_util_print.h" template -void LR::LR_Spectrum::cal_gint_rho(double** rho, const int& nspin_solve, const int& nrxx) +void LR::LR_Spectrum::cal_gint_rho(double** rho, const int& nrxx) { - for (int is = 0;is < nspin_solve;++is) { - ModuleBase::GlobalFunc::ZEROS(rho[is], nrxx); -} + ModuleBase::GlobalFunc::ZEROS(rho[0], nrxx); Gint_inout inout_rho(rho, Gint_Tools::job_type::rho, 1, false); this->gint->cal_gint(&inout_rho); } @@ -26,53 +24,52 @@ inline void check_sum_rule(const double& osc_tot) } template<> -void LR::LR_Spectrum::oscillator_strength() +void LR::LR_Spectrum::oscillator_strength(Grid_Driver& gd, const std::vector& orb_cutoff) { ModuleBase::TITLE("LR::LR_Spectrum", "oscillator_strength"); std::vector& osc = this->oscillator_strength_; // unit: Ry - osc.resize(X.get_nbands(), 0.0); - // const int nspin0 = (this->nspin == 2) ? 2 : 1; use this in NSPIN=4 implementation + osc.resize(nstate, 0.0); double osc_tot = 0.0; elecstate::DensityMatrix DM_trans(&this->pmat, 1, this->kv.kvec_d, this->nk); - DM_trans.init_DMR(&GlobalC::GridD, &this->ucell); - this->transition_dipole_.resize(X.get_nbands(), ModuleBase::Vector3(0.0, 0.0, 0.0)); - for (int istate = 0;istate < X.get_nbands();++istate) + LR_Util::initialize_DMR(DM_trans, this->pmat, this->ucell, gd, orb_cutoff); + this->transition_dipole_.resize(nstate, ModuleBase::Vector3(0.0, 0.0, 0.0)); + for (int istate = 0;istate < nstate;++istate) { - X.fix_b(istate); - - // LR_Util::print_psi_bandfirst(X, "final X", istate); - - //1. transition density + const int offset_b = istate * ldim; //start index of band istate + for (int is = 0;is < this->nspin_x;++is) + { + const int offset_x = offset_b + is * nk * pX[0].get_local_size(); + //1. transition density #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(X, this->pX, this->psi_ks, this->pc, this->naos, this->nocc, this->nvirt, this->pmat); - // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat); + std::vector dm_trans_2d = cal_dm_trans_pblas(X + offset_x, this->pX[is], psi_ks[is], this->pc, this->naos, this->nocc[is], this->nvirt[is], this->pmat); + // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat); #else - std::vector dm_trans_2d = cal_dm_trans_blas(X, this->psi_ks, this->nocc, this->nvirt); - // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); + std::vector dm_trans_2d = cal_dm_trans_blas(X + offset_x, this->psi_ks[is], this->nocc[is], this->nvirt[is]); + // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); #endif - for (int ik = 0;ik < this->nk;++ik) { DM_trans.set_DMK_pointer(ik, dm_trans_2d[ik].data()); } - DM_trans.cal_DMR(); - this->gint->transfer_DM2DtoGrid(DM_trans.get_DMR_vector()); + for (int ik = 0;ik < this->nk;++ik) { DM_trans.set_DMK_pointer(ik, dm_trans_2d[ik].data()); } + DM_trans.cal_DMR(); + this->gint->transfer_DM2DtoGrid(DM_trans.get_DMR_vector()); - // 2. transition density - double** rho_trans; - // LR_Util::new_p2(rho_trans, nspin_solve, this->rho_basis.nrxx); - LR_Util::new_p2(rho_trans, nspin, this->rho_basis.nrxx); // currently gint_kernel_rho uses PARAM.inp.nspin, it needs refactor - this->cal_gint_rho(rho_trans, nspin_solve, this->rho_basis.nrxx); + // 2. transition density + double** rho_trans; + LR_Util::new_p2(rho_trans, 1, this->rho_basis.nrxx); + this->cal_gint_rho(rho_trans, this->rho_basis.nrxx); - // 3. transition dipole moment - for (int ir = 0; ir < rho_basis.nrxx; ++ir) - { - int i = ir / (rho_basis.ny * rho_basis.nplane); - int j = ir / rho_basis.nplane - i * rho_basis.ny; - int k = ir % rho_basis.nplane + rho_basis.startz_current; - ModuleBase::Vector3 rd(static_cast(i) / rho_basis.nx, static_cast(j) / rho_basis.ny, static_cast(k) / rho_basis.nz); //+1/2 better? - rd -= ModuleBase::Vector3(0.5, 0.5, 0.5); //shift to the center of the grid (need ?) - ModuleBase::Vector3 rc = rd * ucell.latvec * ucell.lat0; // real coordinate - for (int is = 0;is < nspin_solve;++is) transition_dipole_[istate] += rc * rho_trans[is][ir]; + // 3. transition dipole moment + for (int ir = 0; ir < rho_basis.nrxx; ++ir) + { + int i = ir / (rho_basis.ny * rho_basis.nplane); + int j = ir / rho_basis.nplane - i * rho_basis.ny; + int k = ir % rho_basis.nplane + rho_basis.startz_current; + ModuleBase::Vector3 rd(static_cast(i) / rho_basis.nx, static_cast(j) / rho_basis.ny, static_cast(k) / rho_basis.nz); //+1/2 better? + rd -= ModuleBase::Vector3(0.5, 0.5, 0.5); //shift to the center of the grid (need ?) + ModuleBase::Vector3 rc = rd * ucell.latvec * ucell.lat0; // real coordinate + transition_dipole_[istate] += rc * rho_trans[0][ir]; + } + LR_Util::delete_p2(rho_trans, 1); } transition_dipole_[istate] *= (ucell.omega / static_cast(gint->get_ncxyz())); // dv - LR_Util::delete_p2(rho_trans, nspin_solve); Parallel_Reduce::reduce_all(transition_dipole_[istate].x); Parallel_Reduce::reduce_all(transition_dipole_[istate].y); Parallel_Reduce::reduce_all(transition_dipole_[istate].z); @@ -83,73 +80,70 @@ void LR::LR_Spectrum::oscillator_strength() } template<> -void LR::LR_Spectrum>::oscillator_strength() +void LR::LR_Spectrum>::oscillator_strength(Grid_Driver& gd, const std::vector& orb_cutoff) { ModuleBase::TITLE("LR::LR_Spectrum", "oscillator_strength"); std::vector& osc = this->oscillator_strength_; // unit: Ry - osc.resize(X.get_nbands(), 0.0); - // const int nspin0 = (this->nspin == 2) ? 2 : 1; use this in NSPIN=4 implementation + osc.resize(nstate, 0.0); double osc_tot = 0.0; elecstate::DensityMatrix, std::complex> DM_trans(&this->pmat, 1, this->kv.kvec_d, this->nk); - DM_trans.init_DMR(&GlobalC::GridD, &this->ucell); + LR_Util::initialize_DMR(DM_trans, this->pmat, this->ucell, gd, orb_cutoff); elecstate::DensityMatrix, double> DM_trans_real_imag(&this->pmat, 1, this->kv.kvec_d, this->nk); - DM_trans_real_imag.init_DMR(&GlobalC::GridD, &this->ucell); + LR_Util::initialize_DMR(DM_trans_real_imag, this->pmat, this->ucell, gd, orb_cutoff); - this->transition_dipole_.resize(X.get_nbands(), ModuleBase::Vector3>(0.0, 0.0, 0.0)); - for (int istate = 0;istate < X.get_nbands();++istate) + this->transition_dipole_.resize(nstate, ModuleBase::Vector3>(0.0, 0.0, 0.0)); + for (int istate = 0;istate < nstate;++istate) { - X.fix_b(istate); - // LR_Util::print_psi_bandfirst(X, "final X", istate); - - //1. transition density + const int offset_b = istate * ldim; //start index of band istate + for (int is = 0;is < this->nspin_x;++is) + { + const int offset_x = offset_b + is * nk * pX[0].get_local_size(); + //1. transition density #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(X, this->pX, this->psi_ks, this->pc, this->naos, this->nocc, this->nvirt, this->pmat, /*renorm_k=*/false, this->nspin_solve); - // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat); + std::vector dm_trans_2d = cal_dm_trans_pblas(X + offset_x, this->pX[is], psi_ks[is], this->pc, this->naos, this->nocc[is], this->nvirt[is], this->pmat, /*renorm_k=*/false, 1); + // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, pmat); #else - std::vector dm_trans_2d = cal_dm_trans_blas(X, this->psi_ks, this->nocc, this->nvirt,/*renorm_k=*/false, this->nspin_solve); - // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); + std::vector dm_trans_2d = cal_dm_trans_blas(X + offset_x, psi_ks[is], this->nocc[is], this->nvirt[is],/*renorm_k=*/false, 1); + // if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); #endif - for (int ik = 0;ik < this->nk;++ik) { DM_trans.set_DMK_pointer(ik, dm_trans_2d[ik].data>()); } - // for (int ik = 0;ik < this->nk;++ik) - // LR_Util::print_tensor>(dm_trans_2d[ik], "1.DMK[ik=" + std::to_string(ik) + "]", dynamic_cast(&this->pmat)); - - DM_trans.cal_DMR(); - - // 2. transition density - double** rho_trans_real; - double** rho_trans_imag; - LR_Util::new_p2(rho_trans_real, nspin_solve, this->rho_basis.nrxx); - LR_Util::new_p2(rho_trans_imag, nspin_solve, this->rho_basis.nrxx); - // real part - LR_Util::get_DMR_real_imag_part(DM_trans, DM_trans_real_imag, ucell.nat, 'R'); - this->gint->transfer_DM2DtoGrid(DM_trans_real_imag.get_DMR_vector()); - this->cal_gint_rho(rho_trans_real, nspin_solve, this->rho_basis.nrxx); - // LR_Util::print_grid_nonzero(rho_trans_real[0], this->rho_basis.nrxx, 10, "rho_trans"); - - // imag part - LR_Util::get_DMR_real_imag_part(DM_trans, DM_trans_real_imag, ucell.nat, 'I'); - this->gint->transfer_DM2DtoGrid(DM_trans_real_imag.get_DMR_vector()); - this->cal_gint_rho(rho_trans_imag, nspin_solve, this->rho_basis.nrxx); - // LR_Util::print_grid_nonzero(rho_trans_imag[0], this->rho_basis.nrxx, 10, "rho_trans"); - - - // 3. transition dipole moment - for (int ir = 0; ir < rho_basis.nrxx; ++ir) - { - int i = ir / (rho_basis.ny * rho_basis.nplane); - int j = ir / rho_basis.nplane - i * rho_basis.ny; - int k = ir % rho_basis.nplane + rho_basis.startz_current; - ModuleBase::Vector3 rd(static_cast(i) / rho_basis.nx, static_cast(j) / rho_basis.ny, static_cast(k) / rho_basis.nz); //+1/2 better? - rd -= ModuleBase::Vector3(0.5, 0.5, 0.5); //shift to the center of the grid (need ?) - ModuleBase::Vector3 rc = rd * ucell.latvec * ucell.lat0; // real coordinate - ModuleBase::Vector3> rc_complex(rc.x, rc.y, rc.z); - for (int is = 0;is < nspin_solve;++is) - transition_dipole_[istate] += rc_complex * - std::complex(rho_trans_real[is][ir], rho_trans_imag[is][ir]); + for (int ik = 0;ik < this->nk;++ik) { DM_trans.set_DMK_pointer(ik, dm_trans_2d[ik].data>()); } + // for (int ik = 0;ik < this->nk;++ik) + // LR_Util::print_tensor>(dm_trans_2d[ik], "1.DMK[ik=" + std::to_string(ik) + "]", dynamic_cast(&this->pmat)); + DM_trans.cal_DMR(); + + // 2. transition density + double** rho_trans_real; + double** rho_trans_imag; + LR_Util::new_p2(rho_trans_real, 1, this->rho_basis.nrxx); + LR_Util::new_p2(rho_trans_imag, 1, this->rho_basis.nrxx); + // real part + LR_Util::get_DMR_real_imag_part(DM_trans, DM_trans_real_imag, ucell.nat, 'R'); + this->gint->transfer_DM2DtoGrid(DM_trans_real_imag.get_DMR_vector()); + this->cal_gint_rho(rho_trans_real, this->rho_basis.nrxx); + // LR_Util::print_grid_nonzero(rho_trans_real[0], this->rho_basis.nrxx, 10, "rho_trans"); + + // imag part + LR_Util::get_DMR_real_imag_part(DM_trans, DM_trans_real_imag, ucell.nat, 'I'); + this->gint->transfer_DM2DtoGrid(DM_trans_real_imag.get_DMR_vector()); + this->cal_gint_rho(rho_trans_imag, this->rho_basis.nrxx); + // LR_Util::print_grid_nonzero(rho_trans_imag[0], this->rho_basis.nrxx, 10, "rho_trans"); + + // 3. transition dipole moment + for (int ir = 0; ir < rho_basis.nrxx; ++ir) + { + int i = ir / (rho_basis.ny * rho_basis.nplane); + int j = ir / rho_basis.nplane - i * rho_basis.ny; + int k = ir % rho_basis.nplane + rho_basis.startz_current; + ModuleBase::Vector3 rd(static_cast(i) / rho_basis.nx, static_cast(j) / rho_basis.ny, static_cast(k) / rho_basis.nz); //+1/2 better? + rd -= ModuleBase::Vector3(0.5, 0.5, 0.5); //shift to the center of the grid (need ?) + ModuleBase::Vector3 rc = rd * ucell.latvec * ucell.lat0; // real coordinate + ModuleBase::Vector3> rc_complex(rc.x, rc.y, rc.z); + transition_dipole_[istate] += rc_complex * std::complex(rho_trans_real[0][ir], rho_trans_imag[0][ir]); + } + LR_Util::delete_p2(rho_trans_real, 1); + LR_Util::delete_p2(rho_trans_imag, 1); } transition_dipole_[istate] *= (ucell.omega / static_cast(gint->get_ncxyz())); // dv - LR_Util::delete_p2(rho_trans_real, nspin_solve); - LR_Util::delete_p2(rho_trans_imag, nspin_solve); Parallel_Reduce::reduce_all(transition_dipole_[istate].x); Parallel_Reduce::reduce_all(transition_dipole_[istate].y); Parallel_Reduce::reduce_all(transition_dipole_[istate].z); @@ -164,11 +158,11 @@ void LR::LR_Spectrum>::oscillator_strength() check_sum_rule(osc_tot); } template -void LR::LR_Spectrum::optical_absorption(const std::vector& freq, const double eta, const int ispin) +void LR::LR_Spectrum::optical_absorption(const std::vector& freq, const double eta, const std::string& spintype) { ModuleBase::TITLE("LR::LR_Spectrum", "optical_absorption"); std::vector& osc = this->oscillator_strength_; - std::ofstream ofs(PARAM.globalv.global_out_dir + "absorption_" + this->spin_types[ispin] + ".dat"); + std::ofstream ofs(PARAM.globalv.global_out_dir + "absorption_" + spintype + ".dat"); if (GlobalV::MY_RANK == 0) { ofs << "Frequency (eV) | wave length(nm) | Absorption (a.u.)" << std::endl; } double FourPI_div_c = ModuleBase::FOUR_PI / 137.036; for (int f = 0;f < freq.size();++f) @@ -182,17 +176,17 @@ void LR::LR_Spectrum::optical_absorption(const std::vector& freq, con } template -void LR::LR_Spectrum::transition_analysis(const int ispin) +void LR::LR_Spectrum::transition_analysis(const std::string& spintype) { ModuleBase::TITLE("LR::LR_Spectrum", "transition_analysis"); std::ofstream& ofs = GlobalV::ofs_running; ofs << "==================================================================== " << std::endl; - ofs << std::setw(40) << this->spin_types[ispin] << std::endl; + ofs << std::setw(40) << spintype << std::endl; ofs << "==================================================================== " << std::endl; ofs << std::setw(8) << "State" << std::setw(30) << "Excitation Energy (Ry, eV)" << std::setw(45) << "Transition dipole x, y, z (a.u.)" << std::setw(30) << "Oscillator strength(a.u.)" << std::endl; ofs << "------------------------------------------------------------------------------------ " << std::endl; - for (int istate = 0;istate < X.get_nbands();++istate) + for (int istate = 0;istate < nstate;++istate) ofs << std::setw(8) << istate << std::setw(15) << std::setprecision(6) << eig[istate] << std::setw(15) << eig[istate] * ModuleBase::Ry_to_eV << std::setw(15) << transition_dipole_[istate].x << std::setw(15) << transition_dipole_[istate].y << std::setw(15) << transition_dipole_[istate].z << std::setw(30) << oscillator_strength_[istate] << std::endl; @@ -202,38 +196,54 @@ void LR::LR_Spectrum::transition_analysis(const int ispin) << std::setw(30) << "Excitation rate" << std::setw(10) << "k-point" << std::endl; ofs << "------------------------------------------------------------------------------------ " << std::endl; - for (int istate = 0;istate < X.get_nbands();++istate) + for (int istate = 0;istate < nstate;++istate) { /// find the main contributions (> 0.5) - X.fix_b(istate); - psi::Psi X_full(X.get_nk(), 1, nocc * nvirt, nullptr, false);// one-band - X_full.zero_out(); - for (int ik = 0;ik < X.get_nk();++ik) + const int loffset_b = istate * ldim; + std::vector X_full(gdim, T(0));// one-band, global + for (int is = 0;is < nspin_x;++is) { - X.fix_k(ik); - X_full.fix_k(ik); + const int loffset_bs = loffset_b + is * nk * pX[0].get_local_size(); + const int goffset_s = is * nk * nocc[0] * nvirt[0]; + for (int ik = 0;ik < nk;++ik) + { + const int loffset_x = loffset_bs + ik * pX[is].get_local_size(); + const int goffset_x = goffset_s + ik * nocc[is] * nvirt[is]; #ifdef __MPI - LR_Util::gather_2d_to_full(this->pX, X.get_pointer(), X_full.get_pointer(), false, nvirt, nocc); + LR_Util::gather_2d_to_full(this->pX[is], X + loffset_x, X_full.data() + goffset_x, false, nvirt[is], nocc[is]); #endif + } } std::map> abs_order; - X_full.fix_k(0); - for (int i = 0;i < X.get_nk() * nocc * nvirt;++i) { double abs = std::abs(X_full.get_pointer()[i]);if (abs > 0.3) { abs_order[abs] = i; } } + for (int i = 0;i < gdim;++i) { double abs = std::abs(X_full.at(i));if (abs > ana_thr) { abs_order[abs] = i; } } if (abs_order.size() > 0) { for (auto it = abs_order.cbegin();it != abs_order.cend();++it) { - int ik = it->second / (nocc * nvirt); - int ipair = it->second - ik * nocc * nvirt; + auto pair_info = get_pair_info(it->second); + const int& is = pair_info["ispin"]; + const std::string s = nspin_x == 2 ? (is == 0 ? "a" : "b") : ""; ofs << std::setw(8) << (it == abs_order.cbegin() ? std::to_string(istate) : " ") - << std::setw(20) << ipair / nvirt + 1 << std::setw(20) << ipair % nvirt + nocc + 1// iocc and ivirt - << std::setw(30) << X_full(ik, ipair) - << std::setw(30) << std::norm(X_full(ik, ipair)) - << std::setw(10) << ik << std::endl; + << std::setw(20) << std::to_string(pair_info["iocc"] + 1) + s << std::setw(20) << std::to_string(pair_info["ivirt"] + nocc[is] + 1) + s// iocc and ivirt + << std::setw(30) << X_full.at(it->second) + << std::setw(30) << std::norm(X_full.at(it->second)) + << std::setw(10) << pair_info["ik"] + 1 << std::endl; } } } ofs << "==================================================================== " << std::endl; - X.fix_kb(0, 0); +} + +template +std::map LR::LR_Spectrum::get_pair_info(const int i) +{ + assert(i >= 0 && i < gdim); + const int dim_spin0 = nk * nocc[0] * nvirt[0]; + const int ispin = (nspin_x == 2 && i >= dim_spin0) ? 1 : 0; + const int ik = (i - ispin*dim_spin0) / (nocc[ispin] * nvirt[ispin]); + const int ipair = (i - ispin*dim_spin0) - ik * nocc[ispin] * nvirt[ispin]; + const int iocc = ipair / nvirt[ispin]; + const int ivirt = ipair % nvirt[ispin]; + return { {"ispin", ispin}, {"ik", ik}, {"iocc", iocc}, {"ivirt", ivirt} }; } template class LR::LR_Spectrum; diff --git a/source/module_lr/lr_spectrum.h b/source/module_lr/lr_spectrum.h index 673a6b46ef..4b521cd564 100644 --- a/source/module_lr/lr_spectrum.h +++ b/source/module_lr/lr_spectrum.h @@ -2,6 +2,7 @@ #include "module_lr/utils/gint_template.h" #include "module_psi/psi.h" #include "module_elecstate/module_dm/density_matrix.h" +#include "module_lr/utils/lr_util.h" namespace LR { @@ -9,38 +10,50 @@ namespace LR class LR_Spectrum { public: - LR_Spectrum(const double* eig, const psi::Psi& X, const int& nspin, const int& naos, const int& nocc, const int& nvirt, - typename TGint::type* gint, const ModulePW::PW_Basis& rho_basis, psi::Psi& psi_ks, - const UnitCell& ucell, const K_Vectors& kv_in, Parallel_2D& pX_in, Parallel_2D& pc_in, Parallel_Orbitals& pmat_in) : - eig(eig), X(X), nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin), - gint(gint), rho_basis(rho_basis), psi_ks(psi_ks), - ucell(ucell), kv(kv_in), pX(pX_in), pc(pc_in), pmat(pmat_in) {}; - /// $$2/3\Omega\sum_{ia\sigma} |\braket{\psi_{i}|\mathbf{r}|\psi_{a}} |^2\int \rho_{\alpha\beta}(\mathbf{r}) \mathbf{r} d\mathbf{r}$$ - void oscillator_strength(); + LR_Spectrum(const int& nspin_global, const int& naos, const std::vector& nocc, const std::vector& nvirt, + typename TGint::type* gint, const ModulePW::PW_Basis& rho_basis, psi::Psi& psi_ks_in, + const UnitCell& ucell, const K_Vectors& kv_in, Grid_Driver& gd, const std::vector& orb_cutoff, + const std::vector& pX_in, const Parallel_2D& pc_in, const Parallel_Orbitals& pmat_in, + const double* eig, const T* X, const int& nstate, const bool& openshell) : + nspin_x(openshell ? 2 : 1), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin_global), + gint(gint), rho_basis(rho_basis), ucell(ucell), kv(kv_in), + pX(pX_in), pc(pc_in), pmat(pmat_in), + eig(eig), X(X), nstate(nstate), + ldim(nk* (nspin_x == 2 ? pX_in[0].get_local_size() + pX_in[1].get_local_size() : pX_in[0].get_local_size())), + gdim(nk* std::inner_product(nocc.begin(), nocc.end(), nvirt.begin(), 0)) + { + for (int is = 0;is < nspin_global;++is) { psi_ks.emplace_back(LR_Util::get_psi_spin(psi_ks_in, is, nk)); } + this->oscillator_strength(gd, orb_cutoff); + }; /// @brief calculate the optical absorption spectrum - void optical_absorption(const std::vector& freq, const double eta, const int ispin = 0); + void optical_absorption(const std::vector& freq, const double eta, const std::string& spintype); /// @brief print out the transition dipole moment and the main contributions to the transition amplitude - void transition_analysis(const int ispin = 0); + void transition_analysis(const std::string& spintype); private: - const int& nspin; - const int nspin_solve = 1; - const int& naos; - const int& nocc; - const int& nvirt; + /// $$2/3\Omega\sum_{ia\sigma} |\braket{\psi_{i}|\mathbf{r}|\psi_{a}} |^2\int \rho_{\alpha\beta}(\mathbf{r}) \mathbf{r} d\mathbf{r}$$ + void oscillator_strength(Grid_Driver& gd, const std::vector& orb_cutoff); + const int nspin_x = 1; ///< 1 for singlet/triplet, 2 for updown(openshell) + const int naos = 1; + const std::vector& nocc; + const std::vector& nvirt; const int nk = 1; + const int nstate = 1; + const int ldim = 1;///< local leading dimension of X, or the data size of each state + const int gdim = 1;///< global leading dimension of X + const double ana_thr = 0.3; ///< {abs(X) > thr} will appear in the transition analysis log const double* eig; - const psi::Psi& X; + const T* X; const K_Vectors& kv; - const psi::Psi& psi_ks; - Parallel_2D& pX; - Parallel_2D& pc; - Parallel_Orbitals& pmat; + std::vector> psi_ks; + const std::vector& pX; + const Parallel_2D& pc; + const Parallel_Orbitals& pmat; typename TGint::type* gint = nullptr; const ModulePW::PW_Basis& rho_basis; const UnitCell& ucell; - const std::vector spin_types = { "Spin Singlet", "Spin Triplet" }; - void cal_gint_rho(double** rho, const int& nspin, const int& nrxx); + void cal_gint_rho(double** rho, const int& nrxx); + std::map get_pair_info(const int i); ///< given the index in X, return its ispin, ik, iocc, ivirt std::vector> transition_dipole_; ///< $\braket{ \psi_{i} | \mathbf{r} | \psi_{a} }$ std::vector oscillator_strength_;///< $2/3\Omega |\sum_{ia\sigma} \braket{\psi_{i}|\mathbf{r}|\psi_{a}} |^2$ diff --git a/source/module_lr/operator_casida/operator_lr_diag.h b/source/module_lr/operator_casida/operator_lr_diag.h index fb2910c1e7..b18343e04a 100644 --- a/source/module_lr/operator_casida/operator_lr_diag.h +++ b/source/module_lr/operator_casida/operator_lr_diag.h @@ -12,50 +12,48 @@ namespace LR class OperatorLRDiag : public hamilt::Operator { public: - OperatorLRDiag(const ModuleBase::matrix& eig_ks_in, const Parallel_2D* pX_in, const int& nk_in, const int& nocc_in, const int& nvirt_in) - : eig_ks(eig_ks_in), pX(pX_in), nk(nk_in), nocc(nocc_in), nvirt(nvirt_in) + OperatorLRDiag(const double* eig_ks, const Parallel_2D& pX_in, const int& nk_in, const int& nocc_in, const int& nvirt_in) + : pX(pX_in), nk(nk_in), nocc(nocc_in), nvirt(nvirt_in) { // calculate the difference of eigenvalues ModuleBase::TITLE("OperatorLRDiag", "OperatorLRDiag"); -#ifdef __MPI - Parallel_Common::bcast_double(eig_ks.c, eig_ks.nr * eig_ks.nc); -#endif - this->act_type = 2; + const int nbands = nocc + nvirt; this->cal_type = hamilt::calculation_type::no; - this->eig_ks_diff.create(nk, pX->get_local_size(), false); + this->eig_ks_diff.create(nk, pX.get_local_size(), false); for (int ik = 0;ik < nk;++ik) - for (int io = 0;io < pX->get_col_size();++io) //nocc_local - for (int iv = 0;iv < pX->get_row_size();++iv) //nvirt_local + { + const int& istart = ik * nbands; + for (int io = 0;io < pX.get_col_size();++io) //nocc_local + { + for (int iv = 0;iv < pX.get_row_size();++iv) //nvirt_local { - int io_g = pX->local2global_col(io); - int iv_g = pX->local2global_row(iv); - this->eig_ks_diff(ik, io * pX->get_row_size() + iv) = eig_ks(ik, nocc + iv_g) - eig_ks(ik, io_g); + int io_g = pX.local2global_col(io); + int iv_g = pX.local2global_row(iv); + this->eig_ks_diff(ik, io * pX.get_row_size() + iv) = eig_ks[istart + nocc + iv_g] - eig_ks[istart + io_g]; } + } + } }; void init(const int ik_in) override {}; /// caution: put this operator at the head of the operator list, /// because vector_mul_vector_op directly assign to (rather than add on) psi_out. - virtual void act(const psi::Psi& psi_in, psi::Psi& psi_out, const int nbands) const override + virtual void act(const int nbands, + const int nbasis, + const int npol, + const T* psi_in, + T* hpsi, + const int ngk_ik = 0)const override { ModuleBase::TITLE("OperatorLRDiag", "act"); - assert(nbands <= psi_in.get_nbands()); - - psi::Psi psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, this->nk, this->pX->get_local_size()); - psi::Psi psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, this->nk, this->pX->get_local_size()); - for (int ib = 0;ib < nbands;++ib) - { - psi_in_bfirst.fix_b(ib); - psi_out_bfirst.fix_b(ib); - hsolver::vector_mul_vector_op()(this->ctx, - psi_in_bfirst.get_nk() * psi_in_bfirst.get_nbasis(), - psi_out_bfirst.get_pointer(), - psi_in_bfirst.get_pointer(), - this->eig_ks_diff.c); - } + const int nlocal_ph = nk * pX.get_local_size(); // local size of particle-hole basis + hsolver::vector_mul_vector_op()(this->ctx, + nk * pX.get_local_size(), + hpsi, + psi_in, + this->eig_ks_diff.c); } private: - const ModuleBase::matrix& eig_ks; - const Parallel_2D* pX; + const Parallel_2D& pX; ModuleBase::matrix eig_ks_diff; const int& nk; const int& nocc; diff --git a/source/module_lr/operator_casida/operator_lr_exx.cpp b/source/module_lr/operator_casida/operator_lr_exx.cpp index 3ed5f06353..fba9b5f898 100644 --- a/source/module_lr/operator_casida/operator_lr_exx.cpp +++ b/source/module_lr/operator_casida/operator_lr_exx.cpp @@ -10,24 +10,21 @@ namespace LR void OperatorLREXX::allocate_Ds_onebase() { ModuleBase::TITLE("OperatorLREXX", "allocate_Ds_onebase"); - this->Ds_onebase.resize(this->nspin_solve); - for (int is = 0;is < this->nspin_solve;++is) { - for (int iat1 = 0;iat1 < ucell.nat;++iat1) { - const int it1 = ucell.iat2it[iat1]; - for (int iat2 = 0;iat2 < ucell.nat;++iat2) { - const int it2=ucell.iat2it[iat2]; - for (auto cell : this->BvK_cells) { - this->Ds_onebase[is][iat1][std::make_pair(iat2, cell)] = aims_nbasis.empty() ? - RI::Tensor({ static_cast(ucell.atoms[it1].nw), static_cast(ucell.atoms[it2].nw) }) : - RI::Tensor({ static_cast( aims_nbasis[it1]), static_cast( aims_nbasis[it2]) }); - } + for (int iat1 = 0;iat1 < ucell.nat;++iat1) { + const int it1 = ucell.iat2it[iat1]; + for (int iat2 = 0;iat2 < ucell.nat;++iat2) { + const int it2 = ucell.iat2it[iat2]; + for (auto cell : this->BvK_cells) { + this->Ds_onebase[iat1][std::make_pair(iat2, cell)] = aims_nbasis.empty() ? + RI::Tensor({ static_cast(ucell.atoms[it1].nw), static_cast(ucell.atoms[it2].nw) }) : + RI::Tensor({ static_cast(aims_nbasis[it1]), static_cast(aims_nbasis[it2]) }); } } } } template<> - void OperatorLREXX::cal_DM_onebase(const int io, const int iv, const int ik, const int is) const + void OperatorLREXX::cal_DM_onebase(const int io, const int iv, const int ik) const { ModuleBase::TITLE("OperatorLREXX", "cal_DM_onebase"); assert(ik == 0); @@ -40,19 +37,19 @@ namespace LR { int iat1 = ucell.itia2iat(it1, ia1); int iat2 = ucell.itia2iat(it2, ia2); - auto& D2d = this->Ds_onebase[is][iat1][std::make_pair(iat2, cell)]; + auto& D2d = this->Ds_onebase[iat1][std::make_pair(iat2, cell)]; const int nw1 = aims_nbasis.empty() ? ucell.atoms[it1].nw : aims_nbasis[it1]; const int nw2 = aims_nbasis.empty() ? ucell.atoms[it2].nw : aims_nbasis[it2]; for (int iw1 = 0;iw1 < nw1;++iw1) for (int iw2 = 0;iw2 < nw2;++iw2) - if(this->pmat->in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2))) + if (this->pmat.in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2))) D2d(iw1, iw2) = this->psi_ks_full(ik, io, ucell.itiaiw2iwt(it1, ia1, iw1)) * this->psi_ks_full(ik, nocc + iv, ucell.itiaiw2iwt(it2, ia2, iw2)); } } } template<> - void OperatorLREXX>::cal_DM_onebase(const int io, const int iv, const int ik, const int is) const + void OperatorLREXX>::cal_DM_onebase(const int io, const int iv, const int ik) const { ModuleBase::TITLE("OperatorLREXX", "cal_DM_onebase"); for (auto cell : this->BvK_cells) @@ -66,92 +63,74 @@ namespace LR { int iat1 = ucell.itia2iat(it1, ia1); int iat2 = ucell.itia2iat(it2, ia2); - auto& D2d = this->Ds_onebase[is][iat1][std::make_pair(iat2, cell)]; + auto& D2d = this->Ds_onebase[iat1][std::make_pair(iat2, cell)]; const int nw1 = aims_nbasis.empty() ? ucell.atoms[it1].nw : aims_nbasis[it1]; const int nw2 = aims_nbasis.empty() ? ucell.atoms[it2].nw : aims_nbasis[it2]; for (int iw1 = 0;iw1 < nw1;++iw1) for (int iw2 = 0;iw2 < nw2;++iw2) - if(this->pmat->in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2))) + if (this->pmat.in_this_processor(ucell.itiaiw2iwt(it1, ia1, iw1), ucell.itiaiw2iwt(it2, ia2, iw2))) D2d(iw1, iw2) = frac * std::conj(this->psi_ks_full(ik, io, ucell.itiaiw2iwt(it1, ia1, iw1))) * this->psi_ks_full(ik, nocc + iv, ucell.itiaiw2iwt(it2, ia2, iw2)); } } } template - void OperatorLREXX::act(const psi::Psi& psi_in, psi::Psi& psi_out, const int nbands) const + void OperatorLREXX::act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik)const { ModuleBase::TITLE("OperatorLREXX", "act"); - assert(nbands <= psi_in.get_nbands()); const int& nk = this->kv.get_nks() / this->nspin; - psi::Psi psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, nk, this->pX->get_local_size()); - psi::Psi psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, nk, this->pX->get_local_size()); // convert parallel info to LibRI interfaces - std::vector, std::set>> judge = RI_2D_Comm::get_2D_judge(*this->pmat); - for (int ib = 0;ib < nbands;++ib) - { - psi_out_bfirst.fix_b(ib); - // suppose Cs,Vs, have already been calculated in the ion-step of ground state, - // DM_trans(k) and DM_trans(R) has already been calculated from psi_in in OperatorLRHxc::act - // but int RI_benchmark, DM_trans(k) should be first calculated here - if (cal_dm_trans) - { -#ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(psi_in_bfirst, *pX, *psi_ks, *pc, naos, nocc, nvirt, *pmat); - if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, *pmat); -#else - std::vector dm_trans_2d = cal_dm_trans_blas(psi_in_bfirst, *psi_ks, nocc, nvirt); - if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); -#endif - // tensor to vector, then set DMK - for (int ik = 0;ik < nk;++ik) { this->DM_trans[ib]->set_DMK_pointer(ik, dm_trans_2d[ik].data()); } - } + std::vector, std::set>> judge = RI_2D_Comm::get_2D_judge(this->pmat); - // 1. set_Ds (once) - // convert to vector for the interface of RI_2D_Comm::split_m2D_ktoR (interface will be unified to ct::Tensor) - std::vector> DMk_trans_vector = this->DM_trans[ib]->get_DMK_vector(); - // assert(DMk_trans_vector.size() == nk); - std::vector*> DMk_trans_pointer(nk); - for (int ik = 0;ik < nk;++ik) {DMk_trans_pointer[ik] = &DMk_trans_vector[ik];} - // if multi-k, DM_trans(TR=double) -> Ds_trans(TR=T=complex) - std::vector>>> Ds_trans = - aims_nbasis.empty() ? - RI_2D_Comm::split_m2D_ktoR(this->kv, DMk_trans_pointer, *this->pmat, this->nspin_solve) - : RI_Benchmark::split_Ds(DMk_trans_vector, aims_nbasis, ucell); //0.5 will be multiplied - // LR_Util::print_CV(Ds_trans[0], "Ds_trans in OperatorLREXX", 1e-10); - // 2. cal_Hs - auto lri = this->exx_lri.lock(); - for (int is = 0;is < nspin_solve;++is) - { - // LR_Util::print_CV(Ds_trans[is], "Ds_trans in OperatorLREXX", 1e-10); - lri->exx_lri.set_Ds(std::move(Ds_trans[is]), lri->info.dm_threshold); - lri->exx_lri.cal_Hs(); - lri->Hexxs[is] = RI::Communicate_Tensors_Map_Judge::comm_map2_first( - lri->mpi_comm, std::move(lri->exx_lri.Hs), std::get<0>(judge[is]), std::get<1>(judge[is])); - lri->post_process_Hexx(lri->Hexxs[is]); - } + // suppose Cs,Vs, have already been calculated in the ion-step of ground state + // and DM_trans has been calculated in hPsi() outside. - // 3. set [AX]_iak = DM_onbase * Hexxs for each occ-virt pair and each k-point - // caution: parrallel + // 1. set_Ds (once) + // convert to vector for the interface of RI_2D_Comm::split_m2D_ktoR (interface will be unified to ct::Tensor) + std::vector> DMk_trans_vector = this->DM_trans->get_DMK_vector(); + // assert(DMk_trans_vector.size() == nk); + std::vector*> DMk_trans_pointer(nk); + for (int ik = 0;ik < nk;++ik) { DMk_trans_pointer[ik] = &DMk_trans_vector[ik]; } + // if multi-k, DM_trans(TR=double) -> Ds_trans(TR=T=complex) + std::vector>>> Ds_trans = + aims_nbasis.empty() ? + RI_2D_Comm::split_m2D_ktoR(this->kv, DMk_trans_pointer, this->pmat, 1) + : RI_Benchmark::split_Ds(DMk_trans_vector, aims_nbasis, ucell); //0.5 will be multiplied + // LR_Util::print_CV(Ds_trans[0], "Ds_trans in OperatorLREXX", 1e-10); + // 2. cal_Hs + auto lri = this->exx_lri.lock(); - for (int io = 0;io < this->nocc;++io) { - for (int iv = 0;iv < this->nvirt;++iv) { - for (int ik = 0;ik < nk;++ik) { - for (int is = 0;is < this->nspin_solve;++is) - { - this->cal_DM_onebase(io, iv, ik, is); //set Ds_onebase for all e-h pairs (not only on this processor) - // LR_Util::print_CV(Ds_onebase[is], "Ds_onebase of occ " + std::to_string(io) + ", virtual " + std::to_string(iv) + " in OperatorLREXX", 1e-10); - const T& ene= 2 * alpha * //minus for exchange(but here plus is right, why?), 2 for Hartree to Ry - lri->exx_lri.post_2D.cal_energy(this->Ds_onebase[is], lri->Hexxs[is]); - if(this->pX->in_this_processor(iv, io)) { - psi_out_bfirst(ik, this->pX->global2local_col(io) * this->pX->get_row_size() + this->pX->global2local_row(iv)) += ene; -} - } -} -} -} + // LR_Util::print_CV(Ds_trans[is], "Ds_trans in OperatorLREXX", 1e-10); + lri->exx_lri.set_Ds(std::move(Ds_trans[0]), lri->info.dm_threshold); + lri->exx_lri.cal_Hs(); + lri->Hexxs[0] = RI::Communicate_Tensors_Map_Judge::comm_map2_first( + lri->mpi_comm, std::move(lri->exx_lri.Hs), std::get<0>(judge[0]), std::get<1>(judge[0])); + lri->post_process_Hexx(lri->Hexxs[0]); + + // 3. set [AX]_iak = DM_onbase * Hexxs for each occ-virt pair and each k-point + // caution: parrallel + + for (int io = 0;io < this->nocc;++io) + { + for (int iv = 0;iv < this->nvirt;++iv) + { + for (int ik = 0;ik < nk;++ik) + { + const int xstart_bk = ik * pX.get_local_size(); + this->cal_DM_onebase(io, iv, ik); //set Ds_onebase for all e-h pairs (not only on this processor) + // LR_Util::print_CV(Ds_onebase[is], "Ds_onebase of occ " + std::to_string(io) + ", virtual " + std::to_string(iv) + " in OperatorLREXX", 1e-10); + const T& ene = 2 * alpha * //minus for exchange(but here plus is right, why?), 2 for Hartree to Ry + lri->exx_lri.post_2D.cal_energy(this->Ds_onebase, lri->Hexxs[0]); + if (this->pX.in_this_processor(iv, io)) + { + hpsi[xstart_bk + ik * pX.get_local_size() + this->pX.global2local_col(io) * this->pX.get_row_size() + this->pX.global2local_row(iv)] += ene; + } + } + } } + } template class OperatorLREXX; template class OperatorLREXX>; diff --git a/source/module_lr/operator_casida/operator_lr_exx.h b/source/module_lr/operator_casida/operator_lr_exx.h index 1ebea0c9f2..ac052d2655 100644 --- a/source/module_lr/operator_casida/operator_lr_exx.h +++ b/source/module_lr/operator_casida/operator_lr_exx.h @@ -22,47 +22,44 @@ namespace LR const int& nocc, const int& nvirt, const UnitCell& ucell_in, - const psi::Psi* psi_ks_in, - std::vector>>& DM_trans_in, + const psi::Psi& psi_ks_in, + std::unique_ptr>& DM_trans_in, // HContainer* hR_in, std::weak_ptr> exx_lri_in, const K_Vectors& kv_in, - Parallel_2D* pX_in, - Parallel_2D* pc_in, - Parallel_Orbitals* pmat_in, + const Parallel_2D& pX_in, + const Parallel_2D& pc_in, + const Parallel_Orbitals& pmat_in, const double& alpha = 1.0, - const bool& cal_dm_trans = false, const std::vector& aims_nbasis = {}) : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), psi_ks(psi_ks_in), DM_trans(DM_trans_in), exx_lri(exx_lri_in), kv(kv_in), - pX(pX_in), pc(pc_in), pmat(pmat_in), ucell(ucell_in), alpha(alpha), cal_dm_trans(cal_dm_trans), + pX(pX_in), pc(pc_in), pmat(pmat_in), ucell(ucell_in), alpha(alpha), aims_nbasis(aims_nbasis) { ModuleBase::TITLE("OperatorLREXX", "OperatorLREXX"); this->cal_type = hamilt::calculation_type::lcao_exx; - this->act_type = 2; this->is_first_node = false; // reduce psi_ks for later use this->psi_ks_full.resize(this->kv.get_nks(), nocc + nvirt, this->naos); - LR_Util::gather_2d_to_full(*this->pc, this->psi_ks->get_pointer(), this->psi_ks_full.get_pointer(), false, this->naos, nocc + nvirt); + LR_Util::gather_2d_to_full(this->pc, this->psi_ks.get_pointer(), this->psi_ks_full.get_pointer(), false, this->naos, nocc + nvirt); // get cells in BvK supercell const TC period = RI_Util::get_Born_vonKarmen_period(kv_in); this->BvK_cells = RI_Util::get_Born_von_Karmen_cells(period); this->allocate_Ds_onebase(); - this->exx_lri.lock()->Hexxs.resize(this->nspin_solve); + this->exx_lri.lock()->Hexxs.resize(1); }; void init(const int ik_in) override {}; - // virtual psi::Psi act(const psi::Psi& psi_in) const override; - virtual void act(const psi::Psi& psi_in, psi::Psi& psi_out, const int nbands) const override; + virtual void act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik = 0)const override; + private: //global sizes const int& nspin; - const int nspin_solve = 1; const int& naos; const int& nocc; const int& nvirt; @@ -71,18 +68,18 @@ namespace LR const bool tdm_sym = false; ///< whether transition density matrix is symmetric const K_Vectors& kv; /// ground state wavefunction - const psi::Psi* psi_ks = nullptr; + const psi::Psi& psi_ks = nullptr; psi::Psi psi_ks_full; const std::vector aims_nbasis={}; ///< number of basis functions for each type of atom in FHI-aims /// transition density matrix - std::vector>>& DM_trans; + std::unique_ptr>& DM_trans; /// density matrix of a certain (i, a, k), with full naos*naos size for each key /// D^{iak}_{\mu\nu}(k): 1/N_k * c^*_{ak,\mu} c_{ik,\nu} /// D^{iak}_{\mu\nu}(R): D^{iak}_{\mu\nu}(k)e^{-ikR} // elecstate::DensityMatrix* DM_onebase; - mutable std::vector>>> Ds_onebase; + mutable std::map>> Ds_onebase; // cells in the Born von Karmen supercell (direct) std::vector> BvK_cells; @@ -99,15 +96,15 @@ namespace LR const UnitCell& ucell; ///parallel info - Parallel_2D* pc = nullptr; - Parallel_2D* pX = nullptr; - Parallel_Orbitals* pmat = nullptr; + const Parallel_2D& pc; + const Parallel_2D& pX; + const Parallel_Orbitals& pmat; // allocate Ds_onebase void allocate_Ds_onebase(); - void cal_DM_onebase(const int io, const int iv, const int ik, const int is) const; + void cal_DM_onebase(const int io, const int iv, const int ik) const; }; } diff --git a/source/module_lr/operator_casida/operator_lr_hxc.cpp b/source/module_lr/operator_casida/operator_lr_hxc.cpp index a2ddfe616c..58f01932b4 100644 --- a/source/module_lr/operator_casida/operator_lr_hxc.cpp +++ b/source/module_lr/operator_casida/operator_lr_hxc.cpp @@ -8,7 +8,6 @@ #include "module_lr/utils/lr_util_print.h" // #include "module_hamilt_lcao/hamilt_lcaodft/DM_gamma_2d_to_grid.h" #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h" -#include "module_lr/dm_trans/dm_trans.h" #include "module_lr/AX/AX.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" @@ -18,127 +17,83 @@ inline std::complex conj(std::complex a) { return std::conj(a); namespace LR { template - void OperatorLRHxc::act(const psi::Psi& psi_in, psi::Psi& psi_out, const int nbands) const + void OperatorLRHxc::act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik)const { ModuleBase::TITLE("OperatorLRHxc", "act"); - assert(nbands <= psi_in.get_nbands()); - const int& nk = this->kv.get_nks() / this->nspin; - - //print - // if (this->first_print) LR_Util::print_psi_kfirst(*psi_ks, "psi_ks"); + const int& sl = ispin_ks[0]; + const auto psil_ks = LR_Util::get_psi_spin(psi_ks, sl, nk); + const int& lgd = gint->gridt->lgd; - this->init_DM_trans(nbands, this->DM_trans); // initialize transion density matrix + this->DM_trans->cal_DMR(); //DM_trans->get_DMR_vector() is 2d-block parallized + // LR_Util::print_DMR(*DM_trans, ucell.nat, "DMR"); - psi::Psi psi_in_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_in, nk, this->pX->get_local_size()); - psi::Psi psi_out_bfirst = LR_Util::k1_to_bfirst_wrapper(psi_out, nk, this->pX->get_local_size()); + // ========================= begin grid calculation========================= + this->grid_calculation(nbands); //DM(R) to H(R) + // ========================= end grid calculation ========================= - const int& lgd = gint->gridt->lgd; - for (int ib = 0;ib < nbands;++ib) - { - // if (this->first_print) LR_Util::print_psi_bandfirst(psi_in_bfirst, "psi_in_bfirst", ib); + // V(R)->V(k) + std::vector v_hxc_2d(nk, LR_Util::newTensor({ pmat.get_col_size(), pmat.get_row_size() })); + for (auto& v : v_hxc_2d) v.zero(); + int nrow = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) ? this->pmat.get_row_size() : this->pmat.get_col_size(); + for (int ik = 0;ik < nk;++ik) { folding_HR(*this->hR, v_hxc_2d[ik].data(), this->kv.kvec_d[ik], nrow, 1); } // V(R) -> V(k) + // LR_Util::print_HR(*this->hR, this->ucell.nat, "4.VR"); + // if (this->first_print) + // for (int ik = 0;ik < nk;++ik) + // LR_Util::print_tensor(v_hxc_2d[ik], "4.V(k)[ik=" + std::to_string(ik) + "]", &this->pmat); - // if Hxc-only, the memory of single-band DM_trans is enough. - // if followed by EXX, we need to allocate memory for all bands. - int ib_dm = (this->next_op == nullptr) ? 0 : ib; - psi_in_bfirst.fix_b(ib); - psi_out_bfirst.fix_b(ib); - - // 1. transition density matrix + // 5. [AX]^{Hxc}_{ai}=\sum_{\mu,\nu}c^*_{a,\mu,}V^{Hxc}_{\mu,\nu}c_{\nu,i} #ifdef __MPI - std::vector dm_trans_2d = cal_dm_trans_pblas(psi_in_bfirst, *pX, *psi_ks, *pc, naos, nocc, nvirt, *pmat); - if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos, *pmat); + cal_AX_pblas(v_hxc_2d, this->pmat, psil_ks, this->pc, naos, nocc[sl], nvirt[sl], this->pX[sl], hpsi); #else - std::vector dm_trans_2d = cal_dm_trans_blas(psi_in_bfirst, *psi_ks, nocc, nvirt); - if (this->tdm_sym) for (auto& t : dm_trans_2d) LR_Util::matsym(t.data(), naos); + cal_AX_blas(v_hxc_2d, psil_ks, nocc[sl], nvirt[sl], hpsi); #endif - // tensor to vector, then set DMK - for (int ik = 0;ik < nk;++ik) { this->DM_trans[ib_dm]->set_DMK_pointer(ik, dm_trans_2d[ik].data()); } - - // if (this->first_print) - // for (int ik = 0;ik < nk;++ik) - // LR_Util::print_tensor>(dm_trans_2d[ik], "1.DMK[ik=" + std::to_string(ik) + "]", this->pmat); - - // use cal_DMR to get DMR form DMK by FT - this->DM_trans[ib_dm]->cal_DMR(); //DM_trans->get_DMR_vector() is 2d-block parallized - // LR_Util::print_DMR(*this->DM_trans[0], ucell.nat, "DM(R) (complex)"); - - // ========================= begin grid calculation========================= - this->grid_calculation(nbands, ib_dm); //DM(R) to H(R) - // ========================= end grid calculation ========================= - - // V(R)->V(k) - std::vector v_hxc_2d(this->kv.get_nks(), - ct::Tensor(ct::DataTypeToEnum::value, ct::DeviceTypeToEnum::value, - { pmat->get_col_size(), pmat->get_row_size() })); - for (auto& v : v_hxc_2d) v.zero(); - int nrow = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) ? this->pmat->get_row_size() : this->pmat->get_col_size(); - for (int ik = 0;ik < nk;++ik) { folding_HR(*this->hR, v_hxc_2d[ik].data(), this->kv.kvec_d[ik], nrow, 1); } // V(R) -> V(k) - // LR_Util::print_HR(*this->hR, this->ucell.nat, "4.VR"); - // if (this->first_print) - // for (int ik = 0;ik < nk;++ik) - // LR_Util::print_tensor(v_hxc_2d[ik], "4.V(k)[ik=" + std::to_string(ik) + "]", this->pmat); - - // 5. [AX]^{Hxc}_{ai}=\sum_{\mu,\nu}c^*_{a,\mu,}V^{Hxc}_{\mu,\nu}c_{\nu,i} -#ifdef __MPI - cal_AX_pblas(v_hxc_2d, *this->pmat, *this->psi_ks, *this->pc, naos, nocc, nvirt, *this->pX, psi_out_bfirst); -#else - cal_AX_blas(v_hxc_2d, *this->psi_ks, nocc, nvirt, psi_out_bfirst); -#endif - // if (this->first_print) LR_Util::print_psi_bandfirst(psi_out_bfirst, "5.AX", ib); - } } template<> - void OperatorLRHxc::grid_calculation(const int& nbands, const int& iband_dm) const + void OperatorLRHxc::grid_calculation(const int& nbands) const { ModuleBase::TITLE("OperatorLRHxc", "grid_calculation(real)"); ModuleBase::timer::tick("OperatorLRHxc", "grid_calculation"); - this->gint->transfer_DM2DtoGrid(this->DM_trans[iband_dm]->get_DMR_vector()); // 2d block to grid + this->gint->transfer_DM2DtoGrid(this->DM_trans->get_DMR_vector()); // 2d block to grid // 2. transition electron density // \f[ \tilde{\rho}(r)=\sum_{\mu_j, \mu_b}\tilde{\rho}_{\mu_j,\mu_b}\phi_{\mu_b}(r)\phi_{\mu_j}(r) \f] double** rho_trans; const int& nrxx = this->pot.lock()->nrxx; - // LR_Util::new_p2(rho_trans, nspin_solve, nrxx); LR_Util::new_p2(rho_trans, 1, nrxx); // currently gint_kernel_rho uses PARAM.inp.nspin, it needs refactor - for (int is = 0;is < nspin_solve;++is)ModuleBase::GlobalFunc::ZEROS(rho_trans[is], nrxx); + ModuleBase::GlobalFunc::ZEROS(rho_trans[0], nrxx); Gint_inout inout_rho(rho_trans, Gint_Tools::job_type::rho, 1, false); this->gint->cal_gint(&inout_rho); // 3. v_hxc = f_hxc * rho_trans - ModuleBase::matrix vr_hxc(nspin_solve, nrxx); //grid - this->pot.lock()->cal_v_eff(rho_trans, &GlobalC::ucell, vr_hxc); - LR_Util::delete_p2(rho_trans, nspin_solve); + ModuleBase::matrix vr_hxc(1, nrxx); //grid + this->pot.lock()->cal_v_eff(rho_trans, &GlobalC::ucell, vr_hxc, ispin_ks); + LR_Util::delete_p2(rho_trans, 1); // 4. V^{Hxc}_{\mu,\nu}=\int{dr} \phi_\mu(r) v_{Hxc}(r) \phi_\mu(r) - // V(R) for each spin - for (int is = 0;is < nspin_solve;++is) - { - double* vr_hxc_is = &vr_hxc.c[is * nrxx]; //v(r) at current spin - Gint_inout inout_vlocal(vr_hxc_is, is, Gint_Tools::job_type::vlocal); - this->gint->get_hRGint()->set_zero(); - this->gint->cal_gint(&inout_vlocal); - } + Gint_inout inout_vlocal(vr_hxc.c, 0, Gint_Tools::job_type::vlocal); + this->gint->get_hRGint()->set_zero(); + this->gint->cal_gint(&inout_vlocal); this->hR->set_zero(); // clear hR for each bands this->gint->transfer_pvpR(&*this->hR, &GlobalC::ucell); //grid to 2d block ModuleBase::timer::tick("OperatorLRHxc", "grid_calculation"); } template<> - void OperatorLRHxc, base_device::DEVICE_CPU>::grid_calculation(const int& nbands, const int& iband_dm) const + void OperatorLRHxc, base_device::DEVICE_CPU>::grid_calculation(const int& nbands) const { ModuleBase::TITLE("OperatorLRHxc", "grid_calculation(complex)"); ModuleBase::timer::tick("OperatorLRHxc", "grid_calculation"); - elecstate::DensityMatrix, double> DM_trans_real_imag(pmat, 1, kv.kvec_d, kv.get_nks() / nspin); + elecstate::DensityMatrix, double> DM_trans_real_imag(&pmat, 1, kv.kvec_d, kv.get_nks() / nspin); DM_trans_real_imag.init_DMR(*this->hR); - hamilt::HContainer HR_real_imag(GlobalC::ucell, this->pmat); - this->initialize_HR(HR_real_imag, ucell, gd, this->pmat); + hamilt::HContainer HR_real_imag(GlobalC::ucell, &this->pmat); + LR_Util::initialize_HR, double>(HR_real_imag, ucell, gd, orb_cutoff_); - auto dmR_to_hR = [&, this](const int& iband_dm, const char& type) -> void + auto dmR_to_hR = [&, this](const char& type) -> void { - LR_Util::get_DMR_real_imag_part(*this->DM_trans[iband_dm], DM_trans_real_imag, ucell.nat, type); + LR_Util::get_DMR_real_imag_part(*this->DM_trans, DM_trans_real_imag, ucell.nat, type); // if (this->first_print)LR_Util::print_DMR(DM_trans_real_imag, ucell.nat, "DMR(2d, real)"); this->gint->transfer_DM2DtoGrid(DM_trans_real_imag.get_DMR_vector()); @@ -147,28 +102,25 @@ namespace LR // 2. transition electron density double** rho_trans; const int& nrxx = this->pot.lock()->nrxx; - // LR_Util::new_p2(rho_trans, nspin_solve, nrxx); - LR_Util::new_p2(rho_trans, nspin, nrxx); // currently gint_kernel_rho uses PARAM.inp.nspin, it needs refactor - for (int is = 0;is < nspin_solve;++is)ModuleBase::GlobalFunc::ZEROS(rho_trans[is], nrxx); + + LR_Util::new_p2(rho_trans, 1, nrxx); // nspin=1 for transition density + ModuleBase::GlobalFunc::ZEROS(rho_trans[0], nrxx); Gint_inout inout_rho(rho_trans, Gint_Tools::job_type::rho, 1, false); this->gint->cal_gint(&inout_rho); // print_grid_nonzero(rho_trans[0], nrxx, 10, "rho_trans"); // 3. v_hxc = f_hxc * rho_trans - ModuleBase::matrix vr_hxc(nspin_solve, nrxx); //grid - this->pot.lock()->cal_v_eff(rho_trans, &GlobalC::ucell, vr_hxc); + ModuleBase::matrix vr_hxc(1, nrxx); //grid + this->pot.lock()->cal_v_eff(rho_trans, &GlobalC::ucell, vr_hxc, ispin_ks); // print_grid_nonzero(vr_hxc.c, this->poticab->nrxx, 10, "vr_hxc"); - LR_Util::delete_p2(rho_trans, nspin_solve); + LR_Util::delete_p2(rho_trans, 1); // 4. V^{Hxc}_{\mu,\nu}=\int{dr} \phi_\mu(r) v_{Hxc}(r) \phi_\mu(r) - for (int is = 0;is < nspin_solve;++is) - { - double* vr_hxc_is = &vr_hxc.c[is * nrxx]; //v(r) at current spin - Gint_inout inout_vlocal(vr_hxc_is, is, Gint_Tools::job_type::vlocal); - this->gint->get_hRGint()->set_zero(); - this->gint->cal_gint(&inout_vlocal); - } + Gint_inout inout_vlocal(vr_hxc.c, 0, Gint_Tools::job_type::vlocal); + this->gint->get_hRGint()->set_zero(); + this->gint->cal_gint(&inout_vlocal); + // LR_Util::print_HR(*this->gint->get_hRGint(), this->ucell.nat, "VR(grid)"); HR_real_imag.set_zero(); this->gint->transfer_pvpR(&HR_real_imag, &GlobalC::ucell, &GlobalC::GridD); @@ -176,8 +128,8 @@ namespace LR LR_Util::set_HR_real_imag_part(HR_real_imag, *this->hR, GlobalC::ucell.nat, type); }; this->hR->set_zero(); - dmR_to_hR(iband_dm, 'R'); //real - if (kv.get_nks() / this->nspin > 1) { dmR_to_hR(iband_dm, 'I'); } //imag for multi-k + dmR_to_hR('R'); //real + if (kv.get_nks() / this->nspin > 1) { dmR_to_hR('I'); } //imag for multi-k ModuleBase::timer::tick("OperatorLRHxc", "grid_calculation"); } diff --git a/source/module_lr/operator_casida/operator_lr_hxc.h b/source/module_lr/operator_casida/operator_lr_hxc.h index 13a3fb7efd..e9ea332f88 100644 --- a/source/module_lr/operator_casida/operator_lr_hxc.h +++ b/source/module_lr/operator_casida/operator_lr_hxc.h @@ -17,104 +17,63 @@ namespace LR //when nspin=2, nks is 2 times of real number of k-points. else (nspin=1 or 4), nks is the real number of k-points OperatorLRHxc(const int& nspin, const int& naos, - const int& nocc, - const int& nvirt, - const psi::Psi* psi_ks_in, - std::vector>>& DM_trans_in, + const std::vector& nocc, + const std::vector& nvirt, + const psi::Psi& psi_ks_in, + std::unique_ptr>& DM_trans_in, typename TGint::type* gint_in, std::weak_ptr pot_in, const UnitCell& ucell_in, const std::vector& orb_cutoff, Grid_Driver& gd_in, const K_Vectors& kv_in, - Parallel_2D* pX_in, - Parallel_2D* pc_in, - Parallel_Orbitals* pmat_in) - : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), + const std::vector& pX_in, + const Parallel_2D& pc_in, + const Parallel_Orbitals& pmat_in, + const std::vector& ispin_ks = { 0 }) + : nspin(nspin), naos(naos), nocc(nocc), nvirt(nvirt), nk(kv_in.get_nks() / nspin), psi_ks(psi_ks_in), DM_trans(DM_trans_in), gint(gint_in), pot(pot_in), ucell(ucell_in), orb_cutoff_(orb_cutoff), gd(gd_in), kv(kv_in), - pX(pX_in), pc(pc_in), pmat(pmat_in) + pX(pX_in), pc(pc_in), pmat(pmat_in), ispin_ks(ispin_ks) { ModuleBase::TITLE("OperatorLRHxc", "OperatorLRHxc"); this->cal_type = hamilt::calculation_type::lcao_gint; - this->act_type = 2; this->is_first_node = true; - this->hR = std::unique_ptr>(new hamilt::HContainer(pmat_in)); - this->initialize_HR(*this->hR, ucell_in, gd_in, pmat_in); - this->DM_trans[0]->init_DMR(*this->hR); + this->hR = std::unique_ptr>(new hamilt::HContainer(&pmat_in)); + LR_Util::initialize_HR(*this->hR, ucell_in, gd_in, orb_cutoff); + assert(&pmat_in == this->hR->get_paraV()); }; ~OperatorLRHxc() { }; void init(const int ik_in) override {}; - // virtual psi::Psi act(const psi::Psi& psi_in) const override; - virtual void act(const psi::Psi& psi_in, psi::Psi& psi_out, const int nbands) const override; + virtual void act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik = 0)const override; + private: - template //T=double, TR=double; T=std::complex, TR=std::complex/double - void initialize_HR(hamilt::HContainer& hR, const UnitCell& ucell, Grid_Driver& gd, const Parallel_Orbitals* pmat) const - { - for (int iat1 = 0; iat1 < ucell.nat; iat1++) - { - auto tau1 = ucell.get_tau(iat1); - int T1, I1; - ucell.iat2iait(iat1, &I1, &T1); - AdjacentAtomInfo adjs; - gd.Find_atom(ucell, tau1, T1, I1, &adjs); - for (int ad = 0; ad < adjs.adj_num + 1; ++ad) - { - const int T2 = adjs.ntype[ad]; - const int I2 = adjs.natom[ad]; - int iat2 = this->ucell.itia2iat(T2, I2); - if (pmat->get_row_size(iat1) <= 0 || pmat->get_col_size(iat2) <= 0) { continue; } - const ModuleBase::Vector3& R_index = adjs.box[ad]; - if (ucell.cal_dtau(iat1, iat2, R_index).norm() * this->ucell.lat0 >= orb_cutoff_[T1] + orb_cutoff_[T2]) { continue; } - hamilt::AtomPair tmp(iat1, iat2, R_index.x, R_index.y, R_index.z, pmat); - hR.insert_pair(tmp); - } - } - hR.allocate(nullptr, true); - hR.set_paraV(pmat); - if (std::is_same::value) { hR.fix_gamma(); } - } - template - void init_DM_trans(const int& nbands, std::vector>>& DM_trans)const - { - // LR_Util::print_DMR(*this->DM_trans[0], ucell.nat, "DMR[ib=" + std::to_string(0) + "]"); - if (this->next_op != nullptr) - { - int prev_size = DM_trans.size(); - if (prev_size > nbands) { for (int ib = nbands;ib < prev_size;++ib) { DM_trans[ib].reset(); } } - DM_trans.resize(nbands); - for (int ib = prev_size;ib < nbands;++ib) - { - // the first dimenstion of DensityMatrix is nk=nks/nspin - DM_trans[ib] = LR_Util::make_unique>(this->pmat, 1, this->kv.kvec_d, this->kv.get_nks() / nspin); - DM_trans[ib]->init_DMR(*this->hR); - } - } - } - void grid_calculation(const int& nbands, const int& iband_dm)const; + void grid_calculation(const int& nbands)const; //global sizes const int& nspin; - const int nspin_solve = 1; ///< in singlet-triplet calculation, the Casida equation is solved respectively so nspin_solve in a single problem is 1 const int& naos; - const int& nocc; - const int& nvirt; + const int nk = 1; + // const int nloc_per_band = 1; ///< local size of each state of X (passed by nbasis in act()) + const std::vector& nocc; + const std::vector& nvirt; + const std::vector ispin_ks = { 0 }; ///< the index of spin of psi_ks used in {AX, DM_trans} const K_Vectors& kv; /// ground state wavefunction - const psi::Psi* psi_ks = nullptr; + const psi::Psi& psi_ks = nullptr; /// transition density matrix - std::vector>>& DM_trans; + std::unique_ptr>& DM_trans; /// transition hamiltonian in AO representation std::unique_ptr> hR = nullptr; /// parallel info - Parallel_2D* pc = nullptr; - Parallel_2D* pX = nullptr; - Parallel_Orbitals* pmat = nullptr; + const Parallel_2D& pc; + const std::vector& pX; + const Parallel_Orbitals& pmat; std::weak_ptr pot; @@ -124,8 +83,6 @@ namespace LR std::vector orb_cutoff_; Grid_Driver& gd; - bool tdm_sym = false; ///< whether transition density matrix is symmetric - /// test mutable bool first_print = true; }; diff --git a/source/module_lr/potentials/kernel_xc.cpp b/source/module_lr/potentials/kernel_xc.cpp index 2990ce5d52..dee51ef3c5 100644 --- a/source/module_lr/potentials/kernel_xc.cpp +++ b/source/module_lr/potentials/kernel_xc.cpp @@ -120,8 +120,8 @@ void LR::KernelXC::f_xc_libxc(const int& nspin, const double& omega, const doubl for (xc_func_type& func : funcs) { - constexpr double rho_threshold = 1E-6; - constexpr double grho_threshold = 1E-10; + const double rho_threshold = 1E-6; + const double grho_threshold = 1E-10; xc_func_set_dens_threshold(&func, rho_threshold); diff --git a/source/module_lr/potentials/pot_hxc_lrtd.cpp b/source/module_lr/potentials/pot_hxc_lrtd.cpp index b2713c8670..ebdb8747c5 100644 --- a/source/module_lr/potentials/pot_hxc_lrtd.cpp +++ b/source/module_lr/potentials/pot_hxc_lrtd.cpp @@ -6,11 +6,13 @@ #include "module_hamilt_general/module_xc/xc_functional.h" #include #include "module_lr/utils/lr_util.h" + +#define FXC_PARA_TYPE const double* const rho, ModuleBase::matrix& v_eff, const std::vector& ispin_op = { 0,0 } namespace LR { // constructor for exchange-correlation kernel PotHxcLR::PotHxcLR(const std::string& xc_kernel_in, const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in, - const Charge* chg_gs/*ground state*/, SpinType st_in) + const Charge* chg_gs/*ground state*/, const SpinType& st_in) :xc_kernel(xc_kernel_in), tpiba_(ucell_in->tpiba), spin_type_(st_in) { this->rho_basis_ = rho_basis_in; @@ -32,29 +34,28 @@ namespace LR } } - void PotHxcLR::cal_v_eff(double** rho, const UnitCell* ucell, ModuleBase::matrix& v_eff) + void PotHxcLR::cal_v_eff(double** rho, const UnitCell* ucell, ModuleBase::matrix& v_eff, const std::vector& ispin_op) { ModuleBase::TITLE("PotHxcLR", "cal_v_eff"); ModuleBase::timer::tick("PotHxcLR", "cal_v_eff"); auto& fxc = this->xc_kernel_components_; - const int& nspin_solve = v_eff.nr; - assert(nspin_solve == 1); // Hartree switch (this->spin_type_) { - case SpinType::S1: // check the coefficient: does S1 already include the factor 2? - v_eff += elecstate::H_Hartree_pw::v_hartree(*ucell, const_cast(this->rho_basis_), nspin_solve, rho); + case SpinType::S1: case SpinType::S2_updown: + v_eff += elecstate::H_Hartree_pw::v_hartree(*ucell, const_cast(this->rho_basis_), 1, rho); break; case SpinType::S2_singlet: - v_eff += 2 * elecstate::H_Hartree_pw::v_hartree(*ucell, const_cast(this->rho_basis_), nspin_solve, rho); + v_eff += 2 * elecstate::H_Hartree_pw::v_hartree(*ucell, const_cast(this->rho_basis_), 1, rho); + break; default: break; } // XC if (xc_kernel == "rpa" || xc_kernel == "hf") { return; } // no xc #ifdef USE_LIBXC - this->kernel_to_potential_[spin_type_](rho[0], v_eff); + this->kernel_to_potential_[spin_type_](rho[0], v_eff, ispin_op); #else throw std::domain_error("GlobalV::XC_Functional::get_func_type() =" + std::to_string(XC_Functional::get_func_type()) + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); @@ -70,13 +71,13 @@ namespace LR if (xc == XCType::LDA) switch (s) { case SpinType::S1: - funcs[s] = [this, &fxc](const double* const rho, ModuleBase::matrix& v_eff)->void + funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void { for (int ir = 0;ir < nrxx;++ir) { v_eff(0, ir) += ModuleBase::e2 * fxc.get_kernel("v2rho2").at(ir) * rho[ir]; } }; break; case SpinType::S2_singlet: - funcs[s] = [this, &fxc](const double* const rho, ModuleBase::matrix& v_eff)->void + funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void { for (int ir = 0;ir < nrxx;++ir) { @@ -87,7 +88,7 @@ namespace LR }; break; case SpinType::S2_triplet: - funcs[s] = [this, &fxc](const double* const rho, ModuleBase::matrix& v_eff)->void + funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void { for (int ir = 0;ir < nrxx;++ir) { @@ -97,6 +98,14 @@ namespace LR } }; break; + case SpinType::S2_updown: + funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void + { + assert(ispin_op.size() >= 2); + const int is = ispin_op[0] + ispin_op[1]; + for (int ir = 0;ir < nrxx;++ir) { v_eff(0, ir) += ModuleBase::e2 * fxc.get_kernel("v2rho2").at(3 * ir + is) * rho[ir]; } + }; + break; default: throw std::domain_error("SpinType =" + std::to_string(static_cast(s)) + " unfinished in " + std::string(__FILE__) + " line " + std::to_string(__LINE__)); @@ -105,7 +114,7 @@ namespace LR else if (xc == XCType::GGA || xc == XCType::HYB_GGA) switch (s) { case SpinType::S1: - funcs[s] = [this, &fxc](const double* const rho, ModuleBase::matrix& v_eff)->void + funcs[s] = [this, &fxc](FXC_PARA_TYPE)->void { // test: output drho // double thr = 1e-1; diff --git a/source/module_lr/potentials/pot_hxc_lrtd.h b/source/module_lr/potentials/pot_hxc_lrtd.h index 6a4df0fd19..a04ab42bb9 100644 --- a/source/module_lr/potentials/pot_hxc_lrtd.h +++ b/source/module_lr/potentials/pot_hxc_lrtd.h @@ -12,14 +12,14 @@ namespace LR /// S1: K^Hartree + K^xc /// S2_singlet: 2*K^Hartree + K^xc_{upup} + K^xc_{updown} /// S2_triplet: K^xc_{upup} - K^xc_{updown} - enum SpinType { S1 = 0, S2_singlet = 1, S2_triplet = 2 }; + enum SpinType { S1 = 0, S2_singlet = 1, S2_triplet = 2, S2_updown = 3 }; enum XCType { None = 0, LDA = 1, GGA = 2, HYB_GGA = 4 }; /// constructor for exchange-correlation kernel PotHxcLR(const std::string& xc_kernel_in, const ModulePW::PW_Basis* rho_basis_in, const UnitCell* ucell_in, const Charge* chg_gs/*ground state*/, - SpinType st_in = SpinType::S1); + const SpinType& st_in = SpinType::S1); ~PotHxcLR() {} void cal_v_eff(const Charge* chg/*excited state*/, const UnitCell* ucell, ModuleBase::matrix& v_eff) override {}; - void cal_v_eff(double** rho, const UnitCell* ucell, ModuleBase::matrix& v_eff); + void cal_v_eff(double** rho, const UnitCell* ucell, ModuleBase::matrix& v_eff, const std::vector& ispin_op = { 0,0 }); int nrxx; private: int nspin; @@ -31,14 +31,15 @@ namespace LR KernelXC xc_kernel_components_; const std::string xc_kernel; const double& tpiba_; - SpinType spin_type_ = SpinType::S1; + const SpinType spin_type_ = SpinType::S1; XCType xc_type_ = XCType::None; // enum class as key for unordered_map is not supported in C++11 sometimes // https://github.com/llvm/llvm-project/issues/49601 // struct SpinHash { std::size_t operator()(const SpinType& s) const { return std::hash()(static_cast(s)); } }; using Tfunc = std::function; + ModuleBase::matrix& /**<[out] v_eff */, + const std::vector& ispin_op) >; // std::unordered_map kernel_to_potential_; std::map kernel_to_potential_; diff --git a/source/module_lr/ri_benchmark/operator_ri_hartree.h b/source/module_lr/ri_benchmark/operator_ri_hartree.h index 311f15cd53..cc398be90c 100644 --- a/source/module_lr/ri_benchmark/operator_ri_hartree.h +++ b/source/module_lr/ri_benchmark/operator_ri_hartree.h @@ -1,3 +1,4 @@ +#pragma once #include "module_hamilt_general/operator.h" #include "module_lr/ri_benchmark/ri_benchmark.h" #include "module_lr/utils/lr_util_print.h" @@ -49,22 +50,18 @@ namespace RI_Benchmark } }; ~OperatorRIHartree() {} - void act(const psi::Psi& X_in, psi::Psi& X_out, const int nbands) const override + void act(const int nbands, const int nbasis, const int npol, const T* psi_in, T* hpsi, const int ngk_ik = 0)const override { assert(GlobalV::MY_RANK == 0); // only serial now - const int nk = 1; - const psi::Psi& X = LR_Util::k1_to_bfirst_wrapper(X_in, nk, npairs); - psi::Psi AX = LR_Util::k1_to_bfirst_wrapper(X_out, nk, npairs); - for (int ib = 0;ib < nbands;++ib) - { - TLRIX CsX_vo = cal_CsX(Cs_vo_mo, &X(ib, 0, 0)); - TLRIX CsX_ov = cal_CsX(Cs_ov_mo, &X(ib, 0, 0)); - // LR_Util::print_CsX(Cs_bX, nvirt, "Cs_bX of state " + std::to_string(ib)); - cal_AX(CV_vo, CsX_vo, &AX(ib, 0, 0), 4.); - cal_AX(CV_vo, CsX_ov, &AX(ib, 0, 0), 4.); - cal_AX(CV_ov, CsX_vo, &AX(ib, 0, 0), 4.); - cal_AX(CV_ov, CsX_ov, &AX(ib, 0, 0), 4.); - } + assert(nbasis == npairs); + TLRIX CsX_vo = cal_CsX(Cs_vo_mo, psi_in); + TLRIX CsX_ov = cal_CsX(Cs_ov_mo, psi_in); + // LR_Util::print_CsX(Cs_bX, nvirt, "Cs_bX of state " + std::to_string(ib)); + // 4 for 4 terms in the expansion of local RI + cal_AX(CV_vo, CsX_vo, hpsi, 4.); + cal_AX(CV_vo, CsX_ov, hpsi, 4.); + cal_AX(CV_ov, CsX_vo, hpsi, 4.); + cal_AX(CV_ov, CsX_ov, hpsi, 4.); } protected: const int& naos; diff --git a/source/module_lr/ri_benchmark/ri_benchmark.hpp b/source/module_lr/ri_benchmark/ri_benchmark.hpp index 671ccd7774..82ab9249ac 100644 --- a/source/module_lr/ri_benchmark/ri_benchmark.hpp +++ b/source/module_lr/ri_benchmark/ri_benchmark.hpp @@ -153,7 +153,7 @@ namespace RI_Benchmark return Amat_full; } template - TLRIX cal_CsX(const TLRI& Cs_mo, TK* X) + TLRIX cal_CsX(const TLRI& Cs_mo, const TK* X) { TLRIX CsX; for (auto& it1 : Cs_mo) diff --git a/source/module_lr/utils/gint_move.hpp b/source/module_lr/utils/gint_move.hpp index acdbdad997..7d21ca6037 100644 --- a/source/module_lr/utils/gint_move.hpp +++ b/source/module_lr/utils/gint_move.hpp @@ -9,15 +9,15 @@ template using D2 = void(*) (T**, size_t); -template -using D3 = void(*) (T***, size_t, size_t); +// template +// using D3 = void(*) (T***, size_t, size_t); // template // D2 d2 = LR_Util::delete_p2; // template // D3 d3 = LR_Util::delete_p3; // Change to C++ 11 D2 d2 = LR_Util::delete_p2; -D3 d3 = LR_Util::delete_p3; +// D3 d3 = LR_Util::delete_p3; Gint& Gint::operator=(Gint&& rhs) diff --git a/source/module_lr/utils/lr_util.cpp b/source/module_lr/utils/lr_util.cpp index 7634fe1574..a8964d5c97 100644 --- a/source/module_lr/utils/lr_util.cpp +++ b/source/module_lr/utils/lr_util.cpp @@ -212,7 +212,7 @@ namespace LR_Util int info = 0; char jobz = 'V', uplo = 'U'; double work_tmp; - constexpr int minus_one = -1; + const int minus_one = -1; dsyev_(&jobz, &uplo, &n, mat, &n, eig, &work_tmp, &minus_one, &info); // get best lwork const int lwork = work_tmp; double* work2 = new double[lwork]; @@ -241,7 +241,7 @@ namespace LR_Util int info = 0; char jobvl = 'N', jobvr = 'V'; //calculate right eigenvectors double work_tmp; - constexpr int minus_one = -1; + const int minus_one = -1; std::vector eig_real(n); std::vector eig_imag(n); const int ldvl = 1, ldvr = n; diff --git a/source/module_lr/utils/lr_util.h b/source/module_lr/utils/lr_util.h index 136b9481f1..93f26be2a6 100644 --- a/source/module_lr/utils/lr_util.h +++ b/source/module_lr/utils/lr_util.h @@ -50,21 +50,17 @@ namespace LR_Util /// =================ALGORITHM==================== //====== newers and deleters======== - //(arbitrary dimention will be supported in the future) - /// @brief delete 2d pointer - /// @tparam T - /// @param p2 - /// @param size + /// @brief delete 2d pointer template void delete_p2(T** p2, size_t size); - - /// @brief delete 3d pointer - /// @tparam T - /// @param p2 - /// @param size1 - /// @param size2 + /// @brief new 2d pointer template - void delete_p3(T*** p3, size_t size1, size_t size2); + void new_p2(T**& p2, size_t size1, size_t size2); + + template ct::Tensor newTensor(const ct::TensorShape& shape) + { + return ct::Tensor(ct::DataTypeToEnum::value, ct::DeviceTypeToEnum::value, shape); + } ///================ BLAS ====================== /// calculate (A+A^T)/2 diff --git a/source/module_lr/utils/lr_util.hpp b/source/module_lr/utils/lr_util.hpp index eb91617111..97a5113eb7 100644 --- a/source/module_lr/utils/lr_util.hpp +++ b/source/module_lr/utils/lr_util.hpp @@ -27,8 +27,6 @@ namespace LR_Util } //====== newers and deleters======== - //(arbitrary dimention will be supported in the future) - /// @brief new 2d pointer /// @tparam T /// @param size1 @@ -43,21 +41,6 @@ namespace LR_Util } }; - /// @brief new 3d pointer - /// @tparam T - /// @param size1 - /// @param size2 - /// @param size3 - template - void new_p3(T***& p3, size_t size1, size_t size2, size_t size3) - { - p3 = new T * *[size1]; - for (size_t i = 0; i < size1; ++i) - { - new_p2(p3[i], size2, size3); - } - }; - /// @brief delete 2d pointer /// @tparam T /// @param p2 @@ -69,31 +52,12 @@ namespace LR_Util { for (size_t i = 0; i < size; ++i) { - if (p2[i] != nullptr) { delete[] p2[i]; -} + if (p2[i] != nullptr) { delete[] p2[i]; } } delete[] p2; } }; - /// @brief delete 3d pointer - /// @tparam T - /// @param p2 - /// @param size1 - /// @param size2 - template - void delete_p3(T*** p3, size_t size1, size_t size2) - { - if (p3 != nullptr) - { - for (size_t i = 0; i < size1; ++i) - { - delete_p2(p3[i], size2); - } - delete[] p3; - } - }; - inline double get_conj(const double& x) { return x; @@ -131,6 +95,13 @@ namespace LR_Util } } + /// get the Psi wrapper of the selected spin from the Psi object + template + psi::Psi get_psi_spin(const psi::Psi& psi_in, const int& is, const int& nk) + { + return psi::Psi(&psi_in(is * nk, 0, 0), psi_in, nk, psi_in.get_nbands()); + } + /// psi(nk=1, nbands=nb, nk * nbasis) -> psi(nb, nk, nbasis) without memory copy template psi::Psi k1_to_bfirst_wrapper(const psi::Psi& psi_kfirst, int nk_in, int nbasis_in) diff --git a/source/module_lr/utils/lr_util_hcontainer.h b/source/module_lr/utils/lr_util_hcontainer.h index 0e0b87d124..4ac4ccb1f3 100644 --- a/source/module_lr/utils/lr_util_hcontainer.h +++ b/source/module_lr/utils/lr_util_hcontainer.h @@ -44,4 +44,39 @@ namespace LR_Util hamilt::HContainer>& HR, const int& nat, const char& type = 'R'); + + template + void initialize_HR(hamilt::HContainer& hR, const UnitCell& ucell, Grid_Driver& gd, const std::vector& orb_cutoff) + { + const auto& pmat = *hR.get_paraV(); + for (int iat1 = 0; iat1 < ucell.nat; iat1++) + { + auto tau1 = ucell.get_tau(iat1); + int T1, I1; + ucell.iat2iait(iat1, &I1, &T1); + AdjacentAtomInfo adjs; + gd.Find_atom(ucell, tau1, T1, I1, &adjs); + for (int ad = 0; ad < adjs.adj_num + 1; ++ad) + { + const int T2 = adjs.ntype[ad]; + const int I2 = adjs.natom[ad]; + int iat2 = ucell.itia2iat(T2, I2); + if (pmat.get_row_size(iat1) <= 0 || pmat.get_col_size(iat2) <= 0) { continue; } + const ModuleBase::Vector3& R_index = adjs.box[ad]; + if (ucell.cal_dtau(iat1, iat2, R_index).norm() * ucell.lat0 >= orb_cutoff[T1] + orb_cutoff[T2]) { continue; } + hamilt::AtomPair tmp(iat1, iat2, R_index.x, R_index.y, R_index.z, &pmat); + hR.insert_pair(tmp); + } + } + hR.allocate(nullptr, true); + // hR.set_paraV(&pmat); + if (std::is_same::value) { hR.fix_gamma(); } + } + template + void initialize_DMR(elecstate::DensityMatrix& dm, const Parallel_Orbitals& pmat, const UnitCell& ucell, Grid_Driver& gd, const std::vector& orb_cutoff) + { + hamilt::HContainer hR_tmp(&pmat); + initialize_HR(hR_tmp, ucell, gd, orb_cutoff); + dm.init_DMR(hR_tmp); + } } \ No newline at end of file diff --git a/source/module_lr/utils/lr_util_print.h b/source/module_lr/utils/lr_util_print.h index 8ef6981f97..80f6a704ec 100644 --- a/source/module_lr/utils/lr_util_print.h +++ b/source/module_lr/utils/lr_util_print.h @@ -12,6 +12,64 @@ namespace LR_Util return (std::abs(v) > threshold ? v : 0); } + template + int read_value(std::ifstream& ifs, T* ptr, const int& size) { for (int i = 0;i < size;++i) { ifs >> ptr[i]; } return size; } + template + int read_value(std::ifstream& ifs, T* ptr, const int& size, Args&&... args) + { + int size_now = 0; + for (int i = 0;i < size;++i) { size_now += read_value(ifs, ptr + size_now, args...); } + return size_now; + } + template + int read_value(const std::string& file, T* ptr, const int& size, Args&&... args) + { + std::ifstream ifs(file); + const int res = read_value(ifs, ptr, size, args...); + ifs.close(); + return res; + } + + template + int write_value(std::ofstream& ofs, const T* ptr, const int& size) + { + for (int i = 0;i < size;++i) { ofs << filter(ptr[i]) << " "; } + ofs << std::endl; + return size; + } + template + int write_value(std::ofstream& ofs, const T* ptr, const int& size, Args&&... args) + { + int size_now = 0; + for (int i = 0;i < size;++i) { size_now += write_value(ofs, ptr + size_now, args...); } + ofs << std::endl; + return size_now; + } + template + int write_value(const std::string& file, const int& prec, const T* ptr, const int& size, Args&&... args) + { + std::ofstream ofs(file); + ofs << std::setprecision(prec) << std::scientific; + const int res = write_value(ofs, ptr, size, args...); + ofs.close(); + return res; + } + template + int print_value(const T* ptr, const int& size) + { + for (int i = 0;i < size;++i) { std::cout << filter(ptr[i]) << " "; } + std::cout << std::endl; + return size; + } + template + int print_value(const T* ptr, const int& size, Args&&... args) + { + int size_now = 0; + for (int i = 0;i < size;++i) { size_now += print_value(ptr + size_now, args...); } + std::cout << std::endl; + return size_now; + } + template void print_psi_bandfirst(const psi::Psi& psi, const std::string& label, const int& ib, const double& threshold = 1e-10) { @@ -34,17 +92,17 @@ namespace LR_Util std::ofstream ofs(filename + "_" + std::to_string(rank) + ".dat"); ofs << std::setprecision(precision) << std::scientific; ofs << psi.get_nbands() << " " << psi.get_nk() << " " << psi.get_nbasis() << "\n"; - for (int ib = 0;ib < psi.get_nbands();++ib) - { - for (int ik = 0;ik < psi.get_nk();++ik) - { - for (int i = 0;i < psi.get_nbasis();++i) - { - ofs << filter(psi(ib, ik, i)) << " "; - } - ofs << "\n"; - } - } + assert(psi.size() == write_value(ofs, &psi(0, 0, 0), psi.get_nbands(), psi.get_nk(), psi.get_nbasis())); + ofs.close(); + } + template + void write_psi_bandfirst(const T* psi, const int& nband, const int& nk, const int& nbasis, + const std::string& filename, const int& rank, const double& threshold = 1e-10, const int& precision = 8) + { + std::ofstream ofs(filename + "_" + std::to_string(rank) + ".dat"); + ofs << std::setprecision(precision) << std::scientific; + ofs << nband << " " << nk << " " << nbasis << "\n"; + assert(nband * nk * nbasis == write_value(ofs, psi, nband, nk, nbasis)); ofs.close(); } template @@ -54,13 +112,7 @@ namespace LR_Util int nbands, nks, nbasis; ifs >> nbands >> nks >> nbasis; psi::Psi psi(nks, nbands, nbasis, nullptr, false); - for (int ib = 0;ib < psi.get_nbands();++ib) { - for (int ik = 0;ik < psi.get_nk();++ik) { - for (int i = 0;i < psi.get_nbasis();++i) { - ifs >> psi(ib, ik, i); -} -} -} + assert(psi.size() == read_value(ifs, psi.get_pointer(), nbands, nks, nbasis)); ifs.close(); return psi; } @@ -101,9 +153,7 @@ namespace LR_Util { std::cout << "first " << nnz << " non-zero elements of " << label << "\n"; int inz = 0;int i = 0; - while (inz < nnz && i < nrxx) { - if (rho[++i] - 0.0 > threshold) { std::cout << rho[i] << " ";++inz; } -}; + while (inz < nnz && i < nrxx) { if (rho[++i] - 0.0 > threshold) { std::cout << rho[i] << " ";++inz; } }; } diff --git a/source/module_lr/utils/test/lr_util_algorithms_test.cpp b/source/module_lr/utils/test/lr_util_algorithms_test.cpp index f47f46e941..f33105d32b 100644 --- a/source/module_lr/utils/test/lr_util_algorithms_test.cpp +++ b/source/module_lr/utils/test/lr_util_algorithms_test.cpp @@ -1,6 +1,7 @@ #include #include "../lr_util.h" +#include "../lr_util_print.h" TEST(LR_Util, PsiWrapper) { @@ -130,6 +131,27 @@ TEST(LR_Util, MatSymComplex) } } +TEST(LR_Util, RWValue) +{ + const std::string file = "RWValue.txt"; + std::ofstream ofs(file); + std::vector vec(2 * 3 * 4 * 5, 0); + for (int i = 0;i < vec.size();++i) vec[i] = i; + LR_Util::write_value(ofs, vec.data(), 2, 3, 4, 5); + ofs.close(); + + std::vector vec1(2 * 3 * 4 * 5, 0); + std::ifstream ifs1(file); + EXPECT_EQ(LR_Util::read_value(ifs1, vec1.data(), 2, 3, 4, 5), 120); + ifs1.close(); + for (int i = 0;i < vec1.size();++i) { EXPECT_EQ(vec1[i], vec[i]); }; + std::vector vec2(2 * 3 * 4 * 5, 0); + std::ifstream ifs2(file); + EXPECT_EQ(LR_Util::read_value(ifs2, vec2.data(), 2 * 3, 4 * 5), 120); + ifs2.close(); + for (int i = 0;i < vec2.size();++i) { EXPECT_EQ(vec2[i], vec[i]); }; +} + int main(int argc, char** argv) { srand(time(NULL)); // for random number generator diff --git a/source/module_parameter/input_parameter.h b/source/module_parameter/input_parameter.h index a4c7cfbe8c..17284d4bb9 100644 --- a/source/module_parameter/input_parameter.h +++ b/source/module_parameter/input_parameter.h @@ -302,7 +302,8 @@ struct Input_para std::string lr_solver = "dav"; ///< the eigensolver for LR-TDDFT double lr_thr = 1e-2; ///< convergence threshold of the LR-TDDFT eigensolver bool out_wfc_lr = false; ///< whether to output the eigenvectors (excitation amplitudes) in the particle-hole basis - std::vector abs_wavelen_range = {0., 0.}; ///< the range of wavelength(nm) to output the absorption spectrum + bool lr_unrestricted = false; ///< whether to use the unrestricted construction for LR-TDDFT + std::vector abs_wavelen_range = {}; ///< the range of wavelength(nm) to output the absorption spectrum double abs_broadening = 0.01; ///< the broadening (eta) for LR-TDDFT absorption spectrum std::string ri_hartree_benchmark = "none"; ///< whether to use the RI approximation for the Hartree potential in LR-TDDFT for benchmark (with FHI-aims/ABACUS read-in style) std::vector aims_nbasis = {}; ///< the number of basis functions for each atom type used in FHI-aims (for benchmark) diff --git a/tests/integrate/291_NO_KP_LR/INPUT b/tests/integrate/291_NO_KP_LR/INPUT index df499ddc16..1826d4f618 100644 --- a/tests/integrate/291_NO_KP_LR/INPUT +++ b/tests/integrate/291_NO_KP_LR/INPUT @@ -6,6 +6,7 @@ orbital_dir ../../../tests/PP_ORB calculation scf nbands 6 symmetry -1 +nspin 2 #Parameters (2.Iteration) ecutwfc 20 @@ -26,12 +27,12 @@ mixing_beta 0.4 lr_nstates 2 # for test/debug, you can try a smaller one like 2 xc_kernel lda -lr_solver dav +lr_solver dav_subspace lr_thr 1e-2 pw_diag_ndim 4 esolver_type ks-lr nvirt 2 -abs_wavelen_range 100 175 +abs_wavelen_range 200 600 diff --git a/tests/integrate/291_NO_KP_LR/result.ref b/tests/integrate/291_NO_KP_LR/result.ref index bcd8824485..3c5a8347b0 100644 --- a/tests/integrate/291_NO_KP_LR/result.ref +++ b/tests/integrate/291_NO_KP_LR/result.ref @@ -1,2 +1 @@ -totexcitationenergyref 0.391462 -totaltimeref +totexcitationenergyref 0.784963 \ No newline at end of file diff --git a/tests/integrate/391_NO_GO_LR/INPUT b/tests/integrate/391_NO_GO_LR/INPUT index 62a139c69d..38476bd933 100644 --- a/tests/integrate/391_NO_GO_LR/INPUT +++ b/tests/integrate/391_NO_GO_LR/INPUT @@ -6,6 +6,7 @@ orbital_dir ../../../tests/PP_ORB calculation scf nbands 6 symmetry -1 +nspin 2 #Parameters (2.Iteration) ecutwfc 10 diff --git a/tests/integrate/391_NO_GO_LR/result.ref b/tests/integrate/391_NO_GO_LR/result.ref index e40418c026..8d6a343a51 100644 --- a/tests/integrate/391_NO_GO_LR/result.ref +++ b/tests/integrate/391_NO_GO_LR/result.ref @@ -1,2 +1 @@ -totexcitationenergyref 1.313946 -totaltimeref +totexcitationenergyref 2.641295 \ No newline at end of file diff --git a/tests/integrate/392_NO_GO_LR_HF/INPUT b/tests/integrate/392_NO_GO_LR_HF/INPUT index 15c3503dfc..e48285b572 100644 --- a/tests/integrate/392_NO_GO_LR_HF/INPUT +++ b/tests/integrate/392_NO_GO_LR_HF/INPUT @@ -6,6 +6,7 @@ orbital_dir ../../../tests/PP_ORB calculation scf nbands 6 symmetry -1 +nspin 2 #Parameters (2.Iteration) ecutwfc 10 diff --git a/tests/integrate/392_NO_GO_LR_HF/result.ref b/tests/integrate/392_NO_GO_LR_HF/result.ref index 7c37d7c788..e5caf54572 100644 --- a/tests/integrate/392_NO_GO_LR_HF/result.ref +++ b/tests/integrate/392_NO_GO_LR_HF/result.ref @@ -1 +1 @@ -totexcitationenergyref 1.587504 \ No newline at end of file +totexcitationenergyref 3.067979 \ No newline at end of file diff --git a/tests/integrate/393_NO_GO_ULR_HF/INPUT b/tests/integrate/393_NO_GO_ULR_HF/INPUT new file mode 100644 index 0000000000..e9acbcda6d --- /dev/null +++ b/tests/integrate/393_NO_GO_ULR_HF/INPUT @@ -0,0 +1,44 @@ +INPUT_PARAMETERS +#Parameters (1.General) +suffix autotest +pseudo_dir ../../../tests/PP_ORB +orbital_dir ../../../tests/PP_ORB +calculation scf +nbands 6 +symmetry -1 +nspin 2 + +#Parameters (2.Iteration) +ecutwfc 10 +scf_thr 1e-2 +scf_nmax 20 +exx_hybrid_step 1 + +#Parameters (3.Basis) +basis_type lcao +gamma_only 1 + +#Parameters (4.Smearing) +smearing_method gaussian +smearing_sigma 0.02 + +#Parameters (5.Mixing) +mixing_type pulay +mixing_beta 0.4 +mixing_gg0 0 + +dft_functional lda +exx_real_number 1 +exx_ccp_rmesh_times 1 + +lr_nstates 3 +xc_kernel lda +lr_solver lapack +lr_thr 1e-2 +lr_unrestricted 1 +pw_diag_ndim 2 + +esolver_type ks-lr + +nvirt 2 +nocc 2 \ No newline at end of file diff --git a/tests/integrate/393_NO_GO_ULR_HF/STRU b/tests/integrate/393_NO_GO_ULR_HF/STRU new file mode 100644 index 0000000000..ceb8aaa84c --- /dev/null +++ b/tests/integrate/393_NO_GO_ULR_HF/STRU @@ -0,0 +1,29 @@ +ATOMIC_SPECIES +H 1.008 H_ONCV_PBE-1.0.upf +O 15.9994 O_ONCV_PBE-1.0.upf + +NUMERICAL_ORBITAL +H_gga_8au_60Ry_2s1p.orb +O_gga_7au_60Ry_2s2p1d.orb + + +LATTICE_CONSTANT +1 + +LATTICE_VECTORS +28 0 0 +0 28 0 +0 0 28 + +ATOMIC_POSITIONS +Cartesian + +H +0 +2 +-12.046787058887078 18.76558614676448 8.395247471328744 1 1 1 +-14.228868795885418 20.61549300274637 7.611989524516571 1 1 1 +O +0 +1 +-13.486789117423204 19.684192208418636 8.958321352749174 1 1 1 diff --git a/tests/integrate/393_NO_GO_ULR_HF/jd b/tests/integrate/393_NO_GO_ULR_HF/jd new file mode 100644 index 0000000000..ef8a41364a --- /dev/null +++ b/tests/integrate/393_NO_GO_ULR_HF/jd @@ -0,0 +1 @@ +test for linear response TDHF using open-shell solver \ No newline at end of file diff --git a/tests/integrate/393_NO_GO_ULR_HF/result.ref b/tests/integrate/393_NO_GO_ULR_HF/result.ref new file mode 100644 index 0000000000..e395c0fb9d --- /dev/null +++ b/tests/integrate/393_NO_GO_ULR_HF/result.ref @@ -0,0 +1 @@ +totexcitationenergyref 1.940703 \ No newline at end of file diff --git a/tests/integrate/CASES_CPU.txt b/tests/integrate/CASES_CPU.txt index 62baa154ce..4c1929ce32 100644 --- a/tests/integrate/CASES_CPU.txt +++ b/tests/integrate/CASES_CPU.txt @@ -244,6 +244,7 @@ 386_NO_GO_MD_S1_HSE 391_NO_GO_LR 392_NO_GO_LR_HF +393_NO_GO_ULR_HF 401_NP_KP_sp 401_NP_KP_spd #501_NO_neighboring_GaAs512 diff --git a/tests/integrate/tools/catch_properties.sh b/tests/integrate/tools/catch_properties.sh index 403330ad98..13af75d5df 100755 --- a/tests/integrate/tools/catch_properties.sh +++ b/tests/integrate/tools/catch_properties.sh @@ -539,7 +539,7 @@ if [ $is_lr == 1 ]; then lr_path=OUT.autotest/running_lr.log lrns=$(get_input_key_value "lr_nstates" "INPUT") lrns1=`echo "$lrns + 1" |bc` - grep -A$lrns1 "Excitation Energy" $lr_path | tail -$lrns | awk '{print $2}' > lr_eig.txt + grep -A$lrns1 "Excitation Energy" $lr_path | awk 'NR > 2 && $2 ~ /^[0-9]+\.[0-9]+$/ {print $2}' > lr_eig.txt lreig_tot=`sum_file lr_eig.txt` echo "totexcitationenergyref $lreig_tot" >>$1 fi