From b80b58ecd26c2134d84a2de6e6407d2bdb01ac0b Mon Sep 17 00:00:00 2001 From: dadaoqiuzhi <56991198+dadaoqiuzhi@users.noreply.github.com> Date: Thu, 8 Feb 2024 11:22:17 +0800 Subject: [PATCH] encoding debug & upgrade encoding debug & upgrade --- chemi_mechanism/Deconvolu_SysConvert.m | 7 +-- chemi_mechanism/SysConvert.m | 17 ++++--- chemi_mechanism/car_mdf_filemaker.m | 30 +++++++++++- chemi_mechanism/chemi_mechanism.m | 48 +++++++++++-------- chemi_mechanism/xyz_car_pdb_filemaker.m | 30 +++++++++++- lammpstrj2xyz_arc_pdb/Deconvolu_SysConvert.m | 12 +++++ lammpstrj2xyz_arc_pdb/SysConvert.m | 17 ++++--- .../xyz_arc_filemaker_speedupMOLE.m | 30 +++++++++++- lammpstrj2xyz_arc_pdb/xyz_arc_pdb_filemaker.m | 30 +++++++++++- 9 files changed, 182 insertions(+), 39 deletions(-) create mode 100644 lammpstrj2xyz_arc_pdb/Deconvolu_SysConvert.m diff --git a/chemi_mechanism/Deconvolu_SysConvert.m b/chemi_mechanism/Deconvolu_SysConvert.m index 3764bed..f3e7210 100644 --- a/chemi_mechanism/Deconvolu_SysConvert.m +++ b/chemi_mechanism/Deconvolu_SysConvert.m @@ -1,11 +1,12 @@ %This function is used to transform a number to decimal system. function num_10base = Deconvolu_SysConvert(given_num_char,base) -num_table = {'1','2','3','4','5','6','7','8','9','a','b','c','d',... - 'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'}; +num_table = {'0','1','2','3','4','5','6','7','8','9',... + 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',... + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'}; [~,matches] = strsplit(given_num_char,num_table,'CollapseDelimiters',false); num_10base = 0; for i = 1:length(matches) [~,num_true] = ismember(matches{i},num_table); - num_10base = num_10base + num_true*base^(length(matches)-i); + num_10base = num_10base + (num_true-1)*base^(length(matches)-i); end end \ No newline at end of file diff --git a/chemi_mechanism/SysConvert.m b/chemi_mechanism/SysConvert.m index 2a60171..53b66c2 100644 --- a/chemi_mechanism/SysConvert.m +++ b/chemi_mechanism/SysConvert.m @@ -1,13 +1,15 @@ %scrit file name SysConvert %purpose: %This function is used to convert a given number into an expected number -%system. ASCII is used +%system. ASCII is used. function target_num=SysConvert(given_num,base) -if given_num==round(given_num) && given_num>0 && base==round(base) && base>0 +if base > 62 + error('The base for atom id encoding is larger than 62, unrecognizable char except 0-9£¬a-z£¬A-Z may be used. Please check it') +end +if given_num == round(given_num) && given_num > 0 && base == round(base) && base > 0 control=1;remaindermat=[]; else - control=0; - error('\nConversion of number system should be with an integer!!!'); + error('\nThe base and atom id must be positive integer!'); end while control @@ -24,12 +26,15 @@ atomid='';i=size(remaindermat,1); while i - if remaindermat(i,2)<=9 + if remaindermat(i,2) <= 9 %0-9 atomid=strcat(atomid,num2str(remaindermat(i,2))); - else + elseif remaindermat(i,2) > 9 && remaindermat(i,2) <= 35 %a-z atomid=strcat(atomid,char(remaindermat(i,2)+87)); + elseif remaindermat(i,2) > 35 && remaindermat(i,2) <= 61 %A-Z + atomid=strcat(atomid,char(remaindermat(i,2)+29)); end i=i-1; end + target_num=atomid; diff --git a/chemi_mechanism/car_mdf_filemaker.m b/chemi_mechanism/car_mdf_filemaker.m index eb3b4c6..abfff9f 100644 --- a/chemi_mechanism/car_mdf_filemaker.m +++ b/chemi_mechanism/car_mdf_filemaker.m @@ -42,7 +42,35 @@ elementname=eleswap{lib,2}; end atomid_conv=SysConvert(tarBOinform{readline,1},base); - atomname=strcat(elementname,atomid_conv); + if elemax==2 + if formatout==1 || formatout==2 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 3843>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + elseif elemax==1 + if formatout==1 || formatout==2 + if 14776335>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + end [trjrow,~]=size(trjdata);tartrjdata=[]; for i=1:trjrow diff --git a/chemi_mechanism/chemi_mechanism.m b/chemi_mechanism/chemi_mechanism.m index f14a41e..03c6544 100644 --- a/chemi_mechanism/chemi_mechanism.m +++ b/chemi_mechanism/chemi_mechanism.m @@ -69,61 +69,69 @@ if elemax==2 if formatout==1 || formatout==2 fprintf('\nDifferent number system is adopted according to the atom number (ASCII)\n'); - if 262143>=atomnum && atomnum>32767 - fprintf('64 base number system is recommended for atom id');base=64; + if 238327>=atomnum && atomnum>32767 + fprintf('62 base number system is recommended for atom id');base=62; elseif 32767>=atomnum && atomnum>4095 fprintf('32 base number system is recommended for atom id');base=32; elseif 4095>=atomnum && atomnum>999 fprintf('16 base number system is recommended for atom id');base=16; elseif 999>=atomnum fprintf('10 base number system (decimalism) is recommended for atom id'); - elseif atomnum<0 || atomnum>32767 - error('Atom number is less than 0 or larger than 262143. If larger, please check it and modify code accordingly!') + elseif atomnum<=916132831 || atomnum>238327 + fprintf('\nAtom number is no more than 916132831 but larger than 238327. If larger, no element name is list in the number system, indicating 5 ASCII chars are used to encode\n') + base=62; + elseif atomnum > 916132831 + error('Atom number exceeds 916132831, which can not be encoded. Please check it or contact the developer!') end elseif formatout==3 fprintf('\nDifferent number system is adopted according to the atom number (ASCII)\n'); - if 65535>=atomnum && atomnum>16383 - fprintf('256 base number system is recommended for atom id');base=256; - elseif 16383>=atomnum && atomnum>4095 - fprintf('128 base number system is recommended for atom id');base=128; - elseif 4095>=atomnum && atomnum>1023 - fprintf('64 base number system is recommended for atom id');base=64; + if 3843>=atomnum && atomnum>1023 + fprintf('62 base number system is recommended for atom id');base=62; elseif 1023>=atomnum && atomnum>255 fprintf('32 base number system is recommended for atom id');base=32; elseif 255>=atomnum && atomnum>99 fprintf('16 base number system is recommended for atom id');base=16; elseif 99>=atomnum fprintf('10 base number system (decimalism) is recommended for atom id'); - elseif atomnum<0 || atomnum>65535 - error('Atom number is less than 0 or larger than 65535. If larger, please check it and modify code accordingly!') + elseif atomnum<=14776335 || atomnum>3843 + fprintf('\nAtom number is no more than 14776335 but larger than 3843. If larger, no element name is list in the number system, indicating 4 ASCII chars are used to encode\n') + base=62; + elseif atomnum > 14776335 + error('Atom number exceeds 14776335, which can not be encoded. Please check it or contact the developer!') end end elseif elemax==1 if formatout==1 || formatout==2 fprintf('\nDifferent number system is adopted according to the atom number (ASCII)\n'); - if 16777215>=atomnum && atomnum>1048575 - fprintf('64 base number system is recommended for atom id');base=64; + if 14776335>=atomnum && atomnum>1048575 + fprintf('62 base number system is recommended for atom id');base=62; elseif 1048575>=atomnum && atomnum>65535 fprintf('32 base number system is recommended for atom id');base=32; elseif 65535>=atomnum && atomnum>9999 fprintf('16 base number system is recommended for atom id');base=16; elseif 9999>=atomnum fprintf('10 base number system (decimalism) is recommended for atom id'); - elseif atomnum<0 || atomnum>16777215 - error('Atom number is less than 0 or larger than 16777215. If larger, please check it and modify code accordingly!') + elseif atomnum<=916132831 || atomnum>14776335 + fprintf('\nAtom number is no more than 916132831 but larger than 14776335. If larger, no element name is list in the number system, indicating 5 ASCII chars are used to encode\n') + base=62; + elseif atomnum > 916132831 + error('Atom number exceeds 916132831, which can not be encoded. Please check it or contact the developer!') end elseif formatout==3 fprintf('\nDifferent number system is adopted according to the atom number (ASCII)\n'); - if 262143>=atomnum && atomnum>32767 - fprintf('64 base number system is recommended for atom id');base=64; + if 238327>=atomnum && atomnum>32767 + fprintf('64 base number system is recommended for atom id');base=62; elseif 32767>=atomnum && atomnum>4095 fprintf('32 base number system is recommended for atom id');base=32; elseif 4095>=atomnum && atomnum>999 fprintf('16 base number system is recommended for atom id');base=16; elseif 999>=atomnum fprintf('10 base number system (decimalism) is recommended for atom id'); - elseif atomnum<0 || atomnum>262143 - error('Atom number is less than 0 or larger than 262143. If larger, please check it and modify code accordingly!') + elseif atomnum<= 14776335 || atomnum>238327 + fprintf('Atom number is no more than 14776335 but larger than 238327. If larger, no element name is list in the number system, indicating 4 ASCII chars are used to encode') + base=62; + elseif atomnum > 14776335 + error('Atom number exceeds 14776335, which can not be encoded. Please check it or contact the developer!') end end else diff --git a/chemi_mechanism/xyz_car_pdb_filemaker.m b/chemi_mechanism/xyz_car_pdb_filemaker.m index 0115d3d..55171e7 100644 --- a/chemi_mechanism/xyz_car_pdb_filemaker.m +++ b/chemi_mechanism/xyz_car_pdb_filemaker.m @@ -75,7 +75,35 @@ elementname=eleswap{lib,2}; end atomid_conv=SysConvert(tarBOinform{readline,1},base); - atomname=strcat(elementname,atomid_conv); + if elemax==2 + if formatout==1 || formatout==2 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 3843>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + elseif elemax==1 + if formatout==1 || formatout==2 + if 14776335>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + end [trjrow,~]=size(trjdata);tartrjdata=[]; for i=1:trjrow diff --git a/lammpstrj2xyz_arc_pdb/Deconvolu_SysConvert.m b/lammpstrj2xyz_arc_pdb/Deconvolu_SysConvert.m new file mode 100644 index 0000000..f3e7210 --- /dev/null +++ b/lammpstrj2xyz_arc_pdb/Deconvolu_SysConvert.m @@ -0,0 +1,12 @@ +%This function is used to transform a number to decimal system. +function num_10base = Deconvolu_SysConvert(given_num_char,base) +num_table = {'0','1','2','3','4','5','6','7','8','9',... + 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',... + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'}; +[~,matches] = strsplit(given_num_char,num_table,'CollapseDelimiters',false); +num_10base = 0; +for i = 1:length(matches) + [~,num_true] = ismember(matches{i},num_table); + num_10base = num_10base + (num_true-1)*base^(length(matches)-i); +end +end \ No newline at end of file diff --git a/lammpstrj2xyz_arc_pdb/SysConvert.m b/lammpstrj2xyz_arc_pdb/SysConvert.m index 6fa8d2a..53b66c2 100644 --- a/lammpstrj2xyz_arc_pdb/SysConvert.m +++ b/lammpstrj2xyz_arc_pdb/SysConvert.m @@ -1,13 +1,15 @@ %scrit file name SysConvert %purpose: %This function is used to convert a given number into an expected number -%system. ASCII is used¡£ +%system. ASCII is used. function target_num=SysConvert(given_num,base) -if given_num==round(given_num) && given_num>0 && base==round(base) && base>0 +if base > 62 + error('The base for atom id encoding is larger than 62, unrecognizable char except 0-9£¬a-z£¬A-Z may be used. Please check it') +end +if given_num == round(given_num) && given_num > 0 && base == round(base) && base > 0 control=1;remaindermat=[]; else - control=0; - error('\nConversion of number system should be with an integer!!!'); + error('\nThe base and atom id must be positive integer!'); end while control @@ -24,12 +26,15 @@ atomid='';i=size(remaindermat,1); while i - if remaindermat(i,2)<=9 + if remaindermat(i,2) <= 9 %0-9 atomid=strcat(atomid,num2str(remaindermat(i,2))); - else + elseif remaindermat(i,2) > 9 && remaindermat(i,2) <= 35 %a-z atomid=strcat(atomid,char(remaindermat(i,2)+87)); + elseif remaindermat(i,2) > 35 && remaindermat(i,2) <= 61 %A-Z + atomid=strcat(atomid,char(remaindermat(i,2)+29)); end i=i-1; end + target_num=atomid; diff --git a/lammpstrj2xyz_arc_pdb/xyz_arc_filemaker_speedupMOLE.m b/lammpstrj2xyz_arc_pdb/xyz_arc_filemaker_speedupMOLE.m index 016ce47..943ebe2 100644 --- a/lammpstrj2xyz_arc_pdb/xyz_arc_filemaker_speedupMOLE.m +++ b/lammpstrj2xyz_arc_pdb/xyz_arc_filemaker_speedupMOLE.m @@ -440,7 +440,35 @@ elementname=eleswap{lib,2}; end atomid_conv=SysConvert(tarBOinform{trjreadline,1},base); - atomname=strcat(elementname,atomid_conv); + if elemax==2 + if formatout==1 || formatout==2 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 3843>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + elseif elemax==1 + if formatout==1 || formatout==2 + if 14776335>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + end [trjrow,~]=size(trjdata);tartrjdata=[]; for i=1:trjrow diff --git a/lammpstrj2xyz_arc_pdb/xyz_arc_pdb_filemaker.m b/lammpstrj2xyz_arc_pdb/xyz_arc_pdb_filemaker.m index bdf0d01..1abb989 100644 --- a/lammpstrj2xyz_arc_pdb/xyz_arc_pdb_filemaker.m +++ b/lammpstrj2xyz_arc_pdb/xyz_arc_pdb_filemaker.m @@ -610,7 +610,35 @@ elementname=eleswap{lib,2}; end atomid_conv=SysConvert(tarBOinform{trjreadline,1},base); - atomname=strcat(elementname,atomid_conv);% + if elemax==2 + if formatout==1 || formatout==2 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 3843>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + elseif elemax==1 + if formatout==1 || formatout==2 + if 14776335>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + elseif formatout==3 + if 238327>=atomnum + atomname=strcat(elementname,atomid_conv); + else + atomname=atomid_conv; + end + end + end [trjrow,~]=size(trjdata);tartrjdata=[]; for i=1:trjrow