In [2]:
import numpy as np
import re

In [30]:
use_rgx = re.compile(r'^\s*USE\s+(\w+)\s*(?:,\s*ONLY\s*:\s*)?(.*)?',re.IGNORECASE)
#useonly_rgx = re.compile(r'USE\s+(\w+)(\s*,\s*ONLY\s*:\s*)+',re.IGNORECASE)
vars_rgx = re.compile(r'(\w+)',re.IGNORECASE)
module_rgx = re.compile(r'^\s*module\s+(\w+)',re.IGNORECASE)
endmodule_rgx = re.compile(r'^\s*end\s*module\s+(\w+)',re.IGNORECASE)
subroutine_rgx = re.compile(r'^\s*subroutine\s+(\w+)\s*(?:\((.*)\))?',re.IGNORECASE)
endsubroutine_rgx = re.compile(r'^\s*end\s*subroutine\s+(\w+)',re.IGNORECASE)
function_rgx = re.compile(r'^\s*(?:(?:real|complex|int).*\s+)?function\s+(\w+)\s*(?:\((.*?)\))?\s*(?:result\s*(?:\((.*?)\)))?'
                          ,re.IGNORECASE)
endfunction_rgx = re.compile(r'^\s*end\s*function\s+(\w+)?',re.IGNORECASE)



tree_root = 1

class Line(object):
    
    def __init__(self, input_string, fileobj_in):
        self.bare = ''
        self.code = []
        self.comment = []
        continue_line = True
        #self.empty = False
        while continue_line:

            # Remove comments

            try:
                code, comment = re.match(r'\s*(.*?)(\s*!.*)$', input_string).groups()
            except:
                #pass
                code = re.match(r'\s*(.*?)\s*$', input_string).group(1)
                comment = ''



            # Concatenate continued lines

            try:
                code = re.match(r'^\s*(&)?\s*(.*?)(&)\s*$',code).group(2)
                input_string = fileobj_in.readline()
            except:
                code = re.match(r'^\s*(&)?\s*(.*?)(&)?\s*$',code).group(2)
                continue_line = False

            self.bare += code+' '
            self.code.append(code)
            self.comment.append(comment)
        self.empty = len(self.bare) < 2
            
class EmptyLine(Line):
    
    def __init__(self):
        self.bare = ''
        self.code = ['']
        self.comment = ['']
         

class Project(object):
    
    def __init__(self, name):
        self.name = name
        self.sources = []
        
    def add_sourcefile(self, filename):
        self.sources.append(Sourcefile(filename))
        
    def scan_sources(self):
        for src in self.sources:
            print('Scanning', src.name)
            src.scan()
        
    def make_project_tree(self):
        print('PROJECT '+self.name)
        for src in self.sources:
            print('  |')
            src.print_contains()
        

class Codeblock(object):
    
    def __init__(self,name):
        self.name = name
        self.contains = []
        self.imports = {}
    
    def add_import(self, modulename, variables):
        self.imports[modulename], trash = str_to_arg(variables)
        #self.imports.add_module(modulename,variables)
  
    def add_contains(self,obj):
        self.contains.append(obj)
        
    def print_contains(self):
        print('   '*self.tree_level+'---- '+self.__str__())
        #self.print_contains()
        for obj in self.contains:
            print('   '*self.tree_level+'   |')
            #print ('---'*obj.tree_level,obj.typ,obj.name)
            obj.print_contains()
    
        
        


class Sourcefile(Codeblock):
    
    def __init__(self,name):
        super().__init__(name)
        self.parent = None
        self.tree_level = tree_root
        self.typ = 'Sourcefile'
        self.lines = []
    
    def __str__(self):
        return 'FILE '+self.name
        
    def add_line(self, string, fileobj):
        self.lines.append(Line(string,fileobj))
    
    def scan(self):
        context = []
        context.append(self)
        with open(self.name,'r') as src:
            for line in src:
                
                self.lines.append(Line(line,src))
                
                # Parse line to update context
                # (i.e. are we entering a new module/routine/function?)
                
                if len(self.lines[-1].bare) > 0 :
                    output = line_to_context(self.lines[-1].bare,context)
                
                    if  not output == 0 and not output == 1 :
                        print('WARNING: ',line)
                        
    def print_source(self):
        for line in self.lines:
            npartial = len(line.code)
            if npartial > 1:
                for ipar, code in enumerate(line.code):
                    if ipar < npartial -1:
                        print( code + ' & ' + line.comment[ipar])
                    else:
                        print( code + line.comment[ipar])
                
            else:
                print (line.code[0] +' '+ line.comment[0])
        
    def print_bare_source(self):
        for line in self.lines:
            if not line.empty:
                print(line.bare)
                



class Procedure(Codeblock):
    
    def __init__(self, name, argstring, parent):
        super().__init__(name)
        self.parent = parent
        self.tree_level = parent.tree_level + 1
        self.args, self.argstring = str_to_arg(argstring)
        
    
        
        

class Subroutine(Procedure):
    
    def __init__(self,name,argstring,parent):
        super().__init__(name, argstring, parent)
        self.typ = 'Subroutine'
    
    def __str__(self):                
        return 'SUBROUTINE '+self.name+' ('+self.argstring+')'


class Function(Procedure):
    
    def __init__(self,name,argstring,parent):
        super().__init__(name, argstring, parent)
        self.typ = 'Function'
    
    def __str__(self):                
        return 'FUNCTION '+self.name+' ('+self.argstring+')'


        
class Module(Codeblock):
    
    def __init__(self,name,parent):
        super().__init__(name)
        self.parent = parent
        self.tree_level = parent.tree_level + 1
        self.typ = 'Module'
    
    def __str__(self):
        return 'MODULE '+self.name
              

def str_to_arg(string):
    if string is None:
        return None, ''
    args = vars_rgx.findall(string)
    argstring = ''
    nargs = len(args)
    if nargs == 0 :
        return None, ''
    else :
        for iarg, arg in enumerate(args):
            if iarg < nargs-1:
                argstring += arg+', '
            else:
                argstring += arg
        return args, argstring
    #return args, argstring if len(args) > 0 else None, None
    
    
def line_to_context(line, context):
    
    curr_context = context[-1]
    
    chk = 0
    
    #print(line)
    
    # CHECK USE STATEMENTS 
    mo = use_rgx.match(line)
    if mo is not None:
        chk += 1
        curr_context.add_import(mo.group(1), mo.group(2))
        return chk
           
    
    # CHECK NEW SUBROUTINE
    mo = subroutine_rgx.match(line)
    if mo is not None:
        chk += 1
        context.append(Subroutine(mo.group(1), mo.group(2),curr_context))
        curr_context.add_contains(context[-1])
        return chk
        
    
    # CHECK END SUBROUTINE
    mo = endsubroutine_rgx.match(line)
    if mo is not None:
        chk += 1
        check = mo.group(1) == curr_context.name
        if not check:
            print('WARNING: ',mo.group(1), curr_context.name, line)
        context.pop()
        return chk
    
    # CHECK NEW FUNCTION
    mo = function_rgx.match(line)
    if mo is not None:
        chk += 1
        context.append(Function(mo.group(1), mo.group(2),curr_context))
        curr_context.add_contains(context[-1])
        return chk
    
    # CHECK END FUNCTION
    mo = endfunction_rgx.match(line)
    if mo is not None:
        chk += 1
        context.pop()
        return chk
    
    
    # CHECK NEW MODULE
    mo = module_rgx.match(line)
    if mo is not None:
        chk += 1
        context.append(Module(mo.group(1),curr_context))
        curr_context.add_contains(context[-1])
        return chk
        
    # CHECK END MODULE
    mo = endmodule_rgx.match(line)
    if mo is not None:
        chk += 1
        context.pop()
        return chk
    
    return chk
    

In [33]:
files = ['fake.f90']#,'fde_routines.f90']
eQE = Project('eQE')
for file in files:
    eQE.add_sourcefile(file)
eQE.scan_sources()
eQE.make_project_tree()
eQE.sources[0].print_source()


Scanning fake.f90
PROJECT eQE
  |
   ---- FILE fake.f90
      |
      ---- MODULE funct
         |
         ---- SUBROUTINE set_screening_parameter (scrparm_)
         |
         ---- FUNCTION get_screening_parameter ()
         |
         ---- SUBROUTINE set_gau_parameter (gauparm_)
         |
         ---- FUNCTION get_gau_parameter ()
 !
 !-------------------------------------------------------------------
module funct 
 !-------------------------------------------------------------------
 !  derivatives of XC computation drivers: dmxc, dmxc_spin, dmxc_nc, dgcxc,
 !                                         dgcxc_spin
 !
USE io_global, ONLY: stdout 
USE kinds,     ONLY: DP 
IMPLICIT NONE 
 
data exc / 'NOX', 'SLA', 'SL1', 'RXC', 'OEP', 'HF', 'PB0X', 'B3LP', 'KZK' / 
data corr / 'NOC', 'PZ', 'VWN', 'LYP', 'PW', 'WIG', 'HL', 'OBZ',  & 
'OBW', 'GL' , 'KZK' /
 
data gradx / 'NOGX', 'B88', 'GGX', 'PBX',  'RPB', 'HCTH', 'OPTX', & 
'TPSS', 'PB0X', 'B3LP','PSX', 'WCX', 'HSE', 'RW86', 'PBE',  

In [406]:
srcfile = 'funct.f90'

src = Sourcefile(srcfile)

src.scan()


In [407]:
src.contains[0].imports

{'io_global': ['stdout'], 'kinds': ['DP']}

In [408]:
src.print_contains()

---- funct.f90
   |
   ---- MODULE funct
      |
      ---- SUBROUTINE set_dft_from_name (dft_, nochecks_)
      |
      ---- SUBROUTINE set_auxiliary_flags ()
      |
      ---- SUBROUTINE set_dft_value (m, i)
      |
      ---- SUBROUTINE enforce_input_dft (dft_, nomsg)
      |
      ---- SUBROUTINE enforce_dft_exxrpa ()
      |
      ---- SUBROUTINE init_dft_exxrpa ()
      |
      ---- SUBROUTINE start_exx ()
      |
      ---- SUBROUTINE stop_exx ()
      |
      ---- FUNCTION exx_is_active ()
      |
      ---- SUBROUTINE set_exx_fraction (exxf_)
      |
      ---- SUBROUTINE set_screening_parameter (scrparm_)
      |
      ---- FUNCTION get_screening_parameter ()
      |
      ---- SUBROUTINE set_gau_parameter (gauparm_)
      |
      ---- FUNCTION get_gau_parameter ()
      |
      ---- FUNCTION get_iexch ()
      |
      ---- SUBROUTINE set_iexch (value)
      |
      ---- FUNCTION get_icorr ()
      |
      ---- SUBROUTINE set_icorr (value)
      |
      ---- FUNCTION get_igc

In [365]:
print(src.contains[0].contains[0].imports)

<__main__.Imports object at 0x7efde7f860f0>


In [65]:
lines = []
lines.append('  USE mp_bands,   only : me_bgrp, nproc_bgrp, root_bgrp, intra_bgrp_comm')
lines.append('  USE gvect,only:g  ')
lines.append('use fft_base')
imp = Imports()

for line in lines:
    mo = use_rgx.match(line)
    if mo is not None:
        imp.add_module(mo.group(1), mo.group(2))

print(imp.modules)

print('gvecl' in imp.modules)

for module in imp.modules :
    print(module)



#vars_rgx.findall(mo.group(2))
#if mo is not None:
#    module_name = mo.group(1)
#mo.groups()
#print(mo.group(1))
#print(mo.group(2))

{'gvect': ['g'], 'fft_base': None, 'mp_bands': ['me_bgrp', 'nproc_bgrp', 'root_bgrp', 'intra_bgrp_comm']}
False
gvect
fft_base
mp_bands


In [71]:

#def string_to_args(string, rgx):
    
    


src_file = 'data_structure.f90'
#src_file = 'recvec.f90'
use_module_forbidden = ['fft_base','cell_base','gvect','gvecs','stick_set','wvfct','scf','ion_base','vlocal']
with open(src_file) as src:
    modules = []
    subroutines = []
    functions = []
    in_module = False
    in_subroutine = False
    in_function = False
    for line in src:
        continue_line = True
        try:
            line = re.match(r'^.*?\!', line).group(0)[:-1]
        except:
            pass
        while continue_line:
            try:
                line = re.match(r'^.*?\&', line).group(0)[:-1]
                line2 = src.readline()
                line = line + line2[re.match(r'\s*', line2).span(0)[1]:]
            except:
                continue_line = False
        line_s = line.split()
        if len(line_s) > 0:
            print(line)
            if 'module ' in line or 'MODULE ' in line :
                if not in_module :
                    #print(line)
                    in_module = True
                    curr_module = line_s[1]
                    modules.append(curr_module)
                else:
                    in_module = False

            #while line_s[-1] == '&':
                
            #    prev_line = line
                
            #print (line)

print(modules)
        

IndentationError: expected an indented block (<ipython-input-71-8fd632af7845>, line 15)

In [52]:
lines = ['  USE mp_bands,   only : me_bgrp, nproc_bgrp, root_bgrp, intra_bgrp_comm, &\n',
'                         ntask_groups\n']

In [32]:
for line in lines:
    print(re.match(r'^.*?\&', line))
    #print(line)

<_sre.SRE_Match object; span=(0, 75), match='  USE mp_bands,   ONLY : me_bgrp, nproc_bgrp, roo>
None


In [70]:
use_rgx = re.compile(r'USE\s+(\w+)',re.IGNORECASE)
useonly_rgx = re.compile(r'USE\s+(\w+)(\s*,\s*ONLY\s*:\s*)+',re.IGNORECASE)
vars_rgx = re.compile(r'(\w+)')

rgx = re.compile(r'USE\s+(\w+),\s*ONLY\s*:\s*((\w+)(,\s*)?)+',re.IGNORECASE)
rgx2 = re.compile(r'(\w+)(,\s*)?')
mo = use_rgx.search(lines[0])
#mo.group()
if mo is not None:
    module = mo.group(1)
    print(module)
    mo = useonly_rgx.search(lines[0])
    if mo is not None:
        print(mo.group())
        vars_str = lines[0][mo.span()[1]:]
        vars_list = vars_rgx.findall(vars_str)
        for var in vars_list:
            print(var)
        print(lines[0][mo.span()[1]:])
    else:
        pass
#    fall = rgx2.findall(lines[0])
#    for mtch in fall:
#        print(mtch[0])

#mtch = re.match(r'USE', lines[0])
#mtch
#mtch = re.match(r'^\S*?\&', lines[0])

mp_bands
USE mp_bands,   only : 
me_bgrp
nproc_bgrp
root_bgrp
intra_bgrp_comm
me_bgrp, nproc_bgrp, root_bgrp, intra_bgrp_comm, &



In [107]:
line = lines[0][re.match(r'\s*', lines[0]).span(0)[1]:]
#re.match(r'\s*', lines[0]).span(0)[1]
line

'USE mp_bands,   ONLY : me_bgrp, nproc_bgrp, root_bgrp, intra_bgrp_comm, &\n'

In [72]:
a = ['a','b','c']
a

['a', 'b', 'c']

In [73]:
a.append('d')
a

['a', 'b', 'c', 'd']

In [77]:
a.pop()


'd'

In [78]:
a

['a', 'b', 'c']