# comMotif & disMotif (common and distinct motifs)

In [None]:
class MotiDict:
    """Collect the execution-trace slices that belong to common and distinct motifs.

    The constructor copies its lookup tables from the collaborators it is
    given and immediately fills two dictionaries:

    * ``comMoti_dict`` - one entry per stage flagged as common,
    * ``disMoti_dict`` - entries for every non-gap motif.

    Both are keyed ``'s<stage+1>_<motif id without its first character>'`` and
    hold ``executionTrace[hk][start:end+1]`` (the end index is inclusive).

    Parameters of ``__init__``:
        stageMatrix: stored as-is; not read by this class.
        Motif: object providing ``getStageMatrixIndex()``; the result is
            indexed as ``[hk][stage] -> (start, end)``.
        execTrace_dit: mapping ``hk -> sliceable trace``.
        outputStage: object providing ``getStage2common()``, ``getStage2Hk()``,
            ``getStage2node()`` and ``getStageGap()``.
    """

    # Class-level placeholders; every one of them is rebound per instance
    # in __init__.
    stageMatrix = None
    stageMatrixIndex = None
    executionTrace = None # include "tick" featureprofile

    stage2commonDi = None
    stage2HkDi = None
    stage2nodeDi = None
    stageGaplb = None

    # These used to be class-level dict() objects, which made every instance
    # write into the same two dictionaries. They are now created in __init__.
    comMoti_dict = None
    disMoti_dict = None

    def __init__(self, stageMatrix, Motif, execTrace_dit, outputStage):
        print("==Calculate common moitf==")
        self.stageMatrix = stageMatrix
        self.stageMatrixIndex = Motif.getStageMatrixIndex()
        self.executionTrace = execTrace_dit

        self.stage2commonDi = outputStage.getStage2common()
        self.stage2HkDi = outputStage.getStage2Hk()
        self.stage2nodeDi = outputStage.getStage2node()
        self.stageGaplb = outputStage.getStageGap()

        # Fresh result containers so separate instances never share state.
        self.comMoti_dict = {}
        self.disMoti_dict = {}

        self.__setComMoti()
        self.__setDisMoti()

    #===private function

    def __traceSlice(self, hk, stage):
        """Return executionTrace[hk] restricted to the (inclusive) range of `stage`."""
        span = self.stageMatrixIndex[hk][stage]
        start, end = span[0], span[1]
        return self.executionTrace[hk][start:end + 1]

    # Capture complete info of common motif from executionTrace
    def __setComMoti(self):
        """Fill comMoti_dict with one trace slice per stage flagged as common."""
        for stage in self.stage2commonDi:
            if not self.stage2commonDi[stage]:
                continue

            # Only the first motif of the stage is used (the original comment
            # states that a common stage has exactly one motif).
            stageMotifs = self.stage2HkDi[stage + 1]
            motifKey = next(iter(stageMotifs))
            hk = stageMotifs[motifKey][0]

            # motifKey[1:] drops the one-character prefix of the motif id.
            key = 's' + str(stage + 1) + '_' + motifKey[1:]
            self.comMoti_dict[key] = self.__traceSlice(hk, stage)

        print("--- 1 set CommontMotif dict fin---")

    # Capture complete info of distinct motif from executionTrace
    def __setDisMoti(self):
        """Fill disMoti_dict with trace slices for every motif that is not the gap label."""
        # NOTE(review): every stage in stage2nodeDi is combined with the motifs
        # of *every* stage in stage2HkDi, so later combinations overwrite
        # earlier ones that produce the same key. __setComMoti indexes
        # stage2HkDi with stage+1 instead - confirm whether that was intended
        # here as well. Behaviour is kept unchanged.
        for stage in self.stage2nodeDi:
            for stage1 in self.stage2HkDi:
                for motifID in self.stage2HkDi[stage1]:
                    if motifID == self.stageGaplb:
                        continue
                    hk = self.stage2HkDi[stage1][motifID][0]
                    key = 's' + str(stage + 1) + '_' + motifID[1:]
                    self.disMoti_dict[key] = self.__traceSlice(hk, stage)

        print("--- 2 set DistinctMotif dict fin---")

    #===public function
    def getComMoti(self):
        """Return the common-motif dictionary built by the constructor."""
        return self.comMoti_dict

    def getDisMoti(self):
        """Return the distinct-motif dictionary built by the constructor."""
        return self.disMoti_dict
        

# Resource Analysis

In [None]:
class ComResAnalysis:
    """Extract and summarise the resources named in motif API calls.

    ``motiDict`` maps motif keys of the form ``'s<number>_<id>'`` to an
    iterable of 2-tuples ``(t, a)``. ``a`` is a call string
    ``'<api>#PR#<name>=<value>?<name>=<value>...'``; the ``#PR#`` part may be
    missing when the call has no parameters. The first tuple element is not
    read by this class.

    Construction runs the whole pipeline in order:

    1. ``__setAPI``          - API categories and the SC_API list,
    2. ``__setScApiRes``     - resource values found in SC_API calls,
    3. ``__checkMotiSubset`` - prints motifs whose parameter sets are subsets
       of another motif's,
    4. ``_setResStatis``     - per-resource frequency / category statistics,
    5. ``__setWinnowRes``    - the same statistics merged by lower-cased
       last backslash-separated path component.
    """

    # Class-level placeholders; all of them are rebound per instance.
    motiDict = None
    motiDiKeySort = None

    type2API_dict = None
    SC_API = None

    cat2res_dict = None
    res_list = None

    resFreq_dist = None
    res2poli_dict = None  #res2pointList_dict

    winCat2res_dict = None
    winResFreq_dist = None
    winRes2poli_dict = None
    #winRes_list=> winResFreq_dist.keys()

    def __init__(self, motiDict):
        print("==Analyzing common motif resources==")
        self.motiDict = motiDict
        # Motif keys ordered by the integer between the leading character
        # and the first underscore.
        self.motiDiKeySort = sorted(self.motiDict, key=lambda k: int(k[1:k.index('_')]))

        self.__setAPI()
        self.__setScApiRes()
        self.__checkMotiSubset()
        self._setResStatis()
        self.__setWinnowRes()


    #===private function

    def __setAPI(self):
        """Define the category -> API table and derive SC_API from it."""
        self.type2API_dict = {
            'Library':['LoadLibrary'],
            'Process':['CreateProcess', 'CreateProcessInternal',
                      'OpenProcess', 'ExitProcess', 'TerminateProcess',
                      'WinExec', 'CreateRemoteThread', 'OpenThread',
                      'CreateThread', 'TerminateThread', 'CloseHandle'],
            'File':['CopyFile', 'CreateFile', 'WriteFile',
                    'DeleteFile', 'ReadFile'],
            # NOTE(review): '=RegDeleteKey' looks like a leftover; it is kept
            # because it is part of the list returned by getScAPI().
            'Registry':['RegOpenCurrentUser', 'RegQueryValue', 'RegEnumValue',
                       'RegOpenKey', 'RegCloseKey', 'RegSetValue',
                       'RegCreateKey', 'RegDeleteValue', '=RegDeleteKey', 'RegDeleteKey'],
            'Network':['WinHttpConnect', 'WinHttpCreateUrl', 'WinHttpOpen',
                      'WinHttpOpenRequest', 'WinHttpReadData',
                      'WinHttpSendRequest', 'WinHttpWriteData',
                      'WinHttpGetProxyForUrl', 'InternetOpen',
                      'InternetConnect', 'HttpSendRequest',
                      'GetUrlCacheEntryInfo']
        }

        # Categorised APIs that are excluded from SC_API.
        NSC_API = ['ReadFile',
                   'RegOpenCurrentUser',
                   'RegQueryValue',
                   'RegEnumValue']

        self.SC_API = [api for cat in self.type2API_dict for api in self.type2API_dict[cat] if api not in NSC_API]

        print("---1 Set API Done---")

    def __categoryOf(self, api):
        """Return the first category listing `api`, or None when no category does."""
        for cat in self.type2API_dict:
            if api in self.type2API_dict[cat]:
                return cat
        return None

    #get SC_API and parameter in motifs
    def __setScApiRes(self):
        """Collect resource values from SC_API calls into res_list and cat2res_dict."""
        self.cat2res_dict = {c:[] for c in self.type2API_dict}
        self.res_list = list()

        # 'data=' is only collected for these registry value types.
        regTypeTags = ('?type=REG_SZ?', '?type=REG_EXPAND_SZ?', '?type=REG_MULTI_SZ?')
        # Parameter names whose values are resources; the order fixes the
        # order in which values are appended for a single call.
        resourcePars = ('hKey=', 'hName=', 'dwThreadId=', 'fileName=',
                        'lpFileName=', 'procName=', 'dwProcessId=',
                        'pswzServerName=')

        for m in self.motiDiKeySort:
            for t,a in self.motiDict[m]:
                pieces = a.split('#PR#')
                api = pieces[0]
                if len(pieces) < 2:
                    # Call without a parameter section: echo it and use a sentinel.
                    print(a)
                    p = 'noPar'
                else:
                    p = pieces[1]

                if api not in self.SC_API:
                    continue

                if '?data=' in p and any(tag in p for tag in regTypeTags):
                    self.__addCat2res('data=', p, api)

                for par in resourcePars:
                    if par in p:
                        self.__addCat2res(par, p, api)

        self.res_list = sorted(self.res_list)
        print("---2 Set SC_API res done---")

    def __addCat2res(self ,par, p, api ):
        """Record the value of every '?'-separated segment of `p` that contains `par`.

        The value is the text after the segment's first '=' with surrounding
        whitespace stripped; it is appended to res_list and to the list of
        the category `api` belongs to.
        """
        real_cat = self.__categoryOf(api)
        for s in p.split('?'):
            if par in s:
                var = s[s.index('=')+1:].strip()
                self.res_list.append(var)
                self.cat2res_dict[real_cat].append(var)

    def __checkMotiSubset(self):
        """Print every motif whose set of parameter tuples is a subset of another's."""
        codeblock_dict = {m:set() for m in self.motiDict}

        for m in self.motiDiKeySort:
            for t,a in self.motiDict[m]:
                pieces = a.split('#PR#')
                if len(pieces) > 1:
                    # Non-empty parameter segments of the call.
                    parlist = tuple(filter(None, pieces[1].split('?')))
                else:
                    # One-element sentinel; tuple('noPar') would split the
                    # string into single characters.
                    parlist = ('noPar',)

                codeblock_dict[m].add(parlist)

        print("\ttest for codeblock")
        for m1 in self.motiDict:
            for m2 in self.motiDict:
                if m1 != m2 and codeblock_dict[m1] and codeblock_dict[m1].issubset(codeblock_dict[m2]):
                    print("\t", m1, "in", m2)

        print("---3 Check Motif subset---")

    def __setWinnowRes(self):
        """Merge the resource statistics by lower-cased last path component.

        A resource is reduced to the text after its last backslash, in lower
        case. Resources that reduce to the same name have their frequencies,
        category lists and point lists combined.
        """
        self.winResFreq_dist = dict()
        self.winRes2poli_dict = dict()
        self.winCat2res_dict = dict()

        for v in self.resFreq_dist:
            newV = v.split('\\')[-1].lower()
            entry = self.resFreq_dist[v]

            if newV in self.winResFreq_dist:
                merged = self.winResFreq_dist[newV]
                merged[0] = merged[0] + entry[0]      # freq
                merged[2] = merged[2] + entry[2]      # cat_list
                merged[1] = len(set(merged[2]))       # cat
                self.winRes2poli_dict[newV] = self.winRes2poli_dict[newV] + self.res2poli_dict[v]
            else:
                # Store copies: merging later must not modify resFreq_dist
                # or res2poli_dict.
                firstEntry = list(entry)
                firstEntry[2] = list(entry[2])
                self.winResFreq_dist[newV] = firstEntry
                self.winRes2poli_dict[newV] = list(self.res2poli_dict[v])

        for cat in self.cat2res_dict:
            # De-duplicate while keeping first-seen order.
            uniqueNames = []
            for res in self.cat2res_dict[cat]:
                newRes = res.split('\\')[-1].lower()
                if newRes not in uniqueNames:
                    uniqueNames.append(newRes)
            self.winCat2res_dict[cat] = uniqueNames

        print("---5 Set Winnow res---")



    #===protect function
    #all SC_API's parameter (Statistics info.)
    def _setResStatis(self):
        """Build resFreq_dist and res2poli_dict for every distinct resource.

        For each resource, every call string of every motif that contains the
        resource as a substring is examined. Calls whose API belongs to no
        category are skipped (the lookup used to raise IndexError for them).

        resFreq_dist[res]  = [freq, number of distinct categories, cat_list]
        res2poli_dict[res] = [(1-based index of the motif in motiDiKeySort,
                               category code), ...]
        """
        catCode = {'Library': 1, 'Process': 2, 'File': 3, 'Registry': 4, 'Network': 5}
        # Sorted so that the result does not depend on set iteration order.
        uniqueRes = sorted(set(self.res_list))

        self.resFreq_dist = {}
        self.res2poli_dict = {res:[] for res in uniqueRes} #res2pointList_dict

        for res in uniqueRes:
            freq = 0
            cat_list = []
            for ii,m in enumerate(self.motiDiKeySort):
                for t,a in self.motiDict[m]:
                    if res not in a:
                        continue
                    cat = self.__categoryOf(a.split('#PR#')[0])
                    if cat is None:
                        continue

                    freq += 1
                    cat_list.append(cat)
                    if cat in catCode:
                        self.res2poli_dict[res].append((ii+1, catCode[cat]))

            self.resFreq_dist[res] = [freq, len(set(cat_list)), cat_list]

        print("---4 Set Res Statistics---")

    #===public function
    def getType2API(self):
        """Return the category -> API-name-list table."""
        return self.type2API_dict

    def getScAPI(self):
        """Return the SC_API list (categorised APIs minus the NSC ones)."""
        return self.SC_API

    def getScRes(self):           # [res_list]
        """Return the sorted list of collected resource values (duplicates kept)."""
        return self.res_list

    def getCat2res(self):         # cat: [res_list]
        """Return category -> list of resource values, in collection order."""
        return self.cat2res_dict

    def getResFreq(self):
        """Return res -> [freq, number of distinct categories, cat_list]."""
        return self.resFreq_dist

    def getRes2point(self):
        """Return res -> list of (sorted motif index, category code) points."""
        return self.res2poli_dict

    def getWinCat2Res(self):
        """Return category -> de-duplicated winnowed resource names."""
        return self.winCat2res_dict

    def getWinResFreq(self):
        """Return winnowed name -> [freq, number of distinct categories, cat_list]."""
        return self.winResFreq_dist

    def getWinRes2point(self):
        """Return winnowed name -> combined point list."""
        return self.winRes2poli_dict

    def getMotiDict(self):
        """Return the motif dictionary passed to the constructor."""
        return self.motiDict
    

