Mine patterns of coarse-grained and fine-grained configuration dependencies from successfully executed serverless applications

In [4]:
import yaml
import os
from collections import Counter
from GeneralMethod import uniRepAll, transforRepPattern
import itertools

In [26]:
from itertools import combinations
from collections import defaultdict

def n_painting_growth(transactions, min_count):
    """Mine frequent 2-, 3- and 4-itemsets from a collection of transactions.

    Args:
        transactions: Iterable of item collections. Items must be hashable
            and mutually sortable. Duplicate items inside one transaction
            are counted once.
        min_count: Minimum number of transactions an itemset must occur in
            to be considered frequent (may be a float).

    Returns:
        dict mapping each frequent itemset (a sorted tuple of 2, 3 or 4
        items) to the number of transactions that contain it.

    Side effects:
        Prints the number of frequent itemsets found for each size.
    """
    # Materialise every transaction as a set exactly once: membership tests
    # become O(1) instead of a linear list scan, and a one-shot iterator is
    # not exhausted before the later counting passes.
    transaction_sets = [set(transaction) for transaction in transactions]

    def support(candidate):
        """Count the transactions that contain every item of `candidate`."""
        return sum(1 for items in transaction_sets if items.issuperset(candidate))

    # Step 1: Count 2-item combinations (each unordered pair once per transaction)
    two_item_counts = defaultdict(int)
    for items in transaction_sets:
        for pair in combinations(sorted(items), 2):
            two_item_counts[pair] += 1

    # Step 2: Filter frequent 2-itemsets
    freq_two_items = {
        pair: count for pair, count in two_item_counts.items() if count >= min_count
    }

    # Step 3: Build association sets (item -> {frequent partner: pair count})
    association_sets = defaultdict(dict)
    for (item1, item2), count in freq_two_items.items():
        association_sets[item1][item2] = count
        association_sets[item2][item1] = count

    # Step 4: Build frequent 3-itemsets from an item and two of its partners
    freq_three_items = {}
    checked_three = set()
    for base_item, partners in association_sets.items():
        for x, y in combinations(list(partners), 2):
            candidate = tuple(sorted((base_item, x, y)))
            # The same triple is reachable from each of its members; scan
            # the transactions for it only the first time it shows up.
            if candidate in checked_three:
                continue
            checked_three.add(candidate)
            count = support(candidate)
            if count >= min_count:
                freq_three_items[candidate] = count

    # Step 5: Build frequent 4-itemsets from pairs of frequent 3-itemsets
    freq_four_items = {}
    checked_four = set()
    for first, second in combinations(list(freq_three_items), 2):
        union = tuple(sorted(set(first) | set(second)))
        # Only pairs of triples sharing exactly two items give a 4-itemset;
        # many different pairs produce the same union, so count it once.
        if len(union) != 4 or union in checked_four:
            continue
        checked_four.add(union)
        count = support(union)
        if count >= min_count:
            freq_four_items[union] = count

    # Combine all
    all_frequent_itemsets = {
        **freq_two_items,
        **freq_three_items,
        **freq_four_items
    }

    print(f"2-itemsets: {len(freq_two_items)}")
    print(f"3-itemsets: {len(freq_three_items)}")
    print(f"4-itemsets: {len(freq_four_items)}")

    return all_frequent_itemsets


In [None]:
# mine coarse-grained configuration dependencies
def mainAllRuleSAMCorse(directoryPath):
    """Mine coarse-grained (key-only) dependency rules for `directoryPath`.

    Loads the key-only transactions, mines frequent itemsets with a minimum
    support of 5% of the transactions, discards itemsets touching template
    boilerplate, and hands the rest to handlefreqItemListSAMCorse.

    Returns:
        The (left, right) rule lists produced by handlefreqItemListSAMCorse.
    """
    simpData = loadSimpDatSAMCorse(directoryPath)
    frequent_itemsets = n_painting_growth(simpData, 0.05 * len(simpData))
    print("print result")
    print(frequent_itemsets)

    # Exact keys that disqualify an itemset when present as an item.
    boilerplate_keys = ("Transform", "AWSTemplateFormatVersion", "Description")
    # Top-level sections whose fields are not mined for dependencies.
    skipped_roots = ("Outputs", "Parameters")

    candidates = []
    for itemset in frequent_itemsets:
        fields = list(itemset)
        if any(key in fields for key in boilerplate_keys):
            continue
        # Reject the itemset if any field lives under a skipped section or
        # mentions "Description" anywhere in its dotted path.
        if any(
            field.split(".")[0] in skipped_roots or "Description" in field
            for field in fields
        ):
            continue
        candidates.append(fields)

    print(candidates)
    rule_left, rule_right = handlefreqItemListSAMCorse(candidates, simpData)
    return rule_left, rule_right


def handlefreqItemListSAMCorse(freqItemList, simpData):
    """Turn frequent key itemsets into always-true rules and save them.

    Every itemset is split into (left, right) parts with
    generate_combinations. A split is kept as a rule only if every
    transaction in `simpData` that contains all of `left` also contains all
    of `right`. Rules are then de-duplicated per left-hand side, written to
    ``Patterns/sam_NewcorrelationleftCoarse.txt`` and
    ``Patterns/sam_NewcorrelationrightCoarse.txt`` (one rule per line, items
    joined with ``+++``), and their count is printed.

    Args:
        freqItemList: Iterable of itemsets (sequences of key strings).
        simpData: Iterable of transactions (collections of key strings).

    Returns:
        (filter_saveleft, filter_saveright): parallel lists holding the
        left-hand and right-hand sides of the retained rules.
    """
    ignored_keys = ("Transform", "AWSTemplateFormatVersion")
    # Build the transaction sets once instead of once per candidate rule.
    transaction_sets = [set(data_i) for data_i in simpData]

    saveleft = []
    saveright = []
    for itemset in freqItemList:
        for left, right in generate_combinations(itemset):
            left = list(left)
            right = list(right)
            if len(left) + len(right) <= 1:
                continue
            if any(key in left or key in right for key in ignored_keys):
                continue
            left_set = set(left)
            right_set = set(right)
            # Keep the rule only if it has no counter-example: no
            # transaction may contain the left side without the right side.
            if all(right_set <= items for items in transaction_sets if left_set <= items):
                saveleft.append(left)
                saveright.append(right)

    left_sets = [set(left) for left in saveleft]
    right_sets = [set(right) for right in saveright]

    filter_saveleft = []
    filter_saveright = []
    kept_left_sets = []
    for index, left_set in enumerate(left_sets):
        # Drop a rule when another rule with the same left side has a right
        # side that includes this rule's right side.
        subsumed = any(
            other != index
            and left_set == other_left
            and right_sets[index] <= right_sets[other]
            for other, other_left in enumerate(left_sets)
        )
        # At most one rule is kept per distinct left side (first one wins).
        if subsumed or left_set in kept_left_sets:
            continue
        kept_left_sets.append(left_set)
        filter_saveleft.append(saveleft[index])
        filter_saveright.append(saveright[index])

    rootContentleft = "NewcorrelationleftCoarse"
    rootContentright = "NewcorrelationrightCoarse"
    # Create the output directory if needed so the mining work is not lost
    # to a FileNotFoundError at the very end.
    os.makedirs("Patterns", exist_ok=True)
    # Context managers close both files even if a write fails.
    with open("Patterns/sam_{}.txt".format(rootContentleft), "w") as f_left, \
            open("Patterns/sam_{}.txt".format(rootContentright), "w") as f_right:
        for left, right in zip(filter_saveleft, filter_saveright):
            f_left.write("+++".join(left) + "\n")
            f_right.write("+++".join(right) + "\n")

    print(len(filter_saveleft))

    return filter_saveleft, filter_saveright
 


# data processing 
def loadSimpDatSAMCorse(directoryPath):
    """Return the key-only transactions built from `directoryPath`.

    Thin wrapper that delegates to addEntryValueRepSAMCorse.
    """
    return addEntryValueRepSAMCorse(directoryPath)

def addEntryValueRepSAMCorse(directoryPath):
    """Build one key-only transaction per entry returned by uniRepAll.

    For each entry, transforRepPattern yields a (keys, values) pair; only
    the keys are used here. Each transaction is the de-duplicated list of
    the string-formatted keys.
    NOTE(review): uniRepAll presumably loads and flattens every
    configuration file under `directoryPath` - confirm in GeneralMethod.

    Returns:
        list of lists of key strings, one inner list per entry.
    """
    transactions = []
    for flat_config in uniRepAll(directoryPath):
        # The values half of the pair is ignored at coarse granularity.
        keys, _values = transforRepPattern(flat_config)
        unique_keys = set([f"{key}" for key in keys])
        transactions.append(list(unique_keys))
    return transactions

def generate_combinations(elements):
    """Enumerate (left, right) splits of `elements`.

    `left` runs over every combination of 1 .. len(elements) - 1 elements
    (in itertools.combinations order); `right` is the sorted tuple of the
    elements not contained in `left`. A split is skipped when an earlier
    split already produced the same `right`.

    Returns:
        list of (left, right) tuple pairs; empty for fewer than two elements.
    """
    splits = []
    seen_rights = []
    for size in range(1, len(elements)):
        for left in itertools.combinations(elements, size):
            right = tuple(sorted(item for item in elements if item not in left))
            if right in seen_rights:
                continue
            seen_rights.append(right)
            splits.append((left, right))
    return splits


In [None]:
# mine configuration dependencies
directoryPath = "../Dataset/configuration files-real"
# mine coarse-grained configuration dependencies
# (left as a bare expression so the notebook displays the returned rules)
mainAllRuleSAMCorse(directoryPath)

In [27]:
# mine fine-grained configuration dependencies
def mainAllRuleSAMFine(directoryPath):
    """Mine fine-grained (key=value) dependency rules for `directoryPath`.

    Loads the key=value transactions, mines frequent itemsets with a minimum
    support of 2% of the transactions, discards itemsets touching template
    boilerplate, and hands the rest to handlefreqItemListSAM.

    Returns:
        The (left, right) rule lists produced by handlefreqItemListSAM.
    """
    simpData = loadSimpDatSAM(directoryPath)
    frequent_itemsets = n_painting_growth(simpData, 0.02 * len(simpData))
    print("print result")

    # Top-level sections whose fields are not mined for dependencies.
    skipped_roots = ("Outputs", "Parameters")
    # Exact keys that are template boilerplate.
    boilerplate_keys = ("Transform", "AWSTemplateFormatVersion")

    def is_excluded(entry):
        # Only the key part (text before the first "=") decides exclusion.
        key = entry.split("=")[0]
        return (
            key.split(".")[0] in skipped_roots
            or key in boilerplate_keys
            or "Description" in key
        )

    candidates = []
    for itemset in frequent_itemsets:
        entries = list(itemset)
        if any(is_excluded(entry) for entry in entries):
            continue
        candidates.append(entries)

    rule_left, rule_right = handlefreqItemListSAM(candidates, simpData)
    return rule_left, rule_right
    



def loadSimpDatSAM(directoryPath):
    """Return the key=value transactions built from `directoryPath`.

    Thin wrapper that delegates to addEntryValueRepSAM.
    """
    return addEntryValueRepSAM(directoryPath)


def addEntryValueRepSAM(directoryPath):
    """Build one key=value transaction per entry returned by uniRepAll.

    For each entry, transforRepPattern yields parallel key and value
    sequences; every position becomes the string "key=value". Each
    transaction is the de-duplicated list of those strings.
    NOTE(review): uniRepAll presumably loads and flattens every
    configuration file under `directoryPath` - confirm in GeneralMethod.

    Returns:
        list of lists of "key=value" strings, one inner list per entry.
    """
    transactions = []
    for flat_config in uniRepAll(directoryPath):
        keys, values = transforRepPattern(flat_config)
        # Index into values (rather than zip) so a shorter values sequence
        # still raises instead of being silently truncated.
        entries = [f"{key}={values[pos]}" for pos, key in enumerate(keys)]
        transactions.append(list(set(entries)))
    return transactions




def handlefreqItemListSAM(freqItemList, simpData):
    """Turn frequent key=value itemsets into always-true rules and save them.

    Every itemset is split into (left, right) parts with
    generate_combinations. A split is kept as a rule only if every
    transaction in `simpData` that contains all of `left` also contains all
    of `right`. Rules are then de-duplicated per left-hand side, written to
    ``Patterns/sam_NewcorrelationleftFine.txt`` and
    ``Patterns/sam_NewcorrelationrightFine.txt`` (one rule per line, items
    joined with ``+++``), and their count is printed.

    Args:
        freqItemList: Iterable of itemsets (sequences of "key=value" strings).
        simpData: Iterable of transactions (collections of "key=value" strings).

    Returns:
        (filter_saveleft, filter_saveright): parallel lists holding the
        left-hand and right-hand sides of the retained rules.
    """
    ignored_entries = (
        'Transform=AWS::Serverless-2016-10-31',
        'AWSTemplateFormatVersion=2010-09-09',
    )
    # Build the transaction sets once instead of once per candidate rule.
    transaction_sets = [set(data_i) for data_i in simpData]

    saveleft = []
    saveright = []
    for itemset in freqItemList:
        for left, right in generate_combinations(itemset):
            left = list(left)
            right = list(right)
            if len(left) + len(right) <= 1:
                continue
            if any(entry in left or entry in right for entry in ignored_entries):
                continue
            left_set = set(left)
            right_set = set(right)
            # Keep the rule only if it has no counter-example: no
            # transaction may contain the left side without the right side.
            if all(right_set <= items for items in transaction_sets if left_set <= items):
                saveleft.append(left)
                saveright.append(right)

    left_sets = [set(left) for left in saveleft]
    right_sets = [set(right) for right in saveright]

    filter_saveleft = []
    filter_saveright = []
    kept_left_sets = []
    for index, left_set in enumerate(left_sets):
        # Drop a rule when another rule with the same left side has a right
        # side that includes this rule's right side.
        subsumed = any(
            other != index
            and left_set == other_left
            and right_sets[index] <= right_sets[other]
            for other, other_left in enumerate(left_sets)
        )
        # At most one rule is kept per distinct left side (first one wins).
        if subsumed or left_set in kept_left_sets:
            continue
        kept_left_sets.append(left_set)
        filter_saveleft.append(saveleft[index])
        filter_saveright.append(saveright[index])

    rootContentleft = "NewcorrelationleftFine"
    rootContentright = "NewcorrelationrightFine"
    # Create the output directory if needed so the mining work is not lost
    # to a FileNotFoundError at the very end.
    os.makedirs("Patterns", exist_ok=True)
    # Context managers close both files even if a write fails.
    with open("Patterns/sam_{}.txt".format(rootContentleft), "w") as f_left, \
            open("Patterns/sam_{}.txt".format(rootContentright), "w") as f_right:
        for left, right in zip(filter_saveleft, filter_saveright):
            f_left.write("+++".join(left) + "\n")
            f_right.write("+++".join(right) + "\n")

    print(len(filter_saveleft))

    return filter_saveleft, filter_saveright
              



In [28]:
# mine configuration dependencies
directoryPath = "../Dataset/configuration files-real"
# mine fine-grained configuration dependencies
# (left as a bare expression so the notebook displays the returned rules)
mainAllRuleSAMFine(directoryPath)

2-itemsets: 485
3-itemsets: 957
4-itemsets: 1067
print result
303


([['Resources.PHAWS::Serverless::Function.Properties.CodeUri.Bucket=<%REPO_BUCKET%>'],
  ['Resources.PHAWS::Serverless::Function.Properties.Environment.Variables.LOG_LEVEL=INFO'],
  ['Resources.PHAWS::Serverless::Function.Properties.Events.PHCloudWatchEvent.Type=CloudWatchEvent'],
  ['Resources.PHAWS::Serverless::Function.Properties.MemorySize=128'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=python3.6'],
  ['Resources.PHAWS::Serverless::Function.Properties.Timeout=30'],
  ['Resources.PHAWS::Serverless::Function.Properties.Events.PHSNS.Type=SNS'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=nodejs16.x'],
  ['Resources.PHAWS::Serverless::Function.Properties.Timeout=10'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=python3.8'],
  ['Resources.PHAWS::Serverless::Function.Properties.Timeout=15'],
  ['Resources.PHAWS::Serverless::Function.Properties.Runtime=nodejs12.x'],
  ['Resources.PHAWS::Serverless::Function.Properties.Timeout=300'],
  