In [1]:
from cloudscape import load_studies, extract_svc_info
import csv

In [2]:
# Load the studies from the graphs directory and report how many there are.
architectures = load_studies("./data/graphs/")
print(f"There are {len(architectures)} architectures in the dataset.")

# Keep only the studies whose use_for_analysis() check passes.
architectures = list(filter(lambda study: study.use_for_analysis(), architectures))
print(f"We will analyze {len(architectures)} of them.")

There are 396 architectures in the dataset.
We will analyze 340 of them.


In [3]:
# Load service metadata from ./data/services.csv. Later cells index the result
# by service name, e.g. svc_info[svc]["capability"].
svc_info = extract_svc_info("./data/services.csv")


In [4]:
# Inspect the loaded service metadata (prints the whole structure at once).
print(svc_info)

{'Transcribe': {'name': 'Transcribe', 'capability': 'compute', 'image_url': 'transcribe', 'is_aws': True, 'schema': '', 'aws_product_categories': 'ML'}, 'LookoutForVision': {'name': 'LookoutForVision', 'capability': 'compute', 'image_url': 'lookoutforvision', 'is_aws': True, 'schema': '', 'aws_product_categories': 'ML'}, 'PrivateLink': {'name': 'PrivateLink', 'capability': 'networking', 'image_url': 'privatelink', 'is_aws': True, 'schema': '', 'aws_product_categories': 'Networking and Content Delivery'}, 'SystemsManager': {'name': 'SystemsManager', 'capability': 'control', 'image_url': 'systemsmanager', 'is_aws': True, 'schema': '', 'aws_product_categories': 'Management and Governance'}, 'UserCompanyDeveloper': {'name': 'UserCompanyDeveloper', 'capability': 'User', 'image_url': 'usercompanydeveloper', 'is_aws': False, 'schema': '', 'aws_product_categories': ''}, 'LakeFormation': {'name': 'LakeFormation', 'capability': 'control', 'image_url': 'lakeformation', 'is_aws': True, 'schema': '

In [5]:
def cleanup_url(s) -> str:
    """Return ``s`` truncated just before the first ``"&list="`` marker.

    If the marker does not occur, ``s`` is returned unchanged.
    """
    head, _sep, _tail = s.partition("&list=")
    return head

In [6]:
# Export one summary row per analyzed architecture to cloudscape.csv.
# newline="" is what the csv module expects from the file object (without it,
# extra blank rows can appear on platforms that translate line endings), and
# the encoding is pinned so the output does not depend on the platform default.
with open("cloudscape.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "link", "num_nodes", "num_edges", "services", "goals"])
    for arch in architectures:
        g = arch.nx_graph
        g_dict = g.graph  # indexed below for the "name" and "link" entries
        # De-duplicate, then sort: joining a bare set yields an arbitrary order
        # that can differ between runs, which made the CSV non-reproducible.
        services = sorted(set(arch.get_all_nodes_services()))
        goals = sorted(set(arch.get_functional_goals()))
        writer.writerow(
            [
                g_dict["name"],
                cleanup_url(g_dict["link"]),
                g.number_of_nodes(),
                g.number_of_edges(),
                ",".join(services),
                ",".join(goals),
            ]
        )

In [7]:
def is_store_type(svc):
    """Return True when service ``svc`` is counted as a data store.

    A service qualifies if any of the following holds:
      * its ``"capability"`` entry in ``svc_info`` is ``"storage"`` and it is
        not ``"EBS"``;
      * it is one of ``"OpenSearch"``, ``"MSK"`` or ``"QuickSight"``;
      * its name contains ``"Kinesis"``.

    ``svc_info[svc]`` is always looked up first, so a name missing from
    ``svc_info`` raises ``KeyError``.
    """
    if svc_info[svc]["capability"] == "storage" and svc != "EBS":
        return True
    if svc in {"OpenSearch", "MSK", "QuickSight"}:
        return True
    return "Kinesis" in svc

def get_storage_types(arch):
    """Return the set of services in ``arch`` for which ``is_store_type`` is true.

    The candidate services come from ``arch.get_all_nodes_services()``.
    """
    return set(filter(is_store_type, arch.get_all_nodes_services()))


In [8]:
# Export the architectures that combine several distinct data stores.
# An architecture is listed when it uses at least this many store services.
MIN_DISTINCT_STORES = 3

# newline="" is what the csv module expects from the file object (without it,
# extra blank rows can appear on platforms that translate line endings), and
# the encoding is pinned so the output does not depend on the platform default.
with open("multistore.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "link", "stores"])
    for arch in architectures:
        stores = get_storage_types(arch)
        if len(stores) >= MIN_DISTINCT_STORES:
            g_dict = arch.nx_graph.graph  # indexed below for "name" and "link"
            # Sort before joining: the stores come back as a set, whose
            # iteration order can differ between runs.
            writer.writerow(
                [g_dict["name"], cleanup_url(g_dict["link"]), ",".join(sorted(stores))]
            )