### lscpu/CPU(s) visualization.csv의 데이터셋에서 더 이상 지원되지 않는 인스턴스, 지원하나 데이터셋에 누락된 인스턴스를 검사

In [1]:
# 코드 자동완성 허용
%config Completer.use_jedi = False

# cell 너비 확장
from IPython.display import display, HTML

display(
    HTML(
        data="""
            <style>
                div#notebook-container      { width: 99%}
                div#menubar-container       { width: 99%}
                div#maintoolbar-container   { width: 99%}
            </style>
        """
    )
)

In [2]:
# import library
from IPython.display import display
import pandas as pd
# Lift pandas' display limits so DataFrames are printed in full.
# Full option names are used on purpose: pandas resolves abbreviated keys
# (such as 'display.max_row') by pattern matching, which raises as soon as
# the pattern matches more than one option.
pd.set_option('display.max_columns', None)  # show every column
pd.set_option('display.max_rows', None)  # show every row
pd.set_option('display.max_colwidth', None)  # never truncate cell contents

import gspread as gs
from gspread_formatting import *

import boto3

In [40]:
# Create the EC2 client
ec2_client = boto3.client('ec2', region_name='us-west-2')

# Instance type names, bucketed by generation and CPU architecture:
#   createable_x86_64_instances - current generation, supports 'x86_64'
#   createable_arm64_instances  - current generation, supports 'arm64' (and not 'x86_64')
#   unsupported_instances       - not current generation
createable_x86_64_instances = []
createable_arm64_instances = []
unsupported_instances = []

# Let boto3's paginator follow the NextToken chain instead of tracking the
# token by hand; every page has the same shape as a single
# describe_instance_types() response.
paginator = ec2_client.get_paginator('describe_instance_types')

for page in paginator.paginate():
    # Classify the instance types on the current page
    for instance_type in page['InstanceTypes']:
        type_name = instance_type['InstanceType']

        if not instance_type['CurrentGeneration']:
            unsupported_instances.append(type_name)
            continue

        architectures = instance_type['ProcessorInfo']['SupportedArchitectures']
        # NOTE: a current-generation type that lists neither 'x86_64' nor
        # 'arm64' is not stored in any list, so it is also missing from the
        # "all instances" total printed below.
        if 'x86_64' in architectures:
            createable_x86_64_instances.append(type_name)
        elif 'arm64' in architectures:
            createable_arm64_instances.append(type_name)

print(f"Number of all instances : {len(createable_x86_64_instances) + len(createable_arm64_instances) + len(unsupported_instances)}")
print(f"Number of createable x86_64 instances : {len(createable_x86_64_instances)}")
print(f"Number of createable arm64 instances : {len(createable_arm64_instances)}")
print(f"Number of unsupported instances : {len(unsupported_instances)}")


Number of all instances : 626
Number of createable x86_64 instances : 467
Number of createable arm64 instances : 123
Number of unsupported instances : 36


In [28]:
# Read the Google spreadsheet (core features)

gc = gs.service_account(filename='./secure-outpost-380004-8d45b1504f3e.json')

sheet = gc.open('CPU Feature Visualization').worksheet('groupby aws(core)')
df = pd.DataFrame(sheet.get_all_records())
featureGroups = df['feature groups'].tolist()

# Split every ", "-separated 'feature groups' entry into its own list,
# giving one list of names per sheet row.
groups = [feature_group.split(", ") for feature_group in featureGroups]

print(groups)

[['m2.xlarge', 'm2.2xlarge', 'm2.4xlarge'], ['m1.large', 'm1.xlarge'], ['c3.large', 'r3.large', 'm3.large', 'r3.xlarge', 'c3.xlarge', 'm3.xlarge', 'r3.2xlarge', 'm3.2xlarge', 'c3.2xlarge', 'r3.4xlarge', 'c3.4xlarge', 'r3.8xlarge', 'c3.8xlarge'], ['m5a.large', 'r5a.large', 'r5ad.large', 'm5ad.large', 't3a.medium', 't3a.xlarge', 'm5a.xlarge', 'r5a.xlarge', 'm5ad.xlarge', 'r5ad.xlarge', 't3a.2xlarge', 'm5a.2xlarge', 'r5a.2xlarge', 'r5ad.2xlarge', 'm5ad.2xlarge', 'm5a.4xlarge', 'm5ad.4xlarge', 'r5a.4xlarge', 'r5ad.4xlarge', 'r5ad.8xlarge', 'r5a.8xlarge', 'm5a.8xlarge', 'm5ad.8xlarge', 'm5a.16xlarge', 'r5a.16xlarge', 'r5ad.16xlarge', 'm5ad.16xlarge'], ['c5ad.large', 'c5a.large', 'c5ad.xlarge', 'c5a.xlarge', 'c5ad.2xlarge', 'c5a.2xlarge', 'c5ad.4xlarge', 'c5a.4xlarge', 'c5a.8xlarge', 'c5ad.8xlarge', 'c5ad.16xlarge', 'c5a.16xlarge'], ['t2.large', 'c4.large', 'm4.large', 'c4.xlarge', 'm4.xlarge', 'm4.2xlarge', 'c4.2xlarge', 't2.2xlarge', 'm4.4xlarge', 'c4.4xlarge'], ['r6a.large', 'c6a.large', 

In [41]:
# Cross-check the dataset (groups) against the instance types reported by EC2.
#   unsupported      - dataset entries that EC2 reports as not current generation
#   not_exist_x86_64 - current-generation x86_64 types missing from the dataset
#   not_exist_arm64  - current-generation arm64 types missing from the dataset

# Flatten the dataset once, keeping its order (and any duplicates), and build
# sets so each membership test is a hash lookup instead of a scan over every
# group / the whole unsupported list.
dataset_instances = [instance for group in groups for instance in group]
dataset_lookup = set(dataset_instances)
unsupported_lookup = set(unsupported_instances)

unsupported = [
    instance for instance in dataset_instances
    if instance in unsupported_lookup
]
not_exist_x86_64 = [
    instance for instance in createable_x86_64_instances
    if instance not in dataset_lookup
]
not_exist_arm64 = [
    instance for instance in createable_arm64_instances
    if instance not in dataset_lookup
]

print(unsupported)
print(f'Number of unsupported instances extracted from the dataset: {len(unsupported)}\n')
print(not_exist_x86_64)
print(f'Number of non-existent x86_64 instances extracted from the dataset: {len(not_exist_x86_64)}\n')
print(not_exist_arm64)
print(f'Number of non-existent arm64 instances extracted from the dataset: {len(not_exist_arm64)}\n')


['m2.xlarge', 'm2.2xlarge', 'm2.4xlarge', 'm1.large', 'm1.xlarge', 'c3.large', 'r3.large', 'm3.large', 'r3.xlarge', 'c3.xlarge', 'm3.xlarge', 'r3.2xlarge', 'm3.2xlarge', 'c3.2xlarge', 'r3.4xlarge', 'c3.4xlarge', 'r3.8xlarge', 'c3.8xlarge']
Number of unsupported instances extracted from the dataset: 18

['r6in.12xlarge', 'i3en.3xlarge', 'c6in.24xlarge', 'm6idn.12xlarge', 'x2iedn.xlarge', 'm5dn.12xlarge', 'x2iedn.2xlarge', 'trn1.32xlarge', 'g5.4xlarge', 'g3.16xlarge', 'i3en.12xlarge', 'm5ad.12xlarge', 'r6i.xlarge', 'x1e.8xlarge', 'x2iezn.4xlarge', 'r5.24xlarge', 'd3en.2xlarge', 'c6a.24xlarge', 'c6a.48xlarge', 'c6i.32xlarge', 'r6in.metal', 'r5ad.24xlarge', 'r6in.24xlarge', 'f1.2xlarge', 'i3en.24xlarge', 'c5ad.24xlarge', 'r6a.metal', 'x1e.2xlarge', 'g5.2xlarge', 'd2.8xlarge', 'r5n.12xlarge', 'r5b.12xlarge', 'd3.8xlarge', 'm6i.24xlarge', 'x2iezn.8xlarge', 'c5d.12xlarge', 'r5b.metal', 'x2idn.24xlarge', 'c6in.12xlarge', 'm6a.metal', 'trn1.2xlarge', 'c6a.32xlarge', 't3a.small', 'm5zn.6xlarge',