# 수집해야 하는 리소스 가격들
* EC2
-  NAT Gateway
-  ELB
-  EBS
* EKS
* RDS/Aurora
* ElastiCache
* MSK
* EFS
* S3
* VPC

# 0. 관련 라이브러리 다운로드 및 모듈 불러오기

In [7]:
!pip install pandas requests

Collecting requests
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (33 kB)
Collecting idna<4,>=2.5 (from requests)
  Downloading idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Downloading urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2024.8.30-py3-none-any.whl.metadata (2.2 kB)
Downloading requests-2.32.3-py3-none-any.whl (64 kB)
Downloading certifi-2024.8.30-py3-none-any.whl (167 kB)
Downloading charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl (118 kB)
Downloading idna-3.10-py3-none-any.whl (70 kB)
Downloading urllib3-2.2.3-py3-none-any.whl (126 kB)
Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests
Successfully installed certifi-2024.8.30 charset-normalizer-3.3.2 idna-3.10 reque

In [1]:

import pandas as pd
import json
import requests
import datetime
from pprint import pprint


In [2]:
pd.set_option("display.max_colwidth", 100)

# 1. AWS 가격정보 수집

In [4]:
def download_resource_price_json(resource: str, region: str):
    request_template = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/{resource}/current/{region}/index.json"
    url = request_template.format(resource=resource, region=region)
    response = requests.get(url)
    today = datetime.date.today().isoformat()
    with open(f"{resource}_{today}.json", "w") as f:
        f.write(response.text)

def download_prices(resources: list[str], region: str):
    for resource in resources:
        download_resource_price_json(resource, region)

In [94]:
download_resource_price_json("AmazonS3", "ap-northeast-2")
# "TimedStorage",

In [6]:
# 수집대상 리소스
resources = ["AmazonEC2", "AmazonRDS", "AmazonVPC", "AmazonMSK", "AmazonElastiCache"]
download_prices(resources, "ap-northeast-2")

# 2. 가격정보 전처리

## 2-1. EC2 가격정보 전처리 (EC2, NAT Gateway, ELB, EBS)

In [3]:
with open("AmazonEC2_2024-10-03.json", "r") as f:
    raw_ec2_price_data = json.load(f)

In [4]:
def flatten_price(data: dict, product_family: str):
    result = []
    
    price_data = data["terms"]["OnDemand"]
    
    for sku, value in data["products"].items():
        new_row = value["attributes"]
        if value.get("productFamily") != product_family:
            continue
        
        # 딕셔너리의 첫번째 값만 가져옴
        price_data_row = next(iter(price_data[sku].values()))
        # 딕셔너리의 첫번째 값만 가져옴
        price_data_row = next(iter(price_data_row["priceDimensions"].values()))
        new_row["description"] = price_data_row["description"]
        new_row["pricePerUnit"] = price_data_row["pricePerUnit"]["USD"]
        new_row["unit"] = price_data_row["unit"]
        result.append(new_row)

    return pd.DataFrame(result)

In [5]:
def filter_df(df: pd.DataFrame, filters: dict):
    for column, value in filters.items():
        if column not in df.columns:
            raise KeyError(f"데이터프레임에 '{column}' 컬럼이 존재하지 않습니다.")
        if isinstance(value, list):
            df = df[df[column].isin(value)]
        else:
            df = df[df[column] == value]
    return df

In [9]:
ec2_df = flatten_price(raw_ec2_price_data, "Compute Instance")

ec2_filters = {
    "operatingSystem": ["Linux", "Windows"],
    "tenancy": "Shared",
    "currentGeneration": "Yes",
    "preInstalledSw": "NA",
    "storage": "EBS only",
    "processorArchitecture": "64-bit",
    "capacitystatus": "Used",
    "marketoption": "OnDemand",
}
# ec2_columns = [
#     "instanceType",
#     "instanceFamily",
#     "vcpu",
#     "memory",
#     "gpuMemory",
#     "operatingSystem",
#     "physicalProcessor",
#     "pricePerUnit",
#     "unit",
# ]

ec2_df = filter_df(ec2_df, ec2_filters)
# 필터링된 컬럼만 선택
# ec2_df = ec2_df[ec2_columns]
# 중복된 행 제거
ec2_df.drop_duplicates(subset=["instanceType", "pricePerUnit"])

Unnamed: 0,servicecode,location,locationType,instanceType,currentGeneration,instanceFamily,vcpu,physicalProcessor,clockSpeed,memory,...,normalizationSizeFactor,preInstalledSw,processorFeatures,regionCode,servicename,vpcnetworkingsupport,description,pricePerUnit,unit,gpu
29,AmazonEC2,Asia Pacific (Seoul),AWS Region,c7i.12xlarge,Yes,Compute optimized,48,Intel Xeon Scalable (Sapphire Rapids),3.2 GHz,96 GiB,...,96,,Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo; Intel AMX,ap-northeast-2,Amazon Elastic Compute Cloud,true,$2.4192 per On Demand Linux c7i.12xlarge Instance Hour,2.4192000000,Hrs,
70,AmazonEC2,Asia Pacific (Seoul),AWS Region,r5.4xlarge,Yes,Memory optimized,16,Intel Xeon Platinum 8175,3.1 GHz,128 GiB,...,32,,Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo,ap-northeast-2,Amazon Elastic Compute Cloud,true,$1.216 per On Demand Windows BYOL r5.4xlarge Instance Hour,1.2160000000,Hrs,
219,AmazonEC2,Asia Pacific (Seoul),AWS Region,g3s.xlarge,Yes,GPU instance,4,Intel Xeon E5-2686 v4 (Broadwell),2.3 GHz,30.5 GiB,...,8,,,ap-northeast-2,Amazon Elastic Compute Cloud,true,$1.118 per On Demand Windows g3s.xlarge Instance Hour,1.1180000000,Hrs,1
228,AmazonEC2,Asia Pacific (Seoul),AWS Region,r6i.16xlarge,Yes,Memory optimized,64,Intel Xeon 8375C (Ice Lake),3.5 GHz,512 GiB,...,128,,Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo,ap-northeast-2,Amazon Elastic Compute Cloud,true,$4.864 per On Demand Linux r6i.16xlarge Instance Hour,4.8640000000,Hrs,
249,AmazonEC2,Asia Pacific (Seoul),AWS Region,m6i.xlarge,Yes,General purpose,4,Intel Xeon 8375C (Ice Lake),3.5 GHz,16 GiB,...,8,,Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo,ap-northeast-2,Amazon Elastic Compute Cloud,true,$0.236 per On Demand Linux m6i.xlarge Instance Hour,0.2360000000,Hrs,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39240,AmazonEC2,Asia Pacific (Seoul),AWS Region,p2.xlarge,Yes,GPU instance,4,Intel Xeon E5-2686 v4 (Broadwell),2.3 GHz,61 GiB,...,8,,Intel AVX; Intel AVX2; Intel Turbo,ap-northeast-2,Amazon Elastic Compute Cloud,true,$1.649 per On Demand Windows p2.xlarge Instance Hour,1.6490000000,Hrs,1
39266,AmazonEC2,Asia Pacific (Seoul),AWS Region,m6i.xlarge,Yes,General purpose,4,Intel Xeon 8375C (Ice Lake),3.5 GHz,16 GiB,...,8,,Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo,ap-northeast-2,Amazon Elastic Compute Cloud,true,$0.42 per On Demand Windows m6i.xlarge Instance Hour,0.4200000000,Hrs,
39397,AmazonEC2,Asia Pacific (Seoul),AWS Region,r5.4xlarge,Yes,Memory optimized,16,Intel Xeon Platinum 8175,3.1 GHz,128 GiB,...,32,,Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo,ap-northeast-2,Amazon Elastic Compute Cloud,true,$1.952 per On Demand Windows r5.4xlarge Instance Hour,1.9520000000,Hrs,
39425,AmazonEC2,Asia Pacific (Seoul),AWS Region,inf1.xlarge,Yes,Machine Learning ASIC Instances,4,Intel Xeon Platinum 8275CL (Cascade Lake),,8 GiB,...,,,,ap-northeast-2,Amazon Elastic Compute Cloud,true,$0.281 per On Demand Linux inf1.xlarge Instance Hour,0.2810000000,Hrs,


In [10]:
ec2_df.dtypes

servicecode                    object
location                       object
locationType                   object
instanceType                   object
currentGeneration              object
instanceFamily                 object
vcpu                           object
physicalProcessor              object
clockSpeed                     object
memory                         object
storage                        object
networkPerformance             object
processorArchitecture          object
tenancy                        object
operatingSystem                object
licenseModel                   object
usagetype                      object
operation                      object
availabilityzone               object
capacitystatus                 object
classicnetworkingsupport       object
dedicatedEbsThroughput         object
ecu                            object
enhancedNetworkingSupported    object
gpuMemory                      object
instancesku                    object
intelAvxAvai

In [13]:
# EBS 가격정보 dataframe으로 변환

ebs_df = flatten_price(raw_ec2_price_data, "Storage")
ebs_filters = {
    "volumeApiName": ["gp2", "gp3"],
}
ebs_df = filter_df(ebs_df, ebs_filters)

In [14]:
ebs_df

Unnamed: 0,servicecode,location,locationType,storageMedia,volumeType,maxVolumeSize,maxIopsvolume,maxThroughputvolume,usagetype,operation,regionCode,servicename,volumeApiName,description,pricePerUnit,unit,maxIopsBurstPerformance
0,AmazonEC2,Asia Pacific (Seoul),AWS Region,SSD-backed,General Purpose,16 TiB,16000,1000 MiB/s,APN2-EBS:VolumeUsage.gp3,,ap-northeast-2,Amazon Elastic Compute Cloud,gp3,$0.0912 per GB-month of General Purpose (gp3) provisioned storage - Asia Pacific (Seoul),0.0912,GB-Mo,
5,AmazonEC2,Asia Pacific (Seoul),AWS Region,SSD-backed,General Purpose,16 TiB,16000,250 MiB/s,APN2-EBS:VolumeUsage.gp2,,ap-northeast-2,Amazon Elastic Compute Cloud,gp2,$0.114 per GB-month of General Purpose SSD (gp2) provisioned storage - Asia Pacific (Seoul),0.114,GB-Mo,3000 for volumes <= 1 TiB


In [15]:
# Load Balancer 가격정보 dataframe으로 변환
alb_df = flatten_price(raw_ec2_price_data, "Load Balancer-Application")
alb_filters = {"usagetype": "APN2-LoadBalancerUsage"}
alb_df = filter_df(alb_df, alb_filters)
alb_df

Unnamed: 0,servicecode,location,locationType,group,groupDescription,usagetype,operation,regionCode,servicename,description,pricePerUnit,unit
0,AmazonEC2,Asia Pacific (Seoul),AWS Region,ELB:Balancer,LoadBalancer hourly usage by Application Load Balancer,APN2-LoadBalancerUsage,LoadBalancing:Application,ap-northeast-2,Amazon Elastic Compute Cloud,$0.0225 per Application LoadBalancer-hour (or partial hour),0.0225,Hrs


In [16]:
nlb_df = flatten_price(raw_ec2_price_data, "Load Balancer-Network")
nlb_filters = {"usagetype": "APN2-LoadBalancerUsage"}
nlb_df = filter_df(nlb_df, nlb_filters)
nlb_df

Unnamed: 0,servicecode,location,locationType,group,groupDescription,usagetype,operation,regionCode,servicename,description,pricePerUnit,unit
0,AmazonEC2,Asia Pacific (Seoul),AWS Region,ELB:Balancer,LoadBalancer hourly usage by Network Load Balancer,APN2-LoadBalancerUsage,LoadBalancing:Network,ap-northeast-2,Amazon Elastic Compute Cloud,$0.0225 per Network LoadBalancer-hour (or partial hour),0.0225,Hrs


In [29]:
nat_df = flatten_price(raw_ec2_price_data, "NAT Gateway")
nat_df

Unnamed: 0,servicecode,location,locationType,group,groupDescription,usagetype,operation,regionCode,servicename,description,pricePerUnit,unit
0,AmazonEC2,Asia Pacific (Seoul),AWS Region,NGW:NatGateway,Charge for per GB data processed by NatGateways,APN2-NatGateway-Bytes,NatGateway,ap-northeast-2,Amazon Elastic Compute Cloud,$0.059 per GB Data Processed by NAT Gateways,0.059,GB
1,AmazonEC2,Asia Pacific (Seoul),AWS Region,NGW:NatGateway,Hourly charge for NAT Gateways,APN2-NatGateway-Hours,NatGateway,ap-northeast-2,Amazon Elastic Compute Cloud,$0.059 per NAT Gateway Hour,0.059,Hrs


In [30]:
nat_filter = {
    "unit": "Hrs",
    "usagetype": "APN2-NatGateway-Hours",
}
nat_df = filter_df(nat_df, nat_filter)

In [31]:
nat_df

Unnamed: 0,servicecode,location,locationType,group,groupDescription,usagetype,operation,regionCode,servicename,description,pricePerUnit,unit
1,AmazonEC2,Asia Pacific (Seoul),AWS Region,NGW:NatGateway,Hourly charge for NAT Gateways,APN2-NatGateway-Hours,NatGateway,ap-northeast-2,Amazon Elastic Compute Cloud,$0.059 per NAT Gateway Hour,0.059,Hrs


## 2-2. RDS/Aurora 가격정보 전처리

In [4]:
with open("AmazonRDS_2024-10-03.json", "r") as f:
    raw_rds_price_data = json.load(f)

In [5]:
rds_df = flatten_price(raw_rds_price_data, "Database Instance")
rds_filters = {
    "deploymentOption": "Single-AZ",
    "locationType": "AWS Region",
    "currentGeneration": "Yes",
}
rds_df = filter_df(rds_df, rds_filters)

In [6]:
rds_df

Unnamed: 0,servicecode,location,locationType,instanceType,currentGeneration,instanceFamily,vcpu,physicalProcessor,memory,storage,...,servicename,description,pricePerUnit,unit,deploymentModel,clockSpeed,dedicatedEbsThroughput,enhancedNetworkingSupported,processorFeatures,pricingUnit
0,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.r5b.2xlarge.tpc1.mem2x,Yes,Memory optimized,8,Intel Xeon Platinum 8000 series,128 GiB,EBS Only,...,Amazon Relational Database Service,USD 2.634 per RDS db.r5b.2xlarge.tpc1.mem2x Si...,2.6340000000,Hrs,,,,,,
5,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.r5b.large,Yes,Memory optimized,2,Intel Xeon Platinum 8000 series,16 GiB,EBS Only,...,Amazon Relational Database Service,USD 0.329 per RDS db.r5b.large Single-AZ insta...,0.3290000000,Hrs,,Up to 3.1 GHz,156 Mbps,Yes,,
10,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.m6gd.16xlarge,Yes,General purpose,64,AWS Graviton2,256 GiB,2 x 1900 NVMe SSD,...,Amazon Relational Database Service,$ 8.352 per RDS db.m6gd.16xlarge Single-AZ ins...,8.3520000000,Hrs,,,,,,
11,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.r6i.8xlarge,Yes,Memory optimized,32,Intel Xeon Scalable Ice Lake,256 GiB,EBS Only,...,Amazon Relational Database Service,USD 4.432 per RDS db.r6i.8xlarge Single-AZ ins...,4.4320000000,Hrs,,,,,,
14,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.r5.8xlarge,Yes,Memory optimized,32,Intel Xeon Platinum 8175,256 GiB,Aurora IO Optimization Mode,...,Amazon Relational Database Service,$7.2800 per RDS db.r5.8xlarge IO-optimized Sin...,7.2800000000,Hrs,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2632,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.r5b.8xlarge.tpc2.mem3x,Yes,Memory optimized,32,Intel Xeon Platinum 8000 series,768 GiB,EBS Only,...,Amazon Relational Database Service,USD 15.806 per RDS db.r5b.8xlarge.tpc2.mem3x S...,15.8060000000,Hrs,,,,,,
2633,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.r5.8xlarge,Yes,Memory optimized,32,Intel Xeon Platinum 8175,256 GiB,EBS Only,...,Amazon Relational Database Service,$ 4.56 per RDS db.r5.8xlarge Single-AZ instanc...,4.5600000000,Hrs,,,,,,
2635,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.x2idn.24xlarge,Yes,Memory optimized,96,Intel Xeon Scalable Ice Lake,1536 GiB,2 x 1425 NVMe SSD,...,Amazon Relational Database Service,$ 24.384 per RDS db.x2idn.24xlarge Single-AZ i...,24.3840000000,Hrs,,,,,,
2636,AmazonRDS,Asia Pacific (Seoul),AWS Region,db.m6g.4xlarge,Yes,General purpose,16,AWS Graviton2,64 GiB,EBS Only,...,Amazon Relational Database Service,$ 1.678 per RDS db.m6g.4xlarge Single-AZ insta...,1.6780000000,Hrs,,,,,,


In [7]:
rds_df = rds_df.drop_duplicates(subset=["instanceType", "pricePerUnit"])
rds_columns = [
    "servicecode", 
    "instanceType",
    "databaseEngine",
    "vcpu",
    "memory",
    "pricePerUnit",
    "unit",
]
rds_df = rds_df[rds_columns]
rds_df

Unnamed: 0,servicecode,instanceType,databaseEngine,vcpu,memory,pricePerUnit,unit
0,AmazonRDS,db.r5b.2xlarge.tpc1.mem2x,Oracle,8,128 GiB,2.6340000000,Hrs
5,AmazonRDS,db.r5b.large,Oracle,2,16 GiB,0.3290000000,Hrs
10,AmazonRDS,db.m6gd.16xlarge,PostgreSQL,64,256 GiB,8.3520000000,Hrs
11,AmazonRDS,db.r6i.8xlarge,Oracle,32,256 GiB,4.4320000000,Hrs
14,AmazonRDS,db.r5.8xlarge,Aurora MySQL,32,256 GiB,7.2800000000,Hrs
...,...,...,...,...,...,...,...
2604,AmazonRDS,db.t3.2xlarge,SQL Server,8,32 GiB,4.6320000000,Hrs
2612,AmazonRDS,db.m6g.2xlarge,MySQL,8,32 GiB,0.8390000000,Hrs
2624,AmazonRDS,db.m5.2xlarge,SQL Server,8,32 GiB,2.7230000000,Hrs
2632,AmazonRDS,db.r5b.8xlarge.tpc2.mem3x,Oracle,32,768 GiB,15.8060000000,Hrs


In [44]:
rds_df.head()

Unnamed: 0,instanceType,databaseEngine,vcpu,memory,pricePerUnit,unit
0,db.r5b.2xlarge.tpc1.mem2x,Oracle,8,128 GiB,2.634,Hrs
5,db.r5b.large,Oracle,2,16 GiB,0.329,Hrs
10,db.m6gd.16xlarge,PostgreSQL,64,256 GiB,8.352,Hrs
11,db.r6i.8xlarge,Oracle,32,256 GiB,4.432,Hrs
14,db.r5.8xlarge,Aurora MySQL,32,256 GiB,7.28,Hrs


In [45]:
len(rds_df)

551

## 2-3. EKS 가격정보 전처리

In [88]:
with open("AmazonEKS_2024-10-03.json", "r") as f:
    raw_eks_price_data = json.load(f)

In [91]:
eks_df = flatten_price(raw_eks_price_data, "Compute")
eks_filters = {
    "locationType": "AWS Region",
    "usagetype": "APN2-AmazonEKS-Hours:perCluster",
    
}
eks_df = filter_df(eks_df, eks_filters)

In [92]:
eks_df

Unnamed: 0,servicecode,location,locationType,usagetype,operation,regionCode,servicename,storagetype,description,pricePerUnit,unit,tenancy,memorytype,tiertype,cputype
2,AmazonEKS,Asia Pacific (Seoul),AWS Region,APN2-AmazonEKS-Hours:perCluster,CreateOperation,ap-northeast-2,Amazon Elastic Container Service for Kubernetes,,Amazon EKS local cluster usage on AWS Outposts,0.1,hours,,,HAStandard,


## 2-4. ElastiCache 가격정보 전처리

In [68]:
with open("AmazonElastiCache_2024-10-03.json", "r") as f:
    raw_redis_price_data = json.load(f)

In [75]:
redis_df = flatten_price(raw_redis_price_data, "Cache Instance")
redis_filters = {
    "locationType": "AWS Region",
    "currentGeneration": "Yes",
}
redis_df = filter_df(redis_df, redis_filters)

In [76]:
len(redis_df)

120

In [77]:
redis_df = redis_df.drop_duplicates(subset=["instanceType", "pricePerUnit"])
# redis_columns = [
#     "servicecode", 
#     "instanceType",
#     "cacheEngine",
#     "vcpu",
#     "memory",
#     "pricePerUnit",
#     "unit",
# ]
# redis_df = redis_df[redis_columns]

In [78]:
redis_df.head()

Unnamed: 0,servicecode,location,locationType,instanceType,currentGeneration,instanceFamily,vcpu,memory,networkPerformance,cacheEngine,usagetype,operation,regionCode,servicename,description,pricePerUnit,unit
1,AmazonElastiCache,Asia Pacific (Seoul),AWS Region,cache.m7g.4xlarge,Yes,Standard,16,52.26 GiB,Up to 15 Gigabit,Redis,APN2-NodeUsage:cache.m7g.4xlarge,CreateCacheCluster:0002,ap-northeast-2,Amazon ElastiCache,$1.54 per Gen Purpose m7g.4xlarge node hour running Redis,1.54,Hrs
2,AmazonElastiCache,Asia Pacific (Seoul),AWS Region,cache.t2.small,Yes,Standard,1,1.55 GiB,Low to Moderate,Redis,APN2-NodeUsage:cache.t2.small,CreateCacheCluster:0002,ap-northeast-2,Amazon ElastiCache,$0.052 per T2 Small Cache node-hour (or partial hour) running Redis,0.052,Hrs
6,AmazonElastiCache,Asia Pacific (Seoul),AWS Region,cache.m5.2xlarge,Yes,Standard,8,26.04 GiB,Up to 10 Gigabit,Redis,APN2-NodeUsage:cache.m5.2xlarge,CreateCacheCluster:0002,ap-northeast-2,Amazon ElastiCache,$0.765 per Gen Purpose M5.2xlarge node hour running Redis,0.765,Hrs
7,AmazonElastiCache,Asia Pacific (Seoul),AWS Region,cache.r4.2xlarge,Yes,Memory optimized,8,50.47 GiB,Up to 10 Gigabit,Memcached,APN2-NodeUsage:cache.r4.2xlarge,CreateCacheCluster:0001,ap-northeast-2,Amazon ElastiCache,$1.092 per Enhanced High memory R4.2xlarge node hour running Memcached,1.092,Hrs
10,AmazonElastiCache,Asia Pacific (Seoul),AWS Region,cache.r5.xlarge,Yes,Memory optimized,4,26.32 GiB,Up to 10 Gigabit,Memcached,APN2-NodeUsage:cache.r5.xlarge,CreateCacheCluster:0001,ap-northeast-2,Amazon ElastiCache,$0.517 per Enhanced High memory R5.xlarge node hour running Memcached,0.517,Hrs


## 2-5 S3 가격정보 전처리

In [49]:
with open("AmazonS3_2024-10-03.json", "r") as f:
    raw_s3_price_data = json.load(f)

In [62]:
s3_df = flatten_price(raw_s3_price_data, "Storage")
s3_filters = {
    # "availability": ["99.99%", "99.9%", "99.5%"]
    "storageClass": [
        "General Purpose",
        "Infrequent Access",
        "Archive Instant Retrieval",
    ]
}
s3_df = filter_df(s3_df, s3_filters)

In [63]:
s3_df

Unnamed: 0,servicecode,location,locationType,availability,storageClass,volumeType,usagetype,operation,durability,regionCode,servicename,description,pricePerUnit,unit,overhead
6,AmazonS3,Asia Pacific (Seoul),AWS Region,99.99%,General Purpose,Standard,APN2-TimedStorage-ByteHrs,,99.999999999%,ap-northeast-2,Amazon Simple Storage Service,$0.025 per GB - first 50 TB / month of storage used,0.025,GB-Mo,
8,AmazonS3,Asia Pacific (Seoul),AWS Region,99.9%,Infrequent Access,Standard - Infrequent Access,APN2-TimedStorage-SIA-ByteHrs,,99.999999999%,ap-northeast-2,Amazon Simple Storage Service,$0.0138 per GB-Month of storage used in Standard-Infrequent Access,0.0138,GB-Mo,
10,AmazonS3,Asia Pacific (Seoul),AWS Region,99.9%,Archive Instant Retrieval,Glacier Instant Retrieval,APN2-TimedStorage-GIR-ByteHrs,,99.999999999%,ap-northeast-2,Amazon Simple Storage Service,$0.005 per GB-Month of storage used in Glacier Instant Retrieval,0.005,GB-Mo,
14,AmazonS3,Asia Pacific (Seoul),AWS Region,99.5%,Infrequent Access,One Zone - Infrequent Access,APN2-TimedStorage-ZIA-ByteHrs,,99.999999999%,ap-northeast-2,Amazon Simple Storage Service,$0.011 per GB-Month of storage used in One Zone-Infrequent Access,0.011,GB-Mo,


데이터 처리 과정

1. 가격 파일 엑셀로 다운로드 (다운받을 리소스는 환경변수로 관리)
2. json 파일로 오픈
3. dataframe 변환 (flatten_price)
4. 필터링 (filter_df, 필터할 요소는 따로 yaml로 관리?)

# 3. 데이터 저장

* dynamoDB에 데이터 저장
* version (날짜) 확인해서 저장