# AWS RDS - CPU
This notebook shows the CPU utilization of an AWS RDS instance.

This early version of the notebook presents the data as is, without any further analysis.
## Cells
- Configuration - open an AWS session to CloudWatch. 
- Get a list of RDS instances
- Show the CPU utilization (min, max, avg) of a given RDS instance over a recent time window (the last few days).

## Configuration
- Configure the AWS credentials: access_key_id, secret_access_key, region_name
- Configure the PG connection string (currently not used by the code below)

In [6]:
import configparser

# Read from the Config file
# The parser is created up front so `config` always exists, even when the
# file cannot be opened.
config = configparser.ConfigParser()
try:
    # The context manager closes the file handle even if parsing fails.
    with open(r'../ipynb.cfg') as config_file:
        config.read_file(config_file)
except Exception as e:
    print(f"Error opening the configuration file: {e}")

# Pull the AWS credentials out of the [credentials] section.
try:
    # con_str = config.get('con_str', 'PG_AIRBASES')
    access_key_id = config.get('credentials', 'ACCESS_KEY_ID')
    secret_access_key = config.get('credentials', 'SECRET_ACCESS_KEY')
    region_name = 'eu-central-1'
except Exception as e:
    # Distinct message: at this point the failure is a missing section/option,
    # not a problem opening the file.
    print(f"Error reading the AWS credentials from the configuration file: {e}")



## Prerequisites
- pip install boto3
- pip install dash

## Get a list of RDS Instances
The AWS credentials may give access to many RDS instances; this cell lists all of them.

In [7]:
import boto3

def create_aws_session(access_key_id, secret_access_key, region_name):
    """Build a boto3 session from explicit credentials.

    Args:
        access_key_id: Value passed to boto3 as ``aws_access_key_id``.
        secret_access_key: Value passed to boto3 as ``aws_secret_access_key``.
        region_name: Value passed to boto3 as ``region_name``.

    Returns:
        The created ``boto3.Session``, or ``None`` if creating it raised
        (the error is printed instead of being propagated).
    """
    session_kwargs = {
        'aws_access_key_id': access_key_id,
        'aws_secret_access_key': secret_access_key,
        'region_name': region_name,
    }
    try:
        return boto3.Session(**session_kwargs)
    except Exception as exc:
        print("An error occurred while creating the AWS session:", str(exc))
        return None

def list_rds_instances(session):
    """Return the description of every RDS instance visible to the session.

    Args:
        session: A boto3 session (as returned by ``create_aws_session``), or
            ``None`` when session creation failed.

    Returns:
        A list with one ``DBInstances`` entry per RDS instance. An empty list
        is returned when ``session`` is ``None`` or when the lookup raised
        (the error is printed instead of being propagated).
    """
    if session is None:
        print("AWS session is not valid.")
        return []

    try:
        # Create an RDS client using the session
        rds_client = session.client('rds')

        # describe_db_instances is paginated; a single call only returns the
        # first page, so walk every page to avoid silently dropping instances.
        paginator = rds_client.get_paginator('describe_db_instances')

        rds_instances = []
        for page in paginator.paginate():
            rds_instances.extend(page.get('DBInstances', []))
        return rds_instances

    except Exception as e:
        print("An error occurred while listing RDS instances:", str(e))
        return []

# Open the AWS session once; the cells further down reuse it.
session = create_aws_session(access_key_id, secret_access_key, region_name)

# create_aws_session returns None on failure, so only build the CloudWatch
# client when a session actually exists (avoids an AttributeError on None).
cloudwatch = session.client('cloudwatch') if session is not None else None

# list_rds_instances copes with a None session and returns [] in that case.
rds_instances = list_rds_instances(session)

if len(rds_instances) > 0:
    print("RDS Instances:")
    for instance in rds_instances:
        instance_id = instance['DBInstanceIdentifier']
        instance_class = instance['DBInstanceClass']
        # The 'Endpoint' entry can be missing (for example while an instance
        # is still being created), so fall back to a placeholder instead of
        # raising KeyError and aborting the whole listing.
        endpoint = (instance.get('Endpoint') or {}).get('Address', 'not available')
        print("Instance ID:", instance_id)
        print("Instance Class:", instance_class)
        print("Endpoint:", endpoint)
        print("---")
else:
    print("No RDS instances found.")


RDS Instances:
Instance ID: database-2
Instance Class: db.m6g.large
Endpoint: database-2.cofhrj7zmyn4.eu-central-1.rds.amazonaws.com
---
Instance ID: db-4-v14
Instance Class: db.t4g.micro
Endpoint: db-4-v14.cofhrj7zmyn4.eu-central-1.rds.amazonaws.com
---
Instance ID: db-v-137
Instance Class: db.t4g.micro
Endpoint: db-v-137.cofhrj7zmyn4.eu-central-1.rds.amazonaws.com
---
Instance ID: metis-prod-v2
Instance Class: db.m6g.large
Endpoint: metis-prod-v2.cofhrj7zmyn4.eu-central-1.rds.amazonaws.com
---
Instance ID: mysql-for-test
Instance Class: db.t3.micro
Endpoint: mysql-for-test.cofhrj7zmyn4.eu-central-1.rds.amazonaws.com
---


## Show the CPU 
Use Boto3 to fetch the CPU utilization data of an RDS instance from CloudWatch.

TODO: Calculate the actual CPU, memory and IO from the instance type (such as  db.m6g.large)

In [8]:
import boto3
import pandas as pd
from datetime import datetime, timedelta


# Shows the CPU utilization of an RDS instance
def rds_cpu(rds_instance_id, session, period=300, days=7):
    """Fetch the average CPU utilization of one RDS instance from CloudWatch.

    Args:
        rds_instance_id: Value used for the ``DBInstanceIdentifier`` dimension.
        session: boto3 session used to create the CloudWatch client.
        period: Period for metric data retrieval, in seconds
            (for example 300 means 5 minutes).
        days: Length of the look-back window ending now, in days.
            Defaults to 7, the window that used to be hard-coded.

    Returns:
        A pandas DataFrame with the columns ``Timestamp`` and
        ``CPUUtilization``, in the order CloudWatch returned the data points,
        or ``None`` if anything raised (the error is printed instead of being
        propagated).
    """
    # Local import: the surrounding cell only imports datetime and timedelta.
    from datetime import timezone

    try:
        metric_name = 'CPUUtilization'

        # Create a CloudWatch client using the existing AWS session
        cloudwatch = session.client('cloudwatch')

        # Use a timezone-aware "now" in UTC; datetime.utcnow() is deprecated
        # and yields a naive value.
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days)

        # Get CPU utilization metric data; the datetime objects are passed
        # directly, no manual ISO-string conversion is needed.
        response = cloudwatch.get_metric_data(
            MetricDataQueries=[
                {
                    'Id': 'm1',
                    'MetricStat': {
                        'Metric': {
                            'Namespace': 'AWS/RDS',
                            'MetricName': metric_name,
                            'Dimensions': [
                                {
                                    'Name': 'DBInstanceIdentifier',
                                    'Value': rds_instance_id
                                },
                            ]
                        },
                        'Period': period,
                        'Stat': 'Average',
                    },
                    'ReturnData': True,
                },
            ],
            StartTime=start_time,
            EndTime=end_time,
        )

        # Only one query was sent, so the first result holds its data points.
        result = response['MetricDataResults'][0]
        cpu_data = pd.DataFrame({
            'Timestamp': result['Timestamps'],
            metric_name: result['Values'],
        })

        return cpu_data

    except Exception as e:
        print("An error occurred while retrieving CPU utilization:", str(e))
        return None

# Name of the RDS instance to inspect; replace it with one of your own instances
rds_instance_id = 'database-2'

# Query CloudWatch for that instance through the session opened earlier
cpu_data = rds_cpu(rds_instance_id=rds_instance_id, session=session)

# rds_cpu returns None when the lookup failed, so only print a real result
if cpu_data is not None:
    print(cpu_data)


                     Timestamp  CPUUtilization
0    2023-09-17 12:32:00+00:00        7.205000
1    2023-09-17 12:27:00+00:00        7.076667
2    2023-09-17 12:22:00+00:00        7.193333
3    2023-09-17 12:17:00+00:00        6.823333
4    2023-09-17 12:12:00+00:00        6.688333
...                        ...             ...
2011 2023-09-10 12:57:00+00:00       11.593333
2012 2023-09-10 12:52:00+00:00        9.566667
2013 2023-09-10 12:47:00+00:00        9.150000
2014 2023-09-10 12:42:00+00:00        8.660000
2015 2023-09-10 12:37:00+00:00        8.856667

[2016 rows x 2 columns]


In [9]:
import boto3
import pandas as pd
from datetime import datetime, timedelta

def round_to_nearest_10_minutes(dt):
    """Snap a datetime to the start of its 10-minute bucket.

    Despite the name, the value is always rounded *down*: the minute is
    lowered to the previous multiple of 10 and seconds and microseconds are
    cleared, so 12:37:45 becomes 12:30:00.

    Args:
        dt: The datetime to truncate.

    Returns:
        A new datetime with the truncated minute and zeroed second and
        microsecond; every other field is carried over from ``dt``.
    """
    floored_minute = dt.minute - dt.minute % 10
    return dt.replace(minute=floored_minute, second=0, microsecond=0)

# Shows the CPU utilization of an RDS instance
def rds_cpu(rds_instance_id, session, period=300, days=3):
    """Fetch average, minimum and maximum CPU utilization of one RDS instance.

    Args:
        rds_instance_id: Value used for the ``DBInstanceIdentifier`` dimension.
        session: boto3 session used to create the CloudWatch client.
        period: Period for metric data retrieval, in seconds.
        days: Length of the look-back window, in days. Defaults to 3, the
            window that used to be hard-coded. The window ends at the current
            UTC time after it has been passed through
            ``round_to_nearest_10_minutes``.

    Returns:
        A pandas DataFrame with the columns ``Timestamp``,
        ``Average_CPUUtilization``, ``Minimum_CPUUtilization`` and
        ``Maximum_CPUUtilization``, or ``None`` if anything raised (the error
        is printed instead of being propagated). The timestamps are taken
        from the 'Average' series.
    """
    # Local import: the surrounding cell only imports datetime and timedelta.
    from datetime import timezone

    # Query id -> (CloudWatch statistic, output column name).
    stat_queries = {
        'm1': ('Average', 'Average_CPUUtilization'),
        'm2': ('Minimum', 'Minimum_CPUUtilization'),
        'm3': ('Maximum', 'Maximum_CPUUtilization'),
    }

    try:
        metric_name = 'CPUUtilization'

        # Create a CloudWatch client using the existing AWS session
        cloudwatch = session.client('cloudwatch')

        # End of the window: timezone-aware "now" in UTC (datetime.utcnow()
        # is deprecated), aligned by round_to_nearest_10_minutes.
        end_time = round_to_nearest_10_minutes(datetime.now(timezone.utc))

        # Start of the window: `days` days before the end time
        start_time = end_time - timedelta(days=days)

        # All three queries target the same metric and differ only in 'Stat'.
        metric = {
            'Namespace': 'AWS/RDS',
            'MetricName': metric_name,
            'Dimensions': [
                {
                    'Name': 'DBInstanceIdentifier',
                    'Value': rds_instance_id
                },
            ]
        }

        # Get CPU utilization metric data (average, minimum, and maximum)
        response = cloudwatch.get_metric_data(
            MetricDataQueries=[
                {
                    'Id': query_id,
                    'MetricStat': {
                        'Metric': metric,
                        'Period': period,
                        'Stat': stat,
                    },
                    'ReturnData': True,
                }
                for query_id, (stat, _column) in stat_queries.items()
            ],
            StartTime=start_time,
            EndTime=end_time,
        )

        # Match each result to its query through 'Id' rather than relying on
        # the position of the entry in the response list.
        results_by_id = {
            result['Id']: result for result in response['MetricDataResults']
        }

        columns = {'Timestamp': results_by_id['m1']['Timestamps']}
        for query_id, (_stat, column) in stat_queries.items():
            columns[column] = results_by_id[query_id]['Values']

        cpu_data = pd.DataFrame(columns)

        return cpu_data

    except Exception as e:
        print("An error occurred while retrieving CPU utilization:", str(e))
        return None

# Name of the RDS instance to inspect; replace it with one of your own instances
rds_instance_id = 'database-2'

# Fetch avg/min/max CPU through the existing session, using 10-minute
# buckets (period=600); pick another period if a different resolution is needed
cpu_data = rds_cpu(rds_instance_id=rds_instance_id, session=session, period=600)

# The result is intentionally not printed here; to inspect it, run:
#     print(cpu_data)


In [10]:
import plotly.subplots as sp
import plotly.graph_objs as go
import pandas as pd
from datetime import datetime, timedelta

# Plot the average, minimum and maximum CPU utilization stored in cpu_data
# (the DataFrame produced by the rds_cpu call in the previous cell).

if cpu_data is None:
    # rds_cpu returns None when the CloudWatch query failed; subscripting
    # None below would raise a TypeError, so report it and skip the chart.
    print("No CPU utilization data available to plot.")
else:
    # Create a subplot with multiple lines
    fig = sp.make_subplots(rows=1, cols=1)  # You can adjust the number of rows and columns as needed

    # Build one line trace per statistic, all sharing the timestamp axis
    trace_avg = go.Scatter(x=cpu_data['Timestamp'], y=cpu_data['Average_CPUUtilization'], mode='lines', name='Average CPU Utilization')
    trace_min = go.Scatter(x=cpu_data['Timestamp'], y=cpu_data['Minimum_CPUUtilization'], mode='lines', name='Minimum CPU Utilization')
    trace_max = go.Scatter(x=cpu_data['Timestamp'], y=cpu_data['Maximum_CPUUtilization'], mode='lines', name='Maximum CPU Utilization')

    # Add the traces to the subplot
    for trace in (trace_avg, trace_min, trace_max):
        fig.add_trace(trace)

    # Customize the layout
    fig.update_layout(
        title='RDS CPU Utilization Over Time',
        xaxis=dict(title='Timestamp'),
        yaxis=dict(title='CPU Utilization (%)'),
        legend=dict(title='Metric Type')
    )

    # Show the interactive chart
    fig.show()
