# Networking

## Dependencies

In [102]:
from packages.helpers.helpers import joel_boto
import boto3

## Functions

In [103]:
def dependent_variables():
    website_name = website_name_com[:-4]
    www_domain = f"www.{website_name_com}"
    s3_log_bucket_name = website_name_com + '-logs'
    cloudfront_logs_location = f's3://{s3_log_bucket_name}/AWSLogs/'
    ATHENA_DATABASE = website_name + "_website"
    ATHENA_TABLE = "cloudfront_logs"
    ATHENA_OUTPUT_BUCKET = f"s3://{s3_log_bucket_name}/{website_name}_website_athena_parquet/" 
    S3_WEBSITE_ENDPOINT = f"{website_name_com}.s3.{aws_region}.amazonaws.com"

    return website_name, www_domain, s3_log_bucket_name, cloudfront_logs_location, ATHENA_DATABASE, ATHENA_TABLE, ATHENA_OUTPUT_BUCKET, S3_WEBSITE_ENDPOINT

In [104]:
def create_acm_certificate():

    www_name_2 = 'www.' + website_name_com
    subject_alternative_names = [www_name_2]

    # Create ACM client in us-east-1 — CloudFront only uses this region
    acm = boto3.client('acm', region_name='us-east-1')

    # Request a public certificate
    response = acm.request_certificate(
        DomainName=website_name_com,
        ValidationMethod='DNS',
        SubjectAlternativeNames=subject_alternative_names,
        Options={
            'CertificateTransparencyLoggingPreference': 'ENABLED'
        },
        IdempotencyToken=f'{website_name}cert01',  # Can be any unique token to avoid duplicates
        Tags=[
            {'Key': 'Name', 'Value': f'{website_name}-ssl'}
        ]
    )

    return response['CertificateArn']

In [105]:
def get_website_vistor_counts():
    create_and_fill_table_query = f"""
        SELECT COUNT(DISTINCT c_ip) AS unique_visitors
        FROM {ATHENA_DATABASE}.{ATHENA_TABLE}
        WHERE sc_status IN (200, 304)
        AND x_edge_result_type IN ('Hit', 'Miss');
    """
    x = jb.query_athena(create_and_fill_table_query, ATHENA_DATABASE, ATHENA_OUTPUT_BUCKET)
    x = jb.create_df_from_athena_query(x)
    total_unique_visitors = x["Unique Visitors"][0]

    create_and_fill_table_query = f"""
        SELECT COUNT(*) AS successful_requests
        FROM {ATHENA_DATABASE}.{ATHENA_TABLE}
        WHERE sc_status IN (200, 304)
        AND x_edge_result_type IN ('Hit', 'Miss');
    """
    x = jb.query_athena(create_and_fill_table_query, ATHENA_DATABASE, ATHENA_OUTPUT_BUCKET)
    x = jb.create_df_from_athena_query(x)
    total_requests = x["Successful Requests"][0]

    return total_unique_visitors, total_requests

In [106]:
def create_cloudfront_oac():
    client = boto3.client("cloudfront")
    
    # Generate a unique name for the OAC
    oac_name = f"{website_name}.com"
    
    response = client.create_origin_access_control(
        OriginAccessControlConfig={
            "Name": oac_name,
            "Description": "OAC for accessing private S3 bucket",
            "SigningProtocol": "sigv4",
            "SigningBehavior": "always",
            "OriginAccessControlOriginType": "s3"
        }
    )

    oac_id = response['OriginAccessControl']['Id']

    return oac_id

In [107]:
def delete_cloudfront_oac(oac_id):
    client = boto3.client("cloudfront")

    # Get the current config and ETag required for deletion
    get_resp = client.get_origin_access_control(Id=oac_id)
    etag = get_resp['ETag']

    # Now delete using the Id and ETag
    del_resp = client.delete_origin_access_control(
        Id=oac_id,
        IfMatch=etag
    )

    print(f"✅ Deleted CloudFront OAC: {oac_id}")
    return del_resp

In [122]:
def create_cloudfront_distribution_with_oac(
    bucket_name,
    aws_region,
    domain_name,
    www_domain,
    ssl_cert_arn,
    oac_id
):
    cf = boto3.client("cloudfront")

    origin_domain = f"{bucket_name}.s3.{aws_region}.amazonaws.com"

    response = cf.create_distribution(
        DistributionConfig={
            "CallerReference": domain_name,
            "Origins": {
                "Quantity": 1,
                "Items": [
                    {
                        "Id": domain_name,
                        "DomainName": origin_domain,
                        "OriginAccessControlId": oac_id,
                        "S3OriginConfig": {
                            "OriginAccessIdentity": ""
                        }
                    }
                ]
            },
            "DefaultCacheBehavior": {
                "TargetOriginId": domain_name,
                "ViewerProtocolPolicy": "redirect-to-https",
                "AllowedMethods": {
                    "Quantity": 2,
                    "Items": ["GET", "HEAD"],
                    "CachedMethods": {
                        "Quantity": 2,
                        "Items": ["GET", "HEAD"]
                    }
                },
                "Compress": True,
                "ForwardedValues": {
                    "QueryString": False,
                    "Cookies": {"Forward": "none"}
                },
                "TrustedSigners": {"Enabled": False, "Quantity": 0},
                "TrustedKeyGroups": {"Enabled": False, "Quantity": 0},
                "MinTTL": 0,
                "DefaultTTL": 86400,
                "MaxTTL": 31536000
            },
            "DefaultRootObject": "index.html",
            "Logging": {
                "Enabled": True,
                "IncludeCookies": False,
                "Bucket": f"{s3_log_bucket_name}.s3.amazonaws.com",
                "Prefix": f"AWSLogs/{jb.account_id}/CloudFront/"
            },
            "Enabled": True,
            "ViewerCertificate": {
                "ACMCertificateArn": ssl_cert_arn,
                "SSLSupportMethod": "sni-only",
                "MinimumProtocolVersion": "TLSv1.2_2021",
                "Certificate": ssl_cert_arn,
                "CertificateSource": "acm"
            },
            "Aliases": {
                "Quantity": 2,
                "Items": [domain_name, www_domain]
            },
            "Comment": f"CloudFront distribution for {domain_name}"
        }
    )

    dist_id = response['Distribution']['Id']
    dist_domain = response['Distribution']['DomainName']

    print(f"✅ CloudFront distribution created:")
    print(f"  ID: {dist_id}")
    print(f"  Domain name: {dist_domain}")

    return response

## Custom Variables

In [109]:
website_name_com = 'chalkjuice.com'
aws_region = jb.region

### Dependent Variables():

In [110]:
# Connect to custom AWS class
jb = joel_boto()
aws_region = jb.region
website_name, www_domain, s3_log_bucket_name, cloudfront_logs_location, ATHENA_DATABASE, ATHENA_TABLE, ATHENA_OUTPUT_BUCKET, S3_WEBSITE_ENDPOINT = dependent_variables()

running local credentials
✅ Logged in to ECR successfully.
✅ Connected to all clients successfully.


## Create s3 buckets

In [111]:
# TO DO: Manual for now

## Setup cloudfront logs with athena

In [112]:
create_db_query = f"CREATE DATABASE IF NOT EXISTS {ATHENA_DATABASE}"
jb.query_athena(create_db_query, ATHENA_DATABASE, ATHENA_OUTPUT_BUCKET)

Query finished with status: SUCCEEDED


'e8521cdf-870a-44b9-8285-f68f11a24a3c'

In [113]:
remove_table_query = f'DROP TABLE IF EXISTS {ATHENA_DATABASE}.{ATHENA_TABLE};'
jb.query_athena(remove_table_query, ATHENA_DATABASE, ATHENA_OUTPUT_BUCKET)

Query finished with status: SUCCEEDED


'14e1db20-c63d-48f8-9eb6-951241839141'

In [114]:
create_and_fill_table_query = f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {ATHENA_DATABASE}.{ATHENA_TABLE} (
    `date` date,
    time string,
    x_edge_location string,
    sc_bytes bigint,
    c_ip string,
    cs_method string,
    cs_host string,
    cs_uri_stem string,
    sc_status int,
    cs_referer string,
    cs_user_agent string,
    cs_uri_query string,
    cs_cookie string,
    x_edge_result_type string,
    x_edge_request_id string,
    x_host_header string,
    cs_protocol string,
    cs_bytes bigint,
    time_taken double,
    x_forwarded_for string,
    ssl_protocol string,
    ssl_cipher string,
    x_edge_response_result_type string,
    cs_protocol_version string,
    fle_status string,
    fle_encrypted_fields string,
    c_port int,
    time_to_first_byte double,
    x_edge_detailed_result_type string,
    sc_content_type string,
    sc_content_len string,
    sc_range_start string,
    sc_range_end string
    )
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
    WITH SERDEPROPERTIES (
    'field.delim' = '\t',
    'serialization.format' = '\t'
    )
    LOCATION '{cloudfront_logs_location}'
    TBLPROPERTIES ('skip.header.line.count'='2');
"""
jb.query_athena(create_and_fill_table_query, ATHENA_DATABASE, ATHENA_OUTPUT_BUCKET)

Query finished with status: SUCCEEDED


'b49e19d8-6169-468e-8308-7c83b727ad70'

### Monitor website visitor count

In [115]:
total_unique_visitors, total_requests = get_website_vistor_counts()
print("Total Unique Visitors: ", total_unique_visitors, "Total Requests: ", total_requests)

Query finished with status: SUCCEEDED
Query finished with status: SUCCEEDED
Total Unique Visitors:  8 Total Requests:  11


## Deploy Site via Cloudfront 

In [117]:
# Get the certificate ARN
acm_cert_arn = create_acm_certificate()

## Create cloudfront oac
#delete_cloudfront_oac("E3MR1IF04E4HTX") # If need be while developing
oac_id = create_cloudfront_oac()


In [123]:
## Create cloudFront
create_cloudfront_distribution_with_oac(website_name_com,aws_region, website_name_com, www_domain, acm_cert_arn, oac_id)

✅ CloudFront distribution created:
  ID: E3R61PZPW95NK0
  Domain name: d2ywzz6etyffco.cloudfront.net


{'ResponseMetadata': {'RequestId': '6c7aa92f-2ca2-4d34-b7d1-a06e6a40629f',
  'HTTPStatusCode': 201,
  'HTTPHeaders': {'x-amzn-requestid': '6c7aa92f-2ca2-4d34-b7d1-a06e6a40629f',
   'etag': 'E3BJAFEA6CK16',
   'location': 'https://cloudfront.amazonaws.com/2020-05-31/distribution/E3R61PZPW95NK0',
   'content-type': 'text/xml',
   'content-length': '4028',
   'date': 'Fri, 13 Jun 2025 10:57:37 GMT'},
  'RetryAttempts': 0},
 'Location': 'https://cloudfront.amazonaws.com/2020-05-31/distribution/E3R61PZPW95NK0',
 'ETag': 'E3BJAFEA6CK16',
 'Distribution': {'Id': 'E3R61PZPW95NK0',
  'ARN': 'arn:aws:cloudfront::026090519913:distribution/E3R61PZPW95NK0',
  'Status': 'InProgress',
  'LastModifiedTime': datetime.datetime(2025, 6, 13, 10, 57, 36, 891000, tzinfo=tzutc()),
  'InProgressInvalidationBatches': 0,
  'DomainName': 'd2ywzz6etyffco.cloudfront.net',
  'ActiveTrustedSigners': {'Enabled': False, 'Quantity': 0},
  'ActiveTrustedKeyGroups': {'Enabled': False, 'Quantity': 0},
  'DistributionConfi

## add cname and A records to route 53

In [101]:
# Do this step manually. Ensure set-up looks good. 

## Update 'chalkjuice.com' s3 bucket policy to include new distrobution id