# EKS CSI EBS Setup
https://docs.aws.amazon.com/eks/latest/userguide/eks-ug.pdf

In [31]:
#!pygmentize eks-ebs-storage-class.yaml

In [32]:
#!kubectl delete -f eks-ebs-storage-class.yaml

In [33]:
#!kubectl create -f eks-ebs-storage-class.yaml

# Setup Amazon EBS CSI driver

In [7]:
!kubectl get storageclass

NAME                        PROVISIONER                                                RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE
gp2 (default)               kubernetes.io/aws-ebs                                      Delete          Immediate              false                  20s
local-hostpath              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  70m
openebs-device              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  100m
openebs-hostpath            openebs.io/local                                           Delete          WaitForFirstConsumer   false                  88m
openebs-jiva-default        openebs.io/provisioner-iscsi                               Delete          Immediate              false                  100m
openebs-snapshot-promoter   volumesnapshot.external-storage.k8s.io/snapshot-prom

In [9]:
!curl -O https://raw.githubusercontent.com/kubernetes-sigs/aws-ebs-csi-driver/v0.6.0/docs/example-iam-policy.json

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   599  100   599    0     0   2980      0 --:--:-- --:--:-- --:--:--  2980


In [10]:
!cat example-iam-policy.json

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "ec2:AttachVolume",
        "ec2:CreateSnapshot",
        "ec2:CreateTags",
        "ec2:CreateVolume",
        "ec2:DeleteSnapshot",
        "ec2:DeleteTags",
        "ec2:DeleteVolume",
        "ec2:DescribeAvailabilityZones",
        "ec2:DescribeInstances",
        "ec2:DescribeSnapshots",
        "ec2:DescribeTags",
        "ec2:DescribeVolumes",
        "ec2:DescribeVolumesModifications",
        "ec2:DetachVolume",
        "ec2:ModifyVolume"
      ],
      "Resource": "*"
    }
  ]
}


In [11]:
!aws iam create-policy --policy-name Amazon_EBS_CSI_Driver \
 --policy-document file://example-iam-policy.json

{
    "Policy": {
        "PolicyName": "Amazon_EBS_CSI_Driver",
        "PolicyId": "ANPATLVNRE7W2RTE2BRUD",
        "Arn": "arn:aws:iam::231218423789:policy/Amazon_EBS_CSI_Driver",
        "Path": "/",
        "DefaultVersionId": "v1",
        "AttachmentCount": 0,
        "PermissionsBoundaryUsageCount": 0,
        "IsAttachable": true,
        "CreateDate": "2020-10-29T11:41:21Z",
        "UpdateDate": "2020-10-29T11:41:21Z"
    }
}


In [34]:
policy_arn='arn:aws:iam::231218423789:policy/Amazon_EBS_CSI_Driver'

In [35]:
!kubectl -n kube-system describe configmap aws-auth

Name:         aws-auth
Namespace:    kube-system
Labels:       <none>
Annotations:  <none>

Data
====
mapRoles:
----
- groups:
  - system:bootstrappers
  - system:nodes
  rolearn: arn:aws:iam::231218423789:role/eksctl-cluster-nodegroup-cpu-node-NodeInstanceRole-1QB4MRINE2FO9
  username: system:node:{{EC2PrivateDNSName}}

Events:  <none>


In [14]:
role_name = 'eksctl-cluster-nodegroup-cpu-node-NodeInstanceRole-1QB4MRINE2FO9'

In [15]:
!aws iam attach-role-policy \
    --policy-arn $policy_arn \
    --role-name $role_name

In [16]:
!kubectl version --client --short

Client Version: v1.15.10-eks-bac369


In [17]:
!kubectl apply -k "github.com/kubernetes-sigs/aws-ebs-csi-driver/deploy/kubernetes/overlays/stable/?ref=master"

serviceaccount/ebs-csi-controller-sa created
clusterrole.rbac.authorization.k8s.io/ebs-external-attacher-role created
clusterrole.rbac.authorization.k8s.io/ebs-external-provisioner-role created
clusterrolebinding.rbac.authorization.k8s.io/ebs-csi-attacher-binding created
clusterrolebinding.rbac.authorization.k8s.io/ebs-csi-provisioner-binding created
deployment.apps/ebs-csi-controller created
daemonset.apps/ebs-csi-node created
csidriver.storage.k8s.io/ebs.csi.aws.com created


In [36]:
# EXAMPLE CODE
#!git clone https://github.com/kubernetes-sigs/aws-ebs-csi-driver.git

# Create Storage Class 

In [19]:
!pygmentize eks-csi-ebs/storageclass.yaml

[94mkind[39;49;00m: StorageClass
[94mapiVersion[39;49;00m: storage.k8s.io/v1
[94mmetadata[39;49;00m:
  [94mannotations[39;49;00m:
    [94mstorageclass.kubernetes.io/is-default-class[39;49;00m: [33m"[39;49;00m[33mtrue[39;49;00m[33m"[39;49;00m
  [94mname[39;49;00m: ebs-sc
[94mprovisioner[39;49;00m: ebs.csi.aws.com
[94mvolumeBindingMode[39;49;00m: WaitForFirstConsumer


In [20]:
!kubectl apply -f eks-csi-ebs/storageclass.yaml

storageclass.storage.k8s.io/ebs-sc created


In [4]:
!kubectl delete -f eks-csi-ebs/storageclass.yaml

storageclass.storage.k8s.io "ebs-sc" deleted


In [23]:
!kubectl get storageclass

NAME                        PROVISIONER                                                RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE
ebs-sc (default)            ebs.csi.aws.com                                            Delete          WaitForFirstConsumer   false                  57s
local-hostpath              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  83m
openebs-device              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  113m
openebs-hostpath            openebs.io/local                                           Delete          WaitForFirstConsumer   false                  101m
openebs-jiva-default        openebs.io/provisioner-iscsi                               Delete          Immediate              false                  113m
openebs-snapshot-promoter   volumesnapshot.external-storage.k8s.io/snapshot-pro

# Create Claim

In [24]:
!pygmentize eks-csi-ebs/claim.yaml

[94mapiVersion[39;49;00m: v1
[94mkind[39;49;00m: PersistentVolumeClaim
[94mmetadata[39;49;00m:
  [94mname[39;49;00m: ebs-claim
[94mspec[39;49;00m:
  [94maccessModes[39;49;00m:
    - ReadWriteOnce
  [94mstorageClassName[39;49;00m: ebs-sc
  [94mresources[39;49;00m:
    [94mrequests[39;49;00m:
      [94mstorage[39;49;00m: 5Gi


In [25]:
!kubectl apply -f eks-csi-ebs/claim.yaml

persistentvolumeclaim/ebs-claim created


In [2]:
!kubectl delete -f eks-csi-ebs/claim.yaml

persistentvolumeclaim "ebs-claim" deleted
^C


# Create Pod

In [26]:
!pygmentize bert-csi-ebs.yaml

[04m[36m---[39;49;00m 
[94mapiVersion[39;49;00m: v1
[94mkind[39;49;00m: Pod
[94mmetadata[39;49;00m:
  [94mname[39;49;00m: bert-ml-pod-eks
[94mspec[39;49;00m:
  [94mvolumes[39;49;00m:
  - [94mname[39;49;00m: persistent-storage
    [94mpersistentVolumeClaim[39;49;00m:
      [94mclaimName[39;49;00m: ebs-claim
  [94mcontainers[39;49;00m: 
    - [94mname[39;49;00m: bert
      [94mcommand[39;49;00m: 
        - python
        - /opt/ml/code/train.py
        - --train_steps_per_epoch=1
        - --epochs=1
        - --learning_rate=0.00001
        - --epsilon=0.00000001
        - --train_batch_size=36
        - --validation_batch_size=18
        - --test_batch_size=18
        - --train_steps_per_epoch=1
        - --validation_steps=1
        - --test_steps=1
        - --use_xla=True
        - --use_amp=False
        - --max_seq_length=64
        - --freeze_bert_layer=True
        - --enable_sagemaker_debugger=False
        - --enable_checkpointing=False
        - --e

In [27]:
!kubectl apply -f bert-csi-ebs.yaml

pod/bert-ml-pod-eks created


In [1]:
!kubectl get pod bert-csi-ebs

Error from server (NotFound): pods "bert-csi-ebs" not found


In [29]:
!kubectl describe pod bert-csi-ebs

Name:         bert-ml-pod-eks
Namespace:    kubeflow
Priority:     0
Node:         ip-192-168-67-206.us-west-2.compute.internal/192.168.67.206
Start Time:   Thu, 29 Oct 2020 11:58:46 +0000
Labels:       <none>
Annotations:  kubectl.kubernetes.io/last-applied-configuration:
                {"apiVersion":"v1","kind":"Pod","metadata":{"annotations":{},"name":"bert-ml-pod-eks","namespace":"kubeflow"},"spec":{"containers":[{"comm...
              kubernetes.io/psp: eks.privileged
Status:       Pending
IP:           
Containers:
  bert:
    Container ID:  
    Image:         231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert
    Image ID:      
    Port:          <none>
    Host Port:     <none>
    Command:
      python
      /opt/ml/code/train.py
      --train_steps_per_epoch=1
      --epochs=1
      --learning_rate=0.00001
      --epsilon=0.00000001
      --train_batch_size=36
      --validation_batch_size=18
      --test_batch_size=18
      --train_steps_per_epoch=1
      --valid

In [None]:
# pvc-12f27651-af46-4dfb-a181-5c768c87add9

In [30]:
!kubectl logs -f bert-csi-ebs

python: can't open file '/opt/ml/code/train.py': [Errno 2] No such file or directory


In [37]:
!kubectl describe storageclass ebs-sc

Name:            ebs-sc
IsDefaultClass:  Yes
Annotations:     kubectl.kubernetes.io/last-applied-configuration={"apiVersion":"storage.k8s.io/v1","kind":"StorageClass","metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"},"name":"ebs-sc"},"provisioner":"ebs.csi.aws.com","volumeBindingMode":"WaitForFirstConsumer"}
,storageclass.kubernetes.io/is-default-class=true
Provisioner:           ebs.csi.aws.com
Parameters:            <none>
AllowVolumeExpansion:  <unset>
MountOptions:          <none>
ReclaimPolicy:         Delete
VolumeBindingMode:     WaitForFirstConsumer
Events:                <none>


In [38]:
!kubectl get pv

NAME                                       CAPACITY   ACCESS MODES   RECLAIM POLICY   STATUS   CLAIM                          STORAGECLASS     REASON   AGE
pvc-097f3863-4fa9-4303-9195-d84d51a89985   20Gi       RWO            Delete           Bound    kubeflow/mysql-pv-claim        gp2                       53d
pvc-12f27651-af46-4dfb-a181-5c768c87add9   5Gi        RWO            Delete           Bound    kubeflow/ebs-claim             ebs-sc                    36m
pvc-36899807-7262-46fe-9627-5b3338abd535   10Gi       RWO            Delete           Bound    kubeflow/katib-mysql           gp2                       53d
pvc-47f293c3-6406-4e1d-ac68-97190bbe8e5f   10Gi       RWO            Delete           Bound    anonymous/workspace-notebook   gp2                       53d
pvc-64da95de-d5fc-4df6-b449-19439dbc7345   20Gi       RWO            Delete           Bound    kubeflow/minio-pv-claim        gp2                       53d
pvc-a252cad0-6cc0-4f08-8c81-d0e257849351   5G         RWO       

In [39]:
!kubectl describe pv pvc-12f27651-af46-4dfb-a181-5c768c87add9

Name:              pvc-12f27651-af46-4dfb-a181-5c768c87add9
Labels:            <none>
Annotations:       pv.kubernetes.io/provisioned-by: ebs.csi.aws.com
Finalizers:        [kubernetes.io/pv-protection external-attacher/ebs-csi-aws-com]
StorageClass:      ebs-sc
Status:            Bound
Claim:             kubeflow/ebs-claim
Reclaim Policy:    Delete
Access Modes:      RWO
VolumeMode:        Filesystem
Capacity:          5Gi
Node Affinity:     
  Required Terms:  
    Term 0:        topology.ebs.csi.aws.com/zone in [us-west-2d]
Message:           
Source:
    Type:              CSI (a Container Storage Interface (CSI) volume source)
    Driver:            ebs.csi.aws.com
    VolumeHandle:      vol-0e9e845913141f9db
    ReadOnly:          false
    VolumeAttributes:      storage.kubernetes.io/csiProvisionerIdentity=1603971912642-8081-ebs.csi.aws.com
Events:                <none>


In [None]:
ebs_volume='vol-0e9e845913141f9db'

In [42]:
!kubectl exec -it bert-csi-ebs ls /opt/ml/model

error: cannot exec into a container in a completed pod; current phase is Succeeded


In [44]:
!aws ec2 describe-volumes \
    --volume-ids vol-0e9e845913141f9db

{
    "Volumes": [
        {
            "Attachments": [],
            "AvailabilityZone": "us-west-2d",
            "CreateTime": "2020-10-29T11:58:37.002Z",
            "Encrypted": false,
            "Size": 5,
            "SnapshotId": "",
            "State": "available",
            "VolumeId": "vol-0e9e845913141f9db",
            "Iops": 100,
            "Tags": [
                {
                    "Key": "CSIVolumeName",
                    "Value": "pvc-12f27651-af46-4dfb-a181-5c768c87add9"
                }
            ],
            "VolumeType": "gp2",
            "MultiAttachEnabled": false
        }
    ]
}
