## S3 Bucket Operations

1. Creating an S3 resource object with boto3
2. Upload Files to S3 Bucket
3. Download Files from S3
4. Loading CSV file from S3 bucket using Pandas
5. Upload files in bulk to an S3 bucket
6. Download files in bulk from an S3 bucket
7. Getting names of all files from the bucket
8. Getting names of all buckets in the AWS S3 account

In [1]:
import boto3
import pandas as pd
import os
from botocore.client import ClientError

### 1. Creating an S3 resource object with boto3

In [3]:
# Build the boto3 S3 resource used by every cell below.
# Prerequisites: create the bucket on AWS and copy aws_access_key_id /
# aws_secret_access_key from IAM. Replace the placeholders locally only;
# do not commit real keys.

s3 = boto3.resource(
    's3',
    region_name='us-east-2',
    aws_access_key_id='<aws_access_key_id>',
    aws_secret_access_key='<aws_secret_access_key>',
)

# Handle to the bucket this notebook works with
my_bucket = s3.Bucket('predictionwaferfiles')

### 2. Upload Files to S3 Bucket

In [4]:
# Connect to the bucket by the name it was given when created on AWS S3

my_bucket = s3.Bucket('predictionwaferfiles')

# Key is the name the file is stored under in the S3 bucket; it can be any name
# Filename is the local file to upload (include its path if it is in a different folder)

my_bucket.upload_file(Filename='Input.csv', Key='Input.csv')

In [6]:
# Check that the file has been uploaded: walk every object in the bucket
# and print its key (the name it is stored under)

for files in my_bucket.objects.all():
    print(files.key)

Input.csv


### 3. Download Files from S3

In [10]:
# Connect to bucket
my_bucket = s3.Bucket('predictionwaferfiles')

# Create the local directory that will receive the file from the bucket.
# exist_ok=True keeps this cell re-runnable: without it, a second run raises
# FileExistsError because the directory is already there.
os.makedirs("S3", exist_ok=True)

# Key is the name of the file in the bucket
# Filename is the local path (directory + file name) the download is saved as

my_bucket.download_file(Filename='S3/InputDownload.csv', Key='Input.csv')

In [11]:
# Check that the file has been downloaded by listing the local "S3" directory

os.listdir("S3")

['InputDownload.csv']

### 4. Loading CSV file from S3 bucket using Pandas

In [13]:
# Connect to bucket
my_bucket = s3.Bucket('predictionwaferfiles')


# Select the object by its key and fetch it; the response is indexed by 'Body' below
obj = my_bucket.Object('Input.csv').get()


# Read the CSV straight from the response, passing obj['Body'] in place of a file path
df = pd.read_csv(obj['Body'])
# Last expression of the cell, so the notebook displays the first rows of the frame
df.head()

Unnamed: 0.1,Unnamed: 0,_id,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,0,620dfa7bd16ece6bc2785777,1,0,3,Dikshya Pradhan,male,22,1,0,A/5 21171,7.25,,S
1,1,620dfa7bd16ece6bc2785778,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38,1,0,PC 17599,71.2833,C85,C
2,2,620dfa7bd16ece6bc2785779,3,1,1,Sam Claflin,female,38,1,0,PC 17599,71.2833,C85,C
3,3,620dfa7bd16ece6bc278577a,4,1,1,Chris Evans,female,38,1,0,PC 17599,71.2833,C85,C
4,4,620dfa7bd16ece6bc278577b,1,0,3,"Heikkinen, Miss. Laina",male,22,1,0,A/5 21171,7.25,,S


### 5. Upload files in bulk to an S3 bucket

In [22]:
import boto3
import pandas as pd
import os
from botocore.client import ClientError

class UploadRawDataToCloud:
    """
    This class shall be used for uploading all the data to AWS cloud from the local machine.

    Written By: Dikshya Pradhan

    """

    def __init__(self):
        pass

    def trainingbatchfilestocloud(self, bucket_name='predictionwaferfiles', data_dir='Good_Raw'):
        """
        Upload every regular file found directly inside a local folder to an S3 bucket.

        Each file is stored under a key equal to its file name. Sub-directories
        of the folder are skipped, because upload_file sends a single file.

        Parameters:
            bucket_name: name of the destination S3 bucket.
            data_dir: folder holding the files to upload; a relative path is
                resolved against the current working directory.

        Returns:
            None when no ClientError occurs; otherwise the ClientError instance,
            which is returned to the caller rather than raised.

        Raises:
            OSError: if the folder cannot be listed (for example, it does not exist).
            Any exception other than ClientError raised while uploading propagates.
        """
        data_path = os.path.join(os.getcwd(), data_dir)

        # os.listdir also returns sub-directories; keep regular files only
        file_names = [
            name for name in os.listdir(data_path)
            if os.path.isfile(os.path.join(data_path, name))
        ]

        try:
            # NOTE: replace the placeholders locally only; do not commit real keys.
            s3 = boto3.resource(
                service_name='s3',
                region_name='us-east-2',
                aws_access_key_id='<aws_access_key_id>',  # get the access_key_id from IAM AWS
                aws_secret_access_key='<aws_secret_access_key>'  # get the secret_access_key from IAM AWS
            )

            bucket = s3.Bucket(bucket_name)

            for file_name in file_names:
                bucket.upload_file(
                    Filename=os.path.join(data_path, file_name),
                    Key=file_name,
                )

        except ClientError as e:
            # Existing contract: hand the error back instead of raising it
            return e

In [23]:
# Check which files are present in the local 'Good_Raw' directory before uploading

os.listdir('Good_Raw')

['wafer_16012020_051629.csv',
 'wafer_21012020_080913.csv',
 'wafer_20022020_090716.csv',
 'wafer_13012020_090817.csv']

In [24]:
# Create an instance of the uploader class to push the above csv files to the S3 bucket

push = UploadRawDataToCloud()


# Run the upload. As the last expression of the cell, whatever the method
# returns is what the notebook displays.

push.trainingbatchfilestocloud()

In [29]:
# Check that the files have been uploaded: print the key of every object in the bucket

bucket=s3.Bucket('predictionwaferfiles')


for files in bucket.objects.all():
    print(files.key)

Input.csv
wafer_13012020_090817.csv
wafer_16012020_051629.csv
wafer_20022020_090716.csv
wafer_21012020_080913.csv


### 6. Download files in bulk from an S3 bucket

In [30]:
# Create the directory that will hold the downloaded files.
# exist_ok=True keeps this cell re-runnable: without it, a second run raises
# FileExistsError because the directory is already there.

os.makedirs('DOWNLOADS', exist_ok=True)

In [31]:
# Absolute local folder (trailing slash kept) under which files from the S3 bucket are saved
path = os.path.join(os.getcwd(), 'DOWNLOADS/')
# Echo the value so the notebook displays it
path

'/Users/dikshyakasaju/Desktop/CODE FOR WAFER/DOWNLOADS/'

In [32]:
# Download every object in the bucket into the local folder held in `path`
my_bucket = s3.Bucket('predictionwaferfiles')


for files in my_bucket.objects.all():
    # Keys ending in '/' are folder placeholders, not files: nothing to download
    if files.key.endswith('/'):
        continue

    # Same target as before (path + key); kept as concatenation so a key that
    # starts with '/' cannot escape the download folder.
    target = path + files.key

    # Keys may contain '/' prefixes; create the matching local sub-directories
    # first, otherwise the download fails on the missing parent directory.
    os.makedirs(os.path.dirname(target), exist_ok=True)

    my_bucket.download_file(Key=files.key, Filename=target)

In [33]:
# Check that the files have been downloaded by listing the local 'DOWNLOADS' directory

os.listdir('DOWNLOADS')

['wafer_16012020_051629.csv',
 'Input.csv',
 'wafer_21012020_080913.csv',
 'wafer_20022020_090716.csv',
 'wafer_13012020_090817.csv']

### 7. Getting names of all files from the bucket


In [40]:
# Print each object in the bucket as-is; the output shows the bucket name and key of each one
for files in my_bucket.objects.all():
    print(files)

s3.ObjectSummary(bucket_name='predictionwaferfiles', key='Input.csv')
s3.ObjectSummary(bucket_name='predictionwaferfiles', key='wafer_13012020_090817.csv')
s3.ObjectSummary(bucket_name='predictionwaferfiles', key='wafer_16012020_051629.csv')
s3.ObjectSummary(bucket_name='predictionwaferfiles', key='wafer_20022020_090716.csv')
s3.ObjectSummary(bucket_name='predictionwaferfiles', key='wafer_21012020_080913.csv')


In [36]:
# Use the .key attribute to print only the names of the files

for files in my_bucket.objects.all():
    print(files.key)

Input.csv
wafer_13012020_090817.csv
wafer_16012020_051629.csv
wafer_20022020_090716.csv
wafer_21012020_080913.csv


### 8. Getting names of all buckets in the AWS S3 account

In [42]:
# Print each bucket reachable through the s3 resource as-is
for bucket in s3.buckets.all():
    print(bucket)

s3.Bucket(name='predictionwaferfiles')
s3.Bucket(name='trainingwaferfiles')


In [43]:
# Use the .name attribute to print only the names of the buckets

for bucket in s3.buckets.all():
    print(bucket.name)

predictionwaferfiles
trainingwaferfiles
