# S3 연동

In [10]:
import os

import boto3

# Connection parameters.
# Values are read from the standard AWS environment variables so that secrets
# do not have to be typed into (and saved with) the notebook. Each one falls
# back to "" when its environment variable is not set.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
AWS_DEFAULT_REGION = os.environ.get("AWS_DEFAULT_REGION", "")

In [11]:
# S3 client connection helper
def s3_client_connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION):
    """Create a boto3 S3 client from explicit credentials.

    Args:
        AWS_ACCESS_KEY_ID: Access key id passed to ``boto3.client``.
        AWS_SECRET_ACCESS_KEY: Secret access key passed to ``boto3.client``.
        AWS_DEFAULT_REGION: Region name passed to ``boto3.client``.

    Returns:
        The S3 client on success. If creating the client raises, the
        exception is printed and ``None`` is returned.
    """
    client_options = {
        "service_name": "s3",
        "region_name": AWS_DEFAULT_REGION,
        "aws_access_key_id": AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": AWS_SECRET_ACCESS_KEY,
    }

    try:
        client = boto3.client(**client_options)
    except Exception as error:
        # Best-effort helper: report the problem and hand back None.
        print(error)
        return None

    print("s3 bucket connected!")
    return client
    
# S3 resource connection helper
def s3_resource_connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION):
    """Create a boto3 S3 resource from explicit credentials.

    Args:
        AWS_ACCESS_KEY_ID: Access key id passed to ``boto3.resource``.
        AWS_SECRET_ACCESS_KEY: Secret access key passed to ``boto3.resource``.
        AWS_DEFAULT_REGION: Region name passed to ``boto3.resource``.

    Returns:
        The S3 resource on success. If creating the resource raises, the
        exception is printed and ``None`` is returned.
    """
    resource_options = {
        "service_name": "s3",
        "region_name": AWS_DEFAULT_REGION,
        "aws_access_key_id": AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": AWS_SECRET_ACCESS_KEY,
    }

    try:
        resource = boto3.resource(**resource_options)
    except Exception as error:
        # Best-effort helper: report the problem and hand back None.
        print(error)
        return None

    print("s3 bucket connected!")
    return resource
    
# boto3 Session connection helper
def s3_session_connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION):
    """Create a boto3 Session from explicit credentials.

    Args:
        AWS_ACCESS_KEY_ID: Access key id passed to ``boto3.Session``.
        AWS_SECRET_ACCESS_KEY: Secret access key passed to ``boto3.Session``.
        AWS_DEFAULT_REGION: Region name passed to ``boto3.Session``.

    Returns:
        The Session on success. If creating the session raises, the
        exception is printed and ``None`` is returned.
    """
    session_options = {
        "region_name": AWS_DEFAULT_REGION,
        "aws_access_key_id": AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": AWS_SECRET_ACCESS_KEY,
    }

    try:
        new_session = boto3.Session(**session_options)
    except Exception as error:
        # Best-effort helper: report the problem and hand back None.
        print(error)
        return None

    print("s3 bucket connected!")
    return new_session

---
### Bucket 확인

In [14]:
# Open a boto3 session with the notebook-level credentials and region.
session = s3_session_connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION)

# Build the S3 resource from the session and keep it in session_s3.
session_s3 = session.resource('s3')

# Print the name of every bucket returned by buckets.all().
for bucket in session_s3.buckets.all():
    print(bucket.name)

s3 bucket connected!
donghee-deltalake-test
donghee-s3-alldata


In [32]:
# Connect through the client interface.
client = s3_client_connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION)
response = client.list_buckets() # bucket listing
# Bare expression so the notebook displays the raw response.
response

s3 bucket connected!


{'ResponseMetadata': {'RequestId': 'F17MWB7NPQF24AXB',
  'HostId': 'NzzSAmi29eVYbtm/9RPWiQQ2v3KK2ViCzcEapcWcVmKw4+1xgiLpOKrR5QlCLFhycg9ZrjLgUZg=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'NzzSAmi29eVYbtm/9RPWiQQ2v3KK2ViCzcEapcWcVmKw4+1xgiLpOKrR5QlCLFhycg9ZrjLgUZg=',
   'x-amz-request-id': 'F17MWB7NPQF24AXB',
   'date': 'Thu, 15 Feb 2024 17:18:42 GMT',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Buckets': [{'Name': 'donghee-deltalake-test',
   'CreationDate': datetime.datetime(2024, 2, 6, 19, 56, 11, tzinfo=tzutc())},
  {'Name': 'donghee-s3-alldata',
   'CreationDate': datetime.datetime(2024, 2, 13, 8, 29, 31, tzinfo=tzutc())}],
 'Owner': {'ID': 'e8e06575e9bebe78f523a0fb24cc697097b9c6c8895e313bf77390af8176a617'}}

---
### Bucket Upload / Download 기능

In [75]:
# Upload File
def upload_file(local_path, bucket_name, key):
    """Upload a local file to an S3 bucket.

    Args:
        local_path: Local path of the file to upload.
        bucket_name: Name of the destination bucket.
        key: Path (object key) inside the bucket to store the file under.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        raised while connecting or uploading are printed, not re-raised.
    """
    try:
        # The connection helper takes the credentials and region explicitly,
        # so pass the notebook-level connection parameters.
        s3 = s3_resource_connection(
            AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION
        )
        if s3 is None:
            # The helper already printed why the connection failed.
            return
        s3.meta.client.upload_file(local_path, bucket_name, key)
    except Exception as e:
        print(e)
    else:
        print("complete Save File to S3")

# Download File
def donwload_file(local_path, bucket_name, key):
    """Download a single object from an S3 bucket to a local file.

    The function name is misspelled ("donwload"); it is kept as-is so that
    existing calls keep working.

    Args:
        local_path: Local path where the downloaded file is written.
        bucket_name: Name of the bucket that holds the object.
        key: Path (object key) of the object inside the bucket.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        raised while connecting or downloading are printed, not re-raised.
    """
    try:
        # The connection helper takes the credentials and region explicitly,
        # so pass the notebook-level connection parameters.
        s3 = s3_resource_connection(
            AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION
        )
        if s3 is None:
            # The helper already printed why the connection failed.
            return

        bucket = s3.Bucket(bucket_name)

        # Only the first match is needed, so take it lazily instead of
        # materialising the full prefix listing.
        # NOTE(review): relies on the listing being key-ordered so that an
        # exact match for `key` comes first — confirm for the bucket type.
        first_match = next(iter(bucket.objects.filter(Prefix=key)), None)
        if first_match is None or first_match.key != key:
            print(f"s3://{bucket_name}/{key} not found; nothing downloaded")
            return

        bucket.download_file(key, local_path)
    except Exception as e:
        print(e)
    else:
        print("complete Download File from S3")

In [17]:
# Shared parameters: the target bucket and the object key inside it.
bucket_name = "donghee-s3-alldata"
key = "csv/totaldf3.csv"

# Upload parameter: local file that is sent to S3.
Upload_path = "./data/totaldf3.csv"

# Download parameter: local path where the downloaded copy is written.
Download_path = "./data/downloadingtest.csv"

In [48]:
# Upload cell: send Upload_path to bucket_name under key.
upload_file(Upload_path, bucket_name, key)

s3 bucket connected!
complete Save File to S3


In [76]:
# Download cell: fetch key from bucket_name into Download_path.
donwload_file(Download_path, bucket_name, key)

s3 bucket connected!
complete Save File to S3


---
### CSV 파일 다이렉트로 읽기

In [33]:
import pandas as pd
import io

# Inspect the contents of an S3 "directory" (key prefix).
s3_client = s3_client_connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION)

# Prefix of the directory to inspect.
prefix = "csv/"

dir_info = s3_client.list_objects(Bucket = bucket_name, Prefix = prefix, Delimiter = '/')

# The response has no 'Contents' entry when no key matches the prefix, so
# default to an empty list instead of raising KeyError.
for content in dir_info.get('Contents', []):
    print(content['Key'])

s3 bucket connected!
csv/
csv/sf-fire-calls.csv
csv/totaldf3.csv


In [35]:
# Choose which CSV object to load
# e.g. load totaldf3.csv?

# Key of the object to load.
key = "csv/sf-fire-calls.csv"
obj = s3_client.get_object(Bucket = bucket_name, Key = key)
# Read the whole response body into memory and parse it as CSV.
df = pd.read_csv(io.BytesIO(obj["Body"].read()))
# Bare expression so the notebook displays the DataFrame.
df

  df = pd.read_csv(io.BytesIO(obj["Body"].read()))


Unnamed: 0,CallNumber,UnitID,IncidentNumber,CallType,CallDate,WatchDate,CallFinalDisposition,AvailableDtTm,Address,City,...,CallTypeGroup,NumAlarms,UnitType,UnitSequenceInCallDispatch,FirePreventionDistrict,SupervisorDistrict,Neighborhood,Location,RowID,Delay
0,20110016,T13,2003235,Structure Fire,01/11/2002,01/10/2002,Other,01/11/2002 01:51:44 AM,2000 Block of CALIFORNIA ST,SF,...,,1,TRUCK,2.0,4,5,Pacific Heights,"(37.7895840679362, -122.428071912459)",020110016-T13,2.950000
1,20110022,M17,2003241,Medical Incident,01/11/2002,01/10/2002,Other,01/11/2002 03:01:18 AM,0 Block of SILVERVIEW DR,SF,...,,1,MEDIC,1.0,10,10,Bayview Hunters Point,"(37.7337623673897, -122.396113802632)",020110022-M17,4.700000
2,20110023,M41,2003242,Medical Incident,01/11/2002,01/10/2002,Other,01/11/2002 02:39:50 AM,MARKET ST/MCALLISTER ST,SF,...,,1,MEDIC,2.0,3,6,Tenderloin,"(37.7811772186856, -122.411699931232)",020110023-M41,2.433333
3,20110032,E11,2003250,Vehicle Fire,01/11/2002,01/10/2002,Other,01/11/2002 04:16:46 AM,APPLETON AV/MISSION ST,SF,...,,1,ENGINE,1.0,6,9,Bernal Heights,"(37.7388432849018, -122.423948785199)",020110032-E11,1.500000
4,20110043,B04,2003259,Alarms,01/11/2002,01/10/2002,Other,01/11/2002 06:01:58 AM,1400 Block of SUTTER ST,SF,...,,1,CHIEF,2.0,4,2,Western Addition,"(37.7872890372638, -122.424236212664)",020110043-B04,3.483333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175291,183034235,T08,18127270,Structure Fire,10/30/2018,10/30/2018,Fire,10/30/2018 10:04:27 PM,700 Block of LONG BRIDGE ST,San Francisco,...,Alarm,1,TRUCK,3.0,3,6,Mission Bay,"(37.77139049567131, -122.39416141919062)",183034235-T08,2.483333
175292,183034238,86,18127271,Medical Incident,10/30/2018,10/30/2018,Code 2 Transport,10/30/2018 11:18:37 PM,300 Block of WILLIAMS AVE,San Francisco,...,Potentially Life-Threatening,1,MEDIC,2.0,10,10,Bayview Hunters Point,"(37.73017420329081, -122.39943089033613)",183034238-86,1.950000
175293,183034268,E08,18127272,Medical Incident,10/30/2018,10/30/2018,Code 2 Transport,10/30/2018 10:26:01 PM,1100 Block of 4TH ST,San Francisco,...,Potentially Life-Threatening,1,ENGINE,1.0,3,6,Mission Bay,"(37.77389059449051, -122.3915765620042)",183034268-E08,4.416666
175294,183034485,KM03,18127291,Medical Incident,10/30/2018,10/30/2018,Code 2 Transport,10/31/2018 12:52:43 AM,800 Block of HAIGHT ST,San Francisco,...,Non Life-threatening,1,PRIVATE,1.0,5,5,Haight Ashbury,"(37.77131921762313, -122.43619838315635)",183034485-KM03,3.566667


---
* 이후 단계: 불러온 파일을 ConvertToDelta로 Delta 형식으로 변환해 주어야 함