In [260]:

# Using Python to load data into an AWS S3 bucket offers several advantages:

#  Flexibility: Python provides a wide range of libraries and tools for data manipulation, allowing you to preprocess 
#  and format your data according to your specific requirements before uploading it to S3.

#  Automation: Python scripts can be automated to run on a schedule or triggered by specific events, making it easy to 
#  integrate data loading processes into your workflow without manual intervention.

#  Scalability: AWS S3 is highly scalable, and Python allows you to easily handle large volumes of data by chunking or 
#  parallelizing uploads if necessary.

#  Cost-effectiveness: Python scripts can be run on-demand or through serverless architectures like AWS Lambda, allowing 
#  you to minimize infrastructure costs by paying only for the resources you use.

#  Integration: Python seamlessly integrates with AWS SDKs, such as Boto3, which provides a convenient interface for 
#  interacting with AWS services, including S3. This makes it easy to incorporate S3 data loading tasks into your 
#  existing Python-based applications or workflows.

#  Customization: With Python, you have full control over the data loading process, allowing you to customize and 
#  extend functionality as needed to meet your specific requirements.
        

In [2]:

# Let's fetch some temperature data and structure it in a dataframe.
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Two more imports for the AWS upload: boto3 (the AWS SDK) and StringIO (in-memory CSV buffer)
import boto3
from io import StringIO

# Open-Meteo client backed by a cached HTTP session ('.cache', expire_after=3600)
# that is wrapped in a retrying session (5 retries, backoff factor 0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition: hourly "temperature_2m" at the Lat & Lon of New York City.
# Every required weather variable must be listed here, and the order of the
# hourly/daily variables matters: they are read back by position further down.
url = "https://api.open-meteo.com/v1/forecast"
params = dict(
    latitude=40.712776,
    longitude=-74.005974,
    hourly="temperature_2m",
)
responses = openmeteo.weather_api(url, params=params)

# Only the first location is processed here.
# (Loop over `responses` to handle several locations or weather models.)
response = responses[0]
print(
    f"Coordinates {response.Latitude()}°N {response.Longitude()}°E",
    f"Elevation {response.Elevation()} m asl",
    f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}",
    f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s",
    sep="\n",
)

# Hourly payload: variables must be read in the same order as requested,
# so index 0 is "temperature_2m".
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

# Rebuild the timestamp axis from the response's start, end and interval
# (interpreted as seconds); inclusive="left" leaves out the end bound itself.
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
}

df = pd.DataFrame(data=hourly_data)
print(df)


Coordinates 40.71033477783203°N -73.99307250976562°E
Elevation 32.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                         date  temperature_2m
0   2024-04-09 00:00:00+00:00       13.331000
1   2024-04-09 01:00:00+00:00       13.331000
2   2024-04-09 02:00:00+00:00       12.131001
3   2024-04-09 03:00:00+00:00       12.531000
4   2024-04-09 04:00:00+00:00       11.531000
..                        ...             ...
163 2024-04-15 19:00:00+00:00       18.376499
164 2024-04-15 20:00:00+00:00       18.526499
165 2024-04-15 21:00:00+00:00       18.426498
166 2024-04-15 22:00:00+00:00       17.976500
167 2024-04-15 23:00:00+00:00       17.226500

[168 rows x 2 columns]


In [None]:

# Now we have to create some credentials because we will be logging into AWS remotely

# Using AWS Management Console:
#  Sign in to the AWS Management Console.
#  Open the IAM console at https://console.aws.amazon.com/iam/.
#  In the navigation pane, choose "Users".
#  Select your IAM user name.
#  Choose the "Security credentials" tab.
#  Under "Access keys", you can create a new access key pair or view the existing one.
    

In [None]:

# In addition, you will almost certainly need to create a custom IAM policy that allows your IAM user to write objects
# to your S3 bucket.

# Here's how you can create a custom IAM policy granting s3:PutObject permission:
#  Go to the IAM console: https://console.aws.amazon.com/iam/
#  In the left navigation pane, choose "Policies" and then click on "Create policy".
#  Choose the "JSON" tab and paste the following policy document:

# json

# IAM policy document: allows the s3:PutObject action on every object
# (the trailing "/*") in the bucket named "my-bucket".
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": "s3:PutObject",
            "Resource": "arn:aws:s3:::my-bucket/*"
        }
    ]
}

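# Replace "my-bucket" with the name of your S3 bucket. It must match the bucket_name used in the upload cell
# below ('my-bucket-housing' in this example), otherwise the upload will be denied.
# After creating the policy, attach it to your IAM user so that the permission takes effect.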


In [1]:

# Serialize the DataFrame to CSV entirely in memory (no temporary file on disk)
csv_buffer = StringIO()
df.to_csv(csv_buffer, index=False)


# Initialize the S3 client WITHOUT embedding credentials in the notebook.
# boto3 resolves them through its default credential chain, e.g. the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables or the
# shared credentials file (~/.aws/credentials). Keys pasted into a cell are
# saved with the notebook and leak as soon as it is shared or committed.
s3 = boto3.client('s3')


# Specify your S3 bucket name and file name (the object key inside the bucket).
# The bucket must be the one your IAM policy grants s3:PutObject on.
bucket_name = 'my-bucket-housing'
file_name = 'hourly_temps.csv'


# Upload the CSV to the S3 bucket as a single object. The text is encoded
# explicitly so that Body is sent as UTF-8 bytes. The response returned by
# put_object is shown as the cell output.
s3.put_object(
    Bucket=bucket_name,
    Key=file_name,
    Body=csv_buffer.getvalue().encode('utf-8'),
)


In [247]:

# Overall, using Python to load data into AWS S3 offers a powerful and flexible solution for managing and processing your 
# data in the cloud!
