In [8]:
import os
import boto3
import pandas as pd
import joblib
from dotenv import load_dotenv

In [9]:
## Download the test data (test.csv) and the trained model (model.pkl) from S3

In [10]:
# Fetch the inference inputs (test.csv and model.pkl) from S3 to local paths.

# Load variables from a .env file into the process environment, then read the
# AWS credentials. os.getenv returns None for any variable that is not set.
load_dotenv()
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')

# Initialize the S3 client
s3 = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name='us-east-2'
)

# S3 bucket details
bucket_name = 'mle-e2e-1'

# Download test.csv from S3
test_file = 'inputs/test.csv'
local_test_file = 'inputs/test.csv'
# download_file does not create missing parent directories, so make sure the
# local target directory exists before downloading (no-op if already there).
os.makedirs(os.path.dirname(local_test_file), exist_ok=True)
s3.download_file(bucket_name, test_file, local_test_file)

# Download the model from S3
model_file = 'models/model.pkl'
local_model_file = 'models/model.pkl'
os.makedirs(os.path.dirname(local_model_file), exist_ok=True)
s3.download_file(bucket_name, model_file, local_model_file)

In [11]:
## Run inference on the test data

In [12]:
# Deserialize the downloaded model.
# NOTE: joblib.load is pickle-based and can execute code embedded in the file;
# only load model artifacts that come from a trusted source.
# The model is loaded after the CSV is read, matching the original order.
test_data = pd.read_csv(local_test_file)
model = joblib.load(local_model_file)

# Score the 'x' feature column and attach the predictions as 'y_pred'.
test_data = test_data.assign(
    y_pred=lambda frame: model.predict(frame[['x']])
)

In [13]:
## Save the inference results locally and upload them to S3

In [14]:
# Write the inference results to a local CSV and upload that file to S3.

# Keep x, the actual target, and the prediction; the actual target column 'y'
# is exposed as 'y_test' in the output. An explicit rename (rather than
# overwriting .columns positionally) cannot mislabel columns if the selection
# order ever changes.
output_data = test_data[['x', 'y', 'y_pred']].rename(columns={'y': 'y_test'})

# Save the output to a CSV file
output_file = 'outputs/inference.csv'
# to_csv does not create missing parent directories; ensure the local output
# directory exists so the cell also works in a fresh checkout.
os.makedirs(os.path.dirname(output_file), exist_ok=True)
output_data.to_csv(output_file, index=False)

# Upload the results to S3
output_s3_file = 'outputs/inference.csv'
s3.upload_file(output_file, bucket_name, output_s3_file)
print(f"Uploaded {output_file} to S3 bucket {bucket_name} under {output_s3_file}")

Uploaded outputs/inference.csv to S3 bucket mle-e2e-1 under outputs/inference.csv
