In [9]:
import boto3
import os
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
import joblib

In [10]:
## Download the training data (train.csv) from S3

In [11]:
# Download the training CSV from S3 to the local working directory.
#
# Credentials are read from the process environment (optionally populated
# from a local .env file by load_dotenv) so that no secret is hard-coded here.
load_dotenv()
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')

# Initialize the S3 client
# NOTE(review): os.getenv returns None when a variable is unset; boto3 is then
# expected to fall back to its default credential chain — confirm for this setup.
s3 = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    region_name='us-east-2'
)

# Source object in S3 and destination path on local disk
bucket_name = 'mle-e2e-1'
file_to_download = 'inputs/train.csv'
local_file_name = 'inputs/train.csv'

# download_file does not create missing parent directories, so make sure the
# destination folder exists; otherwise a fresh checkout fails on this cell.
# The guard skips makedirs when the path has no directory component
# (os.makedirs('') raises).
local_dir = os.path.dirname(local_file_name)
if local_dir:
    os.makedirs(local_dir, exist_ok=True)

# Download the file
s3.download_file(bucket_name, file_to_download, local_file_name)
print(f"Downloaded {file_to_download} from S3 bucket {bucket_name}")

Downloaded inputs/train.csv from S3 bucket mle-e2e-1


In [12]:
## Model training

In [13]:

# Read the training CSV that the download cell wrote to disk
data = pd.read_csv(local_file_name)

# Single-feature regression: column 'x' predicts column 'y'.
# The double brackets keep X two-dimensional (a DataFrame rather than a Series).
X, y = data[['x']], data['y']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Fit the linear model on the training portion (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Score on the held-out rows. The mean absolute target value is reported next
# to the error so the error can be read relative to the scale of y.
y_pred = model.predict(X_test)
err = mean_absolute_error(y_test, y_pred)
maytest = np.mean(np.abs(y_test))
print(f"Mean Abs Error: {err}", f"Mean Abs Ytest: {maytest}", sep="\n")

Mean Abs Error: 0.09036438483682596
Mean Abs Ytest: 0.7849316738622192


In [14]:
## Save the model locally and upload it to S3

In [15]:
# Persist the fitted model to local disk, then copy the same file to S3.
model_file_name = 'models/model.pkl'

# joblib.dump opens the target path for writing and does not create missing
# parent directories, so make sure the folder exists before dumping. The guard
# skips makedirs when the path has no directory component (os.makedirs('') raises).
model_dir = os.path.dirname(model_file_name)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

# Save the model to a file
joblib.dump(model, model_file_name)
print(f"Saved model to {model_file_name}")

# Upload the model file to S3 (the object key mirrors the local relative path)
model_s3_file = 'models/model.pkl'
s3.upload_file(model_file_name, bucket_name, model_s3_file)
print(f"Uploaded {model_file_name} to S3 bucket {bucket_name}")


Saved model to models/model.pkl
Uploaded models/model.pkl to S3 bucket mle-e2e-1
