1. Importation des librairies

In [59]:
import numpy as np
import json 
import pandas as pd 
from sklearn.model_selection import train_test_split 
import xgboost as xgb 

import boto3
import os
import subprocess

2. Entraînement du modèle

In [3]:
# Load the house-price CSV into a DataFrame; the path is relative to the
# notebook's current working directory.
data = pd.read_csv("../data/kc_house_data.csv")

In [5]:
# List the columns stored as float64 (candidate continuous features and the target).
data.select_dtypes(include=["float64"]).columns

Index(['price', 'bathrooms', 'floors', 'lat', 'long'], dtype='object')

In [16]:
import pickle

# Target is the sale price; only three of the float columns are used as features.
# The column order here (floors, lat, long) is the order the model expects at inference.
Y = data["price"]
X = data[["floors", "lat", "long"]]

# Seed the split so the hold-out set, and therefore the fitted model and every
# number shown below, is reproducible across "Restart Kernel -> Run All".
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

xgb_reg = xgb.XGBRegressor(random_state=42, n_estimators=1000, learning_rate=0.05)
# eval_set is passed for per-round evaluation only; no early stopping is configured here.
xgb_reg.fit(x_train, y_train, eval_set=[(x_test, y_test)], verbose=False)

pickle_file = "artifacts/xgb_model.pkl"
# Create the output directory if needed so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(pickle_file), exist_ok=True)

with open(pickle_file, "wb") as f:
    pickle.dump(xgb_reg, f)

In [21]:
# Sanity check: the training features are still a pandas DataFrame after the split.
type(x_train)

pandas.core.frame.DataFrame

3. Tester l'inférence du modèle

In [27]:
# Reload the regressor from disk to exercise the same path an inference script would.
# NOTE: pickle.load executes arbitrary code; only load files produced by this notebook.
with open("artifacts/xgb_model.pkl", 'rb') as model_file:
    model = pickle.load(model_file)

# Sample JSON payload, in the same shape as an endpoint request.
request_body = '{"floors": 2.0, "lat": 47.5112, "long": -122.257}'

input_data = json.loads(request_body)

# Build a single-row feature matrix in the training column order (floors, lat, long).
# A key missing from the payload falls back to 0.
features = np.array(
    [input_data.get(key, 0) for key in ('floors', 'lat', 'long')]
).reshape(1, -1)

prediction = model.predict(features)

prediction

array([563297.5], dtype=float32)

In [29]:
# Convert the single predicted value to a plain Python int (int() drops the fractional part).
int(prediction[0])

563297

4. Archiver le modèle 

In [None]:
# Define your file paths
tar_file_path = 'artifacts/xgb_model.tar.gz'
pickle_file_path = 'artifacts/xgb_model.pkl'

# Step 1: Delete the existing tar file if it exists
if os.path.exists(tar_file_path):
    os.remove(tar_file_path)
    print(f'Deleted existing file: {tar_file_path}')

# Step 2: Ensure the pickle file exists at the expected location
if not os.path.exists(pickle_file_path):
    raise FileNotFoundError(f'{pickle_file_path} does not exist.')

# Step 3: Compress the model file
try:
    # Change the directory to 'model' and create the tar file with the pickle file at the root
    subprocess.run(
        ['tar', '-czvf', tar_file_path, '-C', os.path.dirname(pickle_file_path), os.path.basename(pickle_file_path)],
        check=True
    )
    print(f'Successfully compressed {pickle_file_path} to {tar_file_path}')
except subprocess.CalledProcessError as e:
    print(f'Error compressing file: {e}')



Deleted existing file: artifacts/xgb_model.tar.gz
Successfully compressed artifacts/xgb_model.pkl to artifacts/xgb_model.tar.gz


5. Mettre le modèle dans un bucket S3 créé au préalable

In [None]:
s3_bucket = 'kevindouanla' # Replace with your S3 bucket name
s3_bucket_path = f's3://{s3_bucket}'

# Step 4: Upload the compressed file to S3.
# The local relative path is reused as the object key, so the object lands under
# the 'artifacts/' prefix in the bucket.
try:
    s3_client = boto3.client('s3')
    s3_client.upload_file(tar_file_path, s3_bucket, tar_file_path)
    print(f"File uploaded successfully to s3://{s3_bucket}/{tar_file_path}")
except Exception as e:
    print(f"Error uploading file: {e}")
    # Re-raise: without the archive in S3 the deployment below cannot succeed, and
    # `model_data` must not be defined as if the upload had worked.
    raise

# Step 5: Output the S3 location of the model data (only reached after a successful upload)
model_data = f'{s3_bucket_path}/{tar_file_path}'
print(f"Model Data S3 Location: {model_data}")

File uploaded successfully to s3://kevindouanla/artifacts/xgb_model.tar.gz
Model Data S3 Location: s3://kevindouanla/artifacts/xgb_model.tar.gz


6. Déployer le modèle sur un endpoint SageMaker

In [31]:
from sagemaker import get_execution_role
from sagemaker.model import Model
import sagemaker

In [51]:

# IAM role passed to the SageMaker Model below.
# NOTE(review): the account id / role ARN is hard-coded; consider reading it from
# the environment or configuration instead.
role = "arn:aws:iam::077260319067:role/aws-role-sagemaker"

sagemaker_session = sagemaker.Session()

# Custom inference image previously pushed to ECR.
image_uri = "077260319067.dkr.ecr.eu-north-1.amazonaws.com/aws-model-ecr:latest"
entry_point = "inference.py"
# The Model call previously hard-coded '.' and ignored this variable (which held "src").
# The variable now carries the value that was effectively in use, so the call is unchanged.
# NOTE(review): confirm whether "src" was the intended directory.
source_dir = "."

model = Model(
    image_uri=image_uri, # Provide the URI to your Pushed Docker image from AWS ECR
    model_data=model_data,
    role=role,
    sagemaker_session=sagemaker_session,
    entry_point=entry_point,
    source_dir=source_dir,
    env={
        'SAGEMAKER_CONTAINER_LOG_LEVEL': '30',
        'SAGEMAKER_ENABLE_CLOUDWATCH_LOGGING': 'true'
    }
)

# Create and deploy the endpoint
try:
    predictor = model.deploy(
        instance_type="ml.m5.xlarge",
        initial_instance_count=1,
        endpoint_name="model-deployment-endpoint"
    )
except Exception as e:
    print(f"Failed to deploy endpoint: {e}")
    # Re-raise so the failure is visible and later cells do not run against a
    # missing endpoint or an undefined `predictor`.
    raise


-----!

In [53]:
import json
import sagemaker
from sagemaker.predictor import Predictor
from sagemaker import get_execution_role

# Name of the endpoint to query (same literal as the one used at deployment time)
endpoint_name = "model-deployment-endpoint"

# SageMaker session used by the Predictor; the role ARN is kept for reference
sagemaker_session = sagemaker.Session()
role = "arn:aws:iam::077260319067:role/aws-role-sagemaker"

# Input features for a single prediction
request_body = dict(floors=1.0, lat=2.0, long=3.0)

# Attach a Predictor to the already-deployed endpoint
predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=sagemaker_session,
)

# Serialize the payload ourselves and declare it as JSON on the request
response = predictor.predict(
    json.dumps(request_body),
    initial_args={'ContentType': 'application/json'},
)

# Decode the JSON body returned by the endpoint and show it
prediction = json.loads(response)
print("Prediction:", prediction)

Prediction: {'prediction': 415099}


In [52]:
import boto3

# Low-level SageMaker client pinned to the region the endpoint was deployed in
client = boto3.client(service_name="sagemaker", region_name="eu-north-1")

# Fetch the endpoints visible to this account/region and dump the raw API response
response = client.list_endpoints()
print(response)


{'Endpoints': [{'EndpointName': 'model-deployment-endpoint', 'EndpointArn': 'arn:aws:sagemaker:eu-north-1:077260319067:endpoint/model-deployment-endpoint', 'CreationTime': datetime.datetime(2025, 9, 29, 23, 44, 18, 490000, tzinfo=tzlocal()), 'LastModifiedTime': datetime.datetime(2025, 9, 29, 23, 46, 52, 369000, tzinfo=tzlocal()), 'EndpointStatus': 'InService'}], 'ResponseMetadata': {'RequestId': '88f62520-a897-4a3e-853e-c2d95f85ae97', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '88f62520-a897-4a3e-853e-c2d95f85ae97', 'strict-transport-security': 'max-age=47304000; includeSubDomains', 'x-frame-options': 'DENY', 'content-security-policy': "frame-ancestors 'none'", 'cache-control': 'no-cache, no-store, must-revalidate', 'x-content-type-options': 'nosniff', 'content-type': 'application/x-amz-json-1.1', 'content-length': '249', 'date': 'Mon, 29 Sep 2025 21:47:28 GMT'}, 'RetryAttempts': 0}}
