In [1]:
# -------------------------------
# 1. Import Libraries.
# -------------------------------
import tarfile
from io import StringIO

import boto3
import joblib
import pandas as pd
import sagemaker
from sagemaker.sklearn import SKLearn, SKLearnModel
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split







ModuleNotFoundError: No module named 'boto3'

In [2]:
!pip install boto3 pandas scikit-learn joblib sagemaker


Collecting boto3
  Downloading boto3-1.40.16-py3-none-any.whl.metadata (6.7 kB)
Collecting sagemaker
  Downloading sagemaker-2.251.0-py3-none-any.whl.metadata (17 kB)
Collecting botocore<1.41.0,>=1.40.16 (from boto3)
  Downloading botocore-1.40.16-py3-none-any.whl.metadata (5.7 kB)
Collecting s3transfer<0.14.0,>=0.13.0 (from boto3)
  Downloading s3transfer-0.13.1-py3-none-any.whl.metadata (1.7 kB)
Collecting attrs<26,>=24 (from sagemaker)
  Using cached attrs-25.3.0-py3-none-any.whl.metadata (10 kB)
Collecting docker (from sagemaker)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting fastapi (from sagemaker)
  Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)
Collecting graphene<4,>=3 (from sagemaker)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting importlib-metadata<7.0,>=1.4.0 (from sagemaker)
  Downloading importlib_metadata-6.11.0-py3-none-any.whl.metadata (4.9 kB)
Collecting omegaconf<3,>=2.2 (from sagemaker)
  Downl

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
aiobotocore 2.12.3 requires botocore<1.34.70,>=1.34.41, but you have botocore 1.40.16 which is incompatible.
spyder 5.5.1 requires ipython!=8.17.1,<9.0.0,>=8.13.0; python_version > "3.8", but you have ipython 9.4.0 which is incompatible.
spyder-kernels 2.5.0 requires ipython!=8.17.1,<9,>=8.13.0; python_version > "3.8", but you have ipython 9.4.0 which is incompatible.
streamlit 1.32.0 requires packaging<24,>=16.8, but you have packaging 24.2 which is incompatible.
streamlit 1.32.0 requires rich<14,>=10.14.0, but you have rich 14.0.0 which is incompatible.


In [None]:
# -------------------------------
# 2. AWS & S3 Setup
# -------------------------------
# Placeholder bucket/key: replace with the real location of the raw sales CSV.
bucket_name = 'your-bucket-name'
raw_file_key = 'data/raw_sales.csv'

# boto3 client (credentials and region are resolved by boto3's default chain)
s3 = boto3.client('s3')

# -------------------------------
# 3. Pull Data from S3
# -------------------------------
# Download the object, decode it as UTF-8 text and parse it as CSV.
obj = s3.get_object(Bucket=bucket_name, Key=raw_file_key)
data = obj['Body'].read().decode('utf-8')
df = pd.read_csv(StringIO(data))
print("Raw Data Sample:\n", df.head())



In [None]:
# -------------------------------
# 4. Feature Engineering
# -------------------------------
# Lag feature: the previous row's sales within each product. shift(1) follows
# the current row order, so this is only a true "previous period" value if df
# is already ordered chronologically within each product_id -- TODO confirm.
# The first row of every product has no predecessor and is filled with 0.
df['lag_1_sales'] = df.groupby('product_id')['sales'].shift(1).fillna(0)

# Revenue is kept as a descriptive column only. It equals sales * price, i.e.
# it is computed from the target, so feeding it to the model would leak the
# answer and could not be reproduced at prediction time.
df['revenue'] = df['sales'] * df['price']

# Select features and target. 'price' is used instead of 'revenue' so that
# every feature is known before the sales value it is meant to predict.
X = df[['lag_1_sales', 'price']]
y = df['sales']

# -------------------------------
# 5. Train-Test Split
# -------------------------------
# 80/20 random split; the fixed seed makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# -------------------------------
# 6. Save Train/Test Data to S3 (Optional)
# -------------------------------
def upload_df_to_s3(df, key, bucket=None, client=None):
    """Serialize a DataFrame to CSV (index omitted) and write it to S3.

    Args:
        df: DataFrame to upload.
        key: Object key to write inside the bucket.
        bucket: Destination bucket name. When omitted, the notebook-level
            ``bucket_name`` is used.
        client: Object exposing ``put_object(Bucket=..., Key=..., Body=...)``,
            such as a boto3 S3 client. When omitted, the notebook-level ``s3``
            client is used.

    Returns:
        None.
    """
    # Fall back to the notebook globals lazily so the function can also be
    # called (and tested) with an explicit bucket and client.
    target_bucket = bucket_name if bucket is None else bucket
    s3_client = s3 if client is None else client

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)
    s3_client.put_object(Bucket=target_bucket, Key=key, Body=csv_buffer.getvalue())

# Write each split to its own CSV object under data/. The target splits are
# wrapped in a DataFrame before being handed to the uploader.
for split_key, split_frame in (
    ('data/X_train.csv', X_train),
    ('data/X_test.csv', X_test),
    ('data/y_train.csv', pd.DataFrame(y_train)),
    ('data/y_test.csv', pd.DataFrame(y_test)),
):
    upload_df_to_s3(split_frame, split_key)

# -------------------------------


In [None]:
# 7. Train Model Locally (Optional)
# -------------------------------
# Random forest with 100 trees; the fixed seed keeps repeated fits identical.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**forest_params).fit(X_train, y_train)

# Persist the fitted estimator in the working directory
joblib.dump(model, "rf_model.pkl")

# -------------------------------
# 8. Deploy Model in SageMaker
# -------------------------------
# Initialize SageMaker session
sess = sagemaker.Session()
# Placeholder ARN: replace with an IAM role SageMaker is allowed to assume.
role = "arn:aws:iam::123456789012:role/YourSageMakerRole"

# SageMaker hosting expects model artifacts as a gzipped tarball, so package
# the pickled model before uploading it.
model_archive = "model.tar.gz"
with tarfile.open(model_archive, "w:gz") as archive:
    archive.add("rf_model.pkl")

# Upload the packaged model to S3 (needed by SageMaker)
model_s3_path = sess.upload_data(path=model_archive, bucket=bucket_name, key_prefix='model')

# SKLearnModel wraps an already-trained artifact. The SKLearn estimator class
# is for launching training jobs and cannot be deployed without one.
# NOTE(review): entry_point is used as the inference script here; it is
# expected to define model_fn(model_dir) that loads rf_model.pkl -- confirm
# that train.py does so.
# NOTE(review): the container's scikit-learn (framework_version) should match
# the version used to pickle the model locally -- confirm.
sklearn_model = SKLearnModel(
    model_data=model_s3_path,
    role=role,
    entry_point='train.py',
    framework_version='0.23-1',
    sagemaker_session=sess,
)

# Deploy as real-time endpoint (billed while it is running)
predictor = sklearn_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge'
)

# -------------------------------
# 9. Make Predictions
# -------------------------------
# Send the held-out feature matrix to the endpoint as a plain array and
# show the first five results.
test_matrix = X_test.to_numpy()
preds = predictor.predict(test_matrix)
print("Sample Predictions:", preds[:5])

