# Test Notebook
This is a notebook to test the following:
1. Running within SageMaker Studio, using the execution role
2. No Internet connectivity - should be able to use pre-installed modules
3. Should be able to read/write to an S3 bucket

In [None]:
import sagemaker
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# SageMaker session; its upload_data helper is used later to push the model to S3
session = sagemaker.Session()

# Synthetic, reproducible dataset: 100 rows of 2 random features,
# labelled 1 when the two features add up to more than 1, else 0
np.random.seed(0)
X = np.random.rand(100, 2)
y = (X.sum(axis=1) > 1).astype(int)

# Hold out 20% of the rows for evaluation (fixed seed keeps the split stable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Create and train a simple Random Forest model on the training split.
# 100 trees; random_state is pinned so repeated runs are reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42)
# Kept as a bare trailing expression: in a notebook the cell echoes the fitted estimator.
model.fit(X_train, y_train)

In [None]:
# Score the held-out rows with the trained model
y_pred = model.predict(X_test)

# Fraction of test labels predicted correctly, reported to two decimals
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy:.2f}")

# Persist the fitted model to the notebook's working directory
import joblib
joblib.dump(value=model, filename='random_forest_model.joblib')

In [None]:
# Push the saved model file to S3 under the models/via-studio-nb prefix.
# Replace the placeholder with a real bucket name before running; to use the
# session's default bucket instead, set: bucket = session.default_bucket()
bucket = "YOUR-BUCKET-NAME"
model_artifact = session.upload_data(
    path='random_forest_model.joblib',
    bucket=bucket,
    key_prefix='models/via-studio-nb',
)

print(f"Model artifact uploaded to: {model_artifact}")

In [None]:
# Clean up: remove the local model file.
# Notebook cells are often re-run, and the save step may not have completed;
# a file that is already absent counts as cleaned up instead of raising
# FileNotFoundError. Any other OS error (e.g. permissions) still surfaces.
import os
from contextlib import suppress

with suppress(FileNotFoundError):
    os.remove('random_forest_model.joblib')

print("Test completed successfully!")