# Import modules

In [None]:
import os
from os.path import join
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

# Read Data

In [None]:
# Mount Google Drive into the Colab runtime and switch the working directory to
# the project data folder; the next cell builds its paths from os.getcwd().
from google.colab import drive
drive.mount('/content/drive/')
%cd /content/drive/MyDrive/AI Hack/data/

Mounted at /content/drive/
/content/drive/MyDrive/AI Hack/data


In [None]:
# Resolve the "output" folder relative to the current working directory and
# load the combined dataset from it.
home = os.getcwd()
out = join(home, "output")
data = pd.read_csv(join(out, "WeatherData.csv"))
# NOTE(review): the displayed frame contains "Unnamed: 0.1" / "Unnamed: 0"
# columns - presumably row indexes written by earlier to_csv calls; confirm.
data

Unnamed: 0.1,Unnamed: 0,mintemp_c,maxtemp_c,precip_mm,sunshine,gust_kph,daily_will_it_rain,tom_will_it_rain,wind_kph,humidity,...,temp_c,PersonID,sex,age,height,weight,BMI,freqOfExercise,CI,Target
0,0,17.9,35.2,0.0,12.3,48,0,0,6,20,...,26.6,1,0,40,175.80,87.75,28.39,6,2.307144,-1.805114
1,1,17.9,35.2,0.0,12.3,48,0,0,6,20,...,26.6,2,0,74,170.76,84.59,29.01,2,3.343987,-1.806799
2,2,17.9,35.2,0.0,12.3,48,0,0,6,20,...,26.6,3,0,24,176.49,81.89,26.29,4,2.459838,-1.802157
3,3,17.9,35.2,0.0,12.3,48,0,0,6,20,...,26.6,4,0,39,175.88,88.58,28.64,0,4.243744,-1.807501
4,4,17.9,35.2,0.0,12.3,48,0,0,6,20,...,26.6,5,0,61,173.37,89.91,29.91,1,4.302597,-1.810043
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95155,47575,22.9,33.4,17.8,6.2,93,1,1,15,67,...,30.3,6,1,37,162.47,74.20,28.11,5,2.384639,-0.963873
95156,47576,22.9,33.4,17.8,6.2,93,1,1,15,67,...,30.3,7,1,30,162.40,70.60,26.77,2,3.139030,-0.964535
95157,47577,22.9,33.4,17.8,6.2,93,1,1,15,67,...,30.3,8,1,34,162.24,69.47,26.39,1,2.461945,-0.962891
95158,47578,22.9,33.4,17.8,6.2,93,1,1,15,67,...,30.3,9,1,22,163.19,66.04,24.80,0,4.014209,-0.963780


# Data Processing

In [None]:
# Separate the response variable from the predictor variables.
# Columns named "Unnamed: ..." are row-index artifacts of earlier CSV
# round-trips; they encode row order rather than a measurement, so they are
# excluded from the feature matrix to avoid leaking row position into the model.
index_artifacts = [col for col in data.columns if str(col).startswith("Unnamed:")]
X = data.drop(columns=["Target", *index_artifacts])
y = data["Target"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Regression Models

# XGBoost

In [None]:
import xgboost as xgb

# Hyper-parameter candidates explored exhaustively by the grid search.
gbm_param_grid = dict(
    n_estimators=[100, 200, 300, 400, 500],
    learning_rate=[0.01, 0.1, 0.15, 0.2],
    max_depth=[7, 9, 11],
)
xgb_model = xgb.XGBRegressor()

# 5-fold cross-validated search, ranked by negated mean squared error.
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=gbm_param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search.fit(X_train, Y_train)

# Keep the refitted winner for the evaluation and export cells below.
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)
print(best_model)

Best Parameters: {'learning_rate': 0.1, 'max_depth': 9, 'n_estimators': 500}
XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=0.1, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=9, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=500, n_jobs=None,
             num_parallel_tree=None, random_state=None, ...)


In [None]:
# Predict on the held-out test split with the estimator selected by the grid search.
XGBr_pred = best_model.predict(X_test)

# Mean squared error between the held-out targets and the predictions.
XGBr_mse = mean_squared_error(Y_test, XGBr_pred)
print(f'Mean Squared Error of XGBoost Regression: {XGBr_mse}')

Mean Squared Error of XGBoost Regression: 4.967611105742106e-06


In [None]:
import pickle

# Persist the tuned estimator inside the output folder. The loading cell changes
# into that folder before opening "XGBoost.model", so writing to the bare
# working directory would leave the file where the loader cannot find it.
with open(join(out, 'XGBoost.model'), 'wb') as file:
  pickle.dump(best_model, file)


In [None]:
# Mount Google Drive (a no-op message is printed when it is already mounted)
# and switch the working directory to the output folder, where the following
# cells read and write the model files by bare file name.
from google.colab import drive
drive.mount('/content/drive/')
%cd /content/drive/MyDrive/AI Hack/data/output/

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/AI Hack/data/output


In [None]:
import pickle

# Read every pickled object stored back-to-back in the file; pickle.load
# signals the end of the stream by raising EOFError.
# NOTE: only unpickle files you created yourself - pickle can execute code.
objects = []
with open("XGBoost.model", "rb") as openfile:
    try:
        while True:
            objects.append(pickle.load(openfile))
    except EOFError:
        pass

In [None]:
# The first object read from the pickle file is used as the model from here on.
model = objects[0]

In [None]:
# Export the underlying Booster under a fixed file name so it can be packaged
# for upload. get_booster() is the public accessor for the trained booster and
# raises a clear error for an unfitted model, unlike the private _Booster
# attribute.
model_file_name = "locally-trained-xgboost-model"
model.get_booster().save_model(model_file_name)



In [None]:
# Pack the exported booster file into a gzip-compressed tarball named
# model.tar.gz, the archive that is uploaded to S3 further down.
!tar czvf model.tar.gz $model_file_name

locally-trained-xgboost-model


In [None]:
!pip install boto3


Collecting boto3
  Downloading boto3-1.29.6-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.33.0,>=1.32.6 (from boto3)
  Downloading botocore-1.32.6-py3-none-any.whl (11.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.8.0,>=0.7.0 (from boto3)
  Downloading s3transfer-0.7.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.29.6 botocore-1.32.6 jmespath-1.0.1 s3transfer-0.7.0


In [None]:
!pip install sagemaker

Collecting sagemaker
  Downloading sagemaker-2.197.0.tar.gz (917 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m917.0/917.0 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting smdebug_rulesconfig==1.0.1 (from sagemaker)
  Downloading smdebug_rulesconfig-1.0.1-py2.py3-none-any.whl (20 kB)
Collecting pathos (from sagemaker)
  Downloading pathos-0.3.1-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting schema (from sagemaker)
  Downloading schema-0.7.5-py2.py3-none-any.whl (17 kB)
Collecting tblib==1.7.0 (from sagemaker)
  Downloading tblib-1.7.0-py2.py3-none-any.whl (12 kB)
Collecting ppft>=1.7.6.7 (from pathos->sagemaker)
  Downloading ppft-1.7.6.7-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.8/56.8 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?2

In [None]:
%%time

import os
import boto3
import re
import json
import sagemaker
from sagemaker import get_execution_role

s3 = boto3.resource('s3',
                    aws_access_key_id="AKIA53SGMP77GQQ23J5R",
                    aws_secret_access_key = "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")


#AKIA53SGMP77DOGJS7FS
#gIstqUu3tSkHhE6tnL1TJi4GJTes5stxUVOFjM0b

# sagemaker.Session(boto3.session.Session())

os.environ['AWS_DEFAULT_REGION'] = "us-east-1"

region = boto3.Session().region_name
#region = "us-east-1"

role = "arn:aws:iam::952556617726:role/service-role/SageMaker-test2"

bucket='<hacktestamelia>' # put your s3 bucket name here, and create s3 bucket
prefix = 'sagemaker/xgboost-byo'
bucket_path = 'https://s3-{}.amazonaws.com/{}'.format(region,bucket)
# customize to your bucket where you have stored the data

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
CPU times: user 1.3 s, sys: 173 ms, total: 1.48 s
Wall time: 1.66 s


In [None]:
# Low-level S3 client used for the upload. Credentials come from boto3's default
# chain (environment variables or shared credentials file) instead of being
# hard-coded in the notebook.
client = boto3.client('s3')

In [None]:
# S3 object keys always use "/" as separator, independent of the local OS, so
# the key is joined explicitly rather than with os.path.join. No file handle is
# opened here: the upload cell opens the archive itself inside a `with` block.
key = "/".join([prefix, model_file_name, 'model.tar.gz'])
boto3.Session().resource('s3').Bucket(bucket).Object(key)

s3.Object(bucket_name='<hacktestamelia>', key='sagemaker/xgboost-byo/locally-trained-xgboost-model/model.tar.gz')

In [None]:
# Stream the archive to S3. The file handle gets its own name so that the
# `data` DataFrame loaded at the top of the notebook is not overwritten, and the
# object key is taken from `key` so it matches the model URL built later.
with open("model.tar.gz", 'rb') as model_archive:
  client.upload_fileobj(model_archive, 'hacktestamelia', key)

In [None]:
# Inference image per region. SageMaker expects a container image URI here (an
# ECR registry path); an S3 location or access-point ARN is not a valid image.
# NOTE(review): "xgboost:latest" is the URI this notebook previously used -
# confirm that the image's XGBoost version can load the locally trained model.
containers = {
              'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest'
              }
container = containers[boto3.Session().region_name]

In [None]:
%%time
from time import gmtime, strftime

print("!")

model_name = model_file_name + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
#model_name = 'model.tar.gz'
print("!a")
model_url = 'https://s3-{}.amazonaws.com/{}/{}'.format(region,bucket,key)

print("!b")
sm_client = boto3.client('sagemaker',
                         aws_access_key_id="AKIA53SGMP77GQQ23J5R",
                         aws_secret_access_key = "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")

print (model_url)

primary_container = {
    'Image': container,
    'ModelDataUrl': model_url
}

print("!c")

print(role)

create_model_response2 = sm_client.create_model(
    ModelName = model_name,
    ExecutionRoleArn = role,
    PrimaryContainer = primary_container)

print(create_model_response2['ModelArn'])

!
!a
!b
https://s3-us-east-1.amazonaws.com/<hacktestamelia>/sagemaker/xgboost-byo/locally-trained-xgboost-model/model.tar.gz
!c
arn:aws:iam::952556617726:role/service-role/SageMaker-test2


ClientError: ignored

# Multioutput regression

In [None]:
#https://scikit-learn.org/stable/modules/multiclass.html#multioutput-regression

In [None]:
# Example from the scikit-learn documentation: fit one gradient-boosting
# regressor per target column of a small synthetic problem.
from sklearn.datasets import make_regression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

# The toy data gets its own names so the notebook-wide X / y (the real feature
# matrix and target) are not overwritten for the cells that follow.
X_demo, y_demo = make_regression(n_samples=10, n_targets=3, random_state=1)
MultiOutputRegressor(GradientBoostingRegressor(random_state=0)).fit(X_demo, y_demo).predict(X_demo)

# Average

In [None]:
# Combine the individual models by taking the element-wise mean of their
# test-set predictions.
# NOTE(review): rfr_pred, lr_pred and svr_pred are not defined in any visible
# cell of this notebook - confirm where they are produced before running.
member_predictions = [rfr_pred, lr_pred, svr_pred]
ensemble_predictions = np.mean(member_predictions, axis=0)

# Score the averaged prediction against the held-out targets.
ensemble_mse = mean_squared_error(Y_test, ensemble_predictions)
print(f'Ensemble Mean Squared Error: {ensemble_mse}')

# Pipeline

In [None]:
# Pipeline with a scaling step followed by a regression model.
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardize features
    # Placeholder only: the grid below always substitutes the 'regressor' step.
    ('regressor', RandomForestRegressor())
])

# Candidate estimators and the tree hyper-parameters they share.
param_grid = {
    'regressor': [RandomForestRegressor(), GradientBoostingRegressor()],
    'regressor__n_estimators': [50, 100, 200],
    'regressor__max_depth': [3, 5, 7]
}

# Find the best model and parameters. scikit-learn scorers follow a
# "higher is better" convention, so MSE is requested as 'neg_mean_squared_error';
# the plain 'mean_squared_error' string is not a valid scorer name.
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, Y_train)

print("Best Parameters:", grid_search.best_params_)
best_model = grid_search.best_estimator_

# Evaluate the best model on the held-out test split.
y_pred = best_model.predict(X_test)
mse = mean_squared_error(Y_test, y_pred)
print(f'Mean Squared Error on Test Set: {mse}')

# Cross-validate the selected pipeline for a more robust estimate; the scores
# are negated MSE values, hence the sign flip when reporting.
cv_scores = cross_val_score(best_model, X_train, Y_train, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
print(f'Cross-validated Mean Squared Error: {np.mean(-cv_scores)}')

# Visualization: cross-validation scores and prediction errors.
from yellowbrick.model_selection import CVScores
from yellowbrick.regressor import PredictionError

# Visualize the selected pipeline rather than the GridSearchCV wrapper, so the
# whole hyper-parameter search is not repeated for every fold. Cross-validation
# uses the training split only, keeping the held-out test rows unseen.
cv_visualizer = CVScores(best_model, scoring='neg_mean_squared_error')
cv_visualizer.fit(X_train, Y_train)
cv_visualizer.show()

# Prediction-error plot: fitted on the training split, scored on the test split.
model_visualizer = PredictionError(best_model)
model_visualizer.fit(X_train, Y_train)
model_visualizer.score(X_test, Y_test)
model_visualizer.show()

KeyboardInterrupt: ignored

In [None]:
import os
import requests

# Current weather for a latitude,longitude pair.
url = "https://weatherapi-com.p.rapidapi.com/current.json"

querystring = {"q":"53.1,-0.13"}

# The API key is read from the environment (RAPIDAPI_KEY) so it is never stored
# in the notebook; a missing variable fails fast with KeyError.
headers = {
    "X-RapidAPI-Key": os.environ["RAPIDAPI_KEY"],
    "X-RapidAPI-Host": "weatherapi-com.p.rapidapi.com"
}

# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP
# errors instead of printing an error payload as if it were weather data.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

print(response.json())

{'location': {'name': 'Boston', 'region': 'Lincolnshire', 'country': 'United Kingdom', 'lat': 53.1, 'lon': -0.13, 'tz_id': 'Europe/London', 'localtime_epoch': 1700791698, 'localtime': '2023-11-24 2:08'}, 'current': {'last_updated_epoch': 1700791200, 'last_updated': '2023-11-24 02:00', 'temp_c': 9.0, 'temp_f': 48.2, 'is_day': 0, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/night/116.png', 'code': 1003}, 'wind_mph': 16.1, 'wind_kph': 25.9, 'wind_degree': 300, 'wind_dir': 'WNW', 'pressure_mb': 1019.0, 'pressure_in': 30.09, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 71, 'cloud': 25, 'feelslike_c': 5.6, 'feelslike_f': 42.1, 'vis_km': 10.0, 'vis_miles': 6.0, 'uv': 1.0, 'gust_mph': 26.3, 'gust_kph': 42.4}}


In [None]:
import os
import requests

# Historical weather for a named location over a date range.
url = "https://weatherapi-com.p.rapidapi.com/history.json"

querystring = {"q":"London","dt":"2023-11-17","lang":"en","end_dt":"2023-11-24"}

# The API key is read from the environment (RAPIDAPI_KEY) so it is never stored
# in the notebook; a missing variable fails fast with KeyError.
headers = {
    "X-RapidAPI-Key": os.environ["RAPIDAPI_KEY"],
    "X-RapidAPI-Host": "weatherapi-com.p.rapidapi.com"
}

# A timeout keeps the cell from hanging forever; raise_for_status surfaces HTTP
# errors instead of printing an error payload as if it were weather data.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

print(response.json())

{'location': {'name': 'London', 'region': 'City of London, Greater London', 'country': 'United Kingdom', 'lat': 51.52, 'lon': -0.11, 'tz_id': 'Europe/London', 'localtime_epoch': 1700793509, 'localtime': '2023-11-24 2:38'}, 'forecast': {'forecastday': [{'date': '2023-11-17', 'date_epoch': 1700179200, 'day': {'maxtemp_c': 9.6, 'maxtemp_f': 49.3, 'mintemp_c': 5.6, 'mintemp_f': 42.1, 'avgtemp_c': 7.5, 'avgtemp_f': 45.5, 'maxwind_mph': 7.6, 'maxwind_kph': 12.2, 'totalprecip_mm': 0.91, 'totalprecip_in': 0.04, 'avgvis_km': 9.7, 'avgvis_miles': 6.0, 'avghumidity': 79.0, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'uv': 3.0}, 'astro': {'sunrise': '07:22 AM', 'sunset': '04:09 PM', 'moonrise': '12:15 PM', 'moonset': '07:05 PM', 'moon_phase': 'Waxing Crescent', 'moon_illumination': 15}, 'hour': [{'time_epoch': 1700179200, 'time': '2023-11-17 00:00', 'temp_c': 6.3, 'temp_f': 43.3, 'is_day': 0, 'condition': {'text': 'Partly cloudy',