### Generate endpoint

In [None]:
import boto3

In [1]:
# !pip install sagemaker
import sagemaker



In [8]:
# Wrap the trained model artifact stored in S3 in a SageMaker Model object.
# NOTE(review): in SageMaker Python SDK v2 the first positional parameter of
# Model appears to be image_uri, not model_data -- confirm against the installed
# SDK version and pass the tarball as model_data=... if so.
# NOTE(review): role=None supplies no execution role here; an IAM role ARN is
# assigned to `role` in a later cell of this file -- presumably it must be
# passed in before this model can be deployed; verify.
model = sagemaker.Model("s3://w210-bucket/models/rf_model.tar.gz",
                        role=None)

In [12]:
# Display the Model object created above.
# NOTE(review): the output recorded for this cell is
# "TypeError: 'NoneType' object is not callable" -- the cell does not currently
# run cleanly; resolve or remove it before sharing the notebook.
model

TypeError: 'NoneType' object is not callable

In [None]:
# Create a SageMaker session with default settings.
# NOTE(review): `sm` is not referenced by any other cell visible in this file --
# confirm whether it is still needed.
sm = sagemaker.Session()

### Old work to build/fit model below

In [None]:
import boto3
import sagemaker_pyspark
from pyspark.sql import SparkSession
from pyspark import SparkContext, SparkConf
from sagemaker_pyspark import IAMRole, classpath_jars
from sagemaker_pyspark.algorithms import LinearLearnerRegressor
from pyspark.sql.functions import col, log10 
#from pyspark.ml.regression import LinearRegression
#from sagemaker_pyspark.algorithms import KMeansSageMakerEstimator

In [None]:
# Fixed deployment settings: target region and the SageMaker execution role ARN.
region = "us-east-1"
role = 'arn:aws:iam::752600073001:role/service-role/AmazonSageMaker-ExecutionRole-20230319T095240'

# Resolve AWS credentials from the local 'default' profile and keep the key
# pair in module-level names. Never echo these values in a cell output.
session = boto3.Session(profile_name='default')
credentials = session.get_credentials()
ACCESS_KEY, SECRET_KEY = credentials.access_key, credentials.secret_key

In [None]:
# Build a Spark configuration whose driver classpath contains every entry
# returned by sagemaker_pyspark's classpath_jars(), joined with ':'.
conf = SparkConf().set(
    "spark.driver.extraClassPath",
    ":".join(classpath_jars()),
)

# Use getOrCreate() rather than the bare SparkContext(...) constructor: the
# constructor raises if a context is already active, which made this cell fail
# whenever it was re-run in the same kernel. getOrCreate() returns the active
# context if there is one and otherwise creates it from `conf`. The context is
# still the cell's last expression, so it is displayed as before.
SparkContext.getOrCreate(conf=conf)

In [None]:
# Driver classpath: every entry from sagemaker_pyspark.classpath_jars(), ':'-separated.
classpath = ":".join(sagemaker_pyspark.classpath_jars())

# Obtain the SparkSession (creating one if none exists) with that classpath configured.
spark = (
    SparkSession.builder
    .config("spark.driver.extraClassPath", classpath)
    .getOrCreate()
)

In [None]:
def _with_log_ridership(df):
    """Return `df` with `ridership_number` replaced by its base-10 log.

    Adds a `log_ridership_number` column computed as log10(ridership_number)
    and drops the original `ridership_number` column.
    """
    log_ridership = log10(col('ridership_number'))
    return df.withColumn("log_ridership_number", log_ridership).drop("ridership_number")


# Load the pre-processed train / test splits from S3.
processed_train_df = spark.read.parquet("s3a://w210-bucket/data_wrangling/processed_train_df.parquet")
processed_test_df = spark.read.parquet("s3a://w210-bucket/data_wrangling/processed_test_df.parquet")

# Apply the same log transform to both splits.
processed_train_df = _with_log_ridership(processed_train_df)
processed_test_df = _with_log_ridership(processed_test_df)

In [None]:
# Inspect the training frame's schema. Only the last expression of a cell is
# rendered, so the former bare `.columns` line never produced output; `.dtypes`
# lists (column name, type) pairs and therefore already shows the column names.
processed_train_df.dtypes

In [None]:
# Rename the training frame's columns positionally to 'features' and 'label'.
# NOTE(review): assumes processed_train_df has exactly two columns, in
# (features, target) order -- confirm against the schema shown above.
# NOTE(review): the model-fitting cells later in this file use
# processed_train_df, not new_df_train -- confirm whether this frame is still needed.
new_df_train = processed_train_df.toDF('features','label')

In [None]:
# Inspect the renamed frame's schema. Only the last expression of a cell is
# rendered, so the former bare `.columns` line never produced output; `.dtypes`
# lists (column name, type) pairs and therefore already shows the column names.
new_df_train.dtypes

In [None]:
# Tell the estimator which DataFrame columns hold the feature vector and the
# regression target when the training data is serialized.
training_data_format_options = {
    'featuresColumnName': 'features',
    'labelColumnName': 'log_ridership_number',
}

# SageMaker linear-learner regressor: one training instance, one endpoint instance.
llr_estimator = LinearLearnerRegressor(
    sagemakerRole=IAMRole(role),
    # training resources
    trainingInstanceType="ml.g4dn.xlarge",
    trainingInstanceCount=1,
    # hosting resources
    endpointInstanceType="ml.m4.xlarge",
    endpointInitialInstanceCount=1,
    trainingSparkDataFormatOptions=training_data_format_options,
)

In [None]:
# Look up the estimator's 'feature_dim' parameter by name and display it.
# NOTE(review): presumably a check on whether the input dimensionality still
# needs to be set before fitting -- confirm.
llr_estimator.getParam('feature_dim')

In [None]:
# Fit the SageMaker linear learner on the log-transformed training frame.
# NOTE(review): with sagemaker_pyspark this call presumably starts remote
# training/hosting resources using the instance types configured on the
# estimator -- confirm cost implications before re-running.
llr_model = llr_estimator.fit(processed_train_df)

In [None]:
# The file-level import of LinearRegression is commented out in the import
# cell, so this cell raised NameError on a fresh kernel; import it here so the
# cell runs on its own.
from pyspark.ml.regression import LinearRegression

# Local Spark ML linear regression on the log-transformed ridership target.
lr = LinearRegression(featuresCol='features', labelCol='log_ridership_number')
lr_model = lr.fit(processed_train_df)

In [None]:
# Score the fitted Spark ML regression on the training frame and report the
# intercept together with the R2, RMSE and MAE training metrics, one per line.
train_results = lr_model.evaluate(processed_train_df)
print(
    f"Intercept: {lr_model.intercept}",
    f"R2 : {train_results.r2}",
    f"RMSE : {train_results.rootMeanSquaredError}",
    f"MAE : {train_results.meanAbsoluteError}",
    sep="\n",
)