# Zapisywanie i wczytywanie modeli

Zbiór danych: https://www.kaggle.com/datasets/mrsimple07/restaurants-revenue-prediction/data

In [1]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
import joblib
import pickle
import boto3
import os

In [2]:
# Run this cell only when the notebook is executed from the solutions folder
# while the dataset lives in the `data` folder one level up: it moves the
# working directory to the parent so the relative paths used below resolve.
# NOTE: the path is relative, so every re-run moves one more level up.
import os

parent_dir = '../'
os.chdir(parent_dir)

In [3]:
# Load the dataset (path is relative to the current working directory)
data_path = 'data/Restaurant_revenue.csv'
df = pd.read_csv(data_path)

In [None]:
# Categorical features: names of all object-dtype columns
object_columns = df.select_dtypes(include='object').columns
cat_features = object_columns.tolist()
cat_features

In [None]:
# Numeric features: every non-object column except the prediction target.
# The target is excluded by name instead of by position (`[:-1]`), so the
# list stays correct even if the column order of the CSV changes; with the
# target as the last column the result is identical to the positional slice.
num_features = [
    column
    for column in df.select_dtypes(exclude='object').columns
    if column != 'Monthly_Revenue'
]
num_features

In [5]:
# Train/test split: features are all columns except the target.
# A fixed seed makes the split reproducible. Without it, a model saved in one
# session and reloaded in another would be scored on a different test set
# that may contain rows the model was trained on.
train_x, test_x, train_y, test_y = train_test_split(
    df.drop('Monthly_Revenue', axis=1),
    df['Monthly_Revenue'],
    random_state=42,
)

In [6]:
# Pobranie pipeline'u do tworzenia modelu
from help_function import create_model

In [7]:
# Create the model with the project helper, using a gradient boosting
# regressor and the numeric / categorical feature lists defined earlier
regressor = GradientBoostingRegressor(min_samples_leaf=10)
m1 = create_model(train_x, train_y, regressor, num_features, cat_features)

In [None]:
# Show the created model object as the cell output
m1

In [None]:
# R^2 on the test set for the in-memory model
r2_m1 = m1.score(test_x, test_y)
r2_m1

## Joblib

In [None]:
# Save the model to disk with joblib
joblib_path = "m1_restaurant.joblib"
joblib.dump(m1, joblib_path)

In [11]:
# Load the model back from the joblib file
joblib_path = "m1_restaurant.joblib"
loaded_model = joblib.load(joblib_path)

In [None]:
# R^2 on the test set for the model restored from joblib
r2_joblib = loaded_model.score(test_x, test_y)
r2_joblib

## Pickle

In [13]:
# Save the model with pickle.
# The context manager closes the file even if pickling raises, which the
# manual open()/close() pair did not guarantee.
with open("m1_restuarant_pickle.pkl", "wb") as file:
    pickle.dump(m1, file)

In [14]:
# Load the model back from the pickle file.
# The context manager closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("m1_restuarant_pickle.pkl", "rb") as file:
    loaded_model_2 = pickle.load(file)

In [None]:
# R^2 on the test set for the model restored from pickle
r2_pickle = loaded_model_2.score(test_x, test_y)
r2_pickle

## Zapis do AWS S3

In [16]:
from help_function import save_model_to_s3, download_model_from_s3

In [17]:
#from dotenv import load_dotenv
#load_dotenv()

In [18]:
# Read the AWS credentials from environment variables (None when unset)
environment = os.environ
AWS_ACCESS_KEY = environment.get("AWS_ACCESS_KEY")
AWS_SECRET_ACCESS_KEY = environment.get("AWS_SECRET_ACCESS_KEY")

In [22]:
# Initialise the S3 client with the credentials read from the environment
credentials = {
    "aws_access_key_id": AWS_ACCESS_KEY,
    "aws_secret_access_key": AWS_SECRET_ACCESS_KEY,
}
s3_client = boto3.client('s3', **credentials)

In [23]:
# Save the joblib-restored model to S3 through the project helper
save_model_to_s3(
    s3_client=s3_client,
    model=loaded_model,
    bucket_name='mlmisa',
    path_in_s3='models/loaded_model_v1.joblib',
)


In [24]:
# Load the model from S3 through the project helper; the returned value is
# shown as the cell output
download_model_from_s3(
    s3_client=s3_client,
    bucket_name='mlmisa',
    path_in_s3='models/loaded_model_v1.joblib',
)