In [8]:
# import libraries for the model
import pandas as pd       
import numpy as np                 
import seaborn as sns                
import matplotlib.pyplot as plt                
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pickle
import joblib

In [9]:
# read the data for analysis 
penguins = pd.read_csv('./penguins.csv')
penguins.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   bill_length_mm     342 non-null    float64
 3   bill_depth_mm      342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


In [10]:
penguins_clean = penguins.dropna()
penguins_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 333 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            333 non-null    object 
 1   island             333 non-null    object 
 2   bill_length_mm     333 non-null    float64
 3   bill_depth_mm      333 non-null    float64
 4   flipper_length_mm  333 non-null    float64
 5   body_mass_g        333 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 20.8+ KB


In [11]:
penguins_clean.columns

Index(['species', 'island', 'bill_length_mm', 'bill_depth_mm',
       'flipper_length_mm', 'body_mass_g', 'sex'],
      dtype='object')

In [12]:
# choose x and y variables
X = penguins_clean[['bill_length_mm', 'bill_depth_mm',
       'flipper_length_mm']]
y = penguins_clean['body_mass_g']

# apply standard scaler to X variables
scaler = StandardScaler()
X = scaler.fit_transform(X)

# import linear regression
linear_model = LinearRegression()
linear_reg = linear_model.fit(X,y)

In [13]:
# use joblib to convert the model into pickle
# pickle is a serilized module in python 

# pickle.dump(linear_reg, open('linear_reg.pkl', 'wb'))
joblib.dump(linear_reg, 'linear_reg.pkl')


# open('xgboost_model.pkl', 'wb'): opens a file in write-binary mode (wb), ready to store bytes.
# pickle.dump(...): writes the entire xgboost_model object to that file in binary format.
# xgboost_model.pkl is now a file that contains your trained model and can be shared or deployed.


# load the model for prediction - deserilizing the model
# loaded_red = pickle.load(open('linear_reg.pkl', 'rb'))
model_reg = joblib.load('linear_reg.pkl')


In [14]:
# test flask performnace 
import requests

url = "http://localhost:5000/predict"

data = {"X": [50, 35, 14]}

response = requests.post(url, json=data)
# print(response.json())


In [16]:
print(response.json())

{'Penguin_Body_Mass': 16279.192061836173}


In [15]:
print(response.status_code)
print(response.text)

200
{
  "Penguin_Body_Mass": 16279.192061836173
}

