## Imports, Read data, and Generate Model

In [113]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the salary dataset from the notebook's working directory.
df = pd.read_csv("./SalaryData.csv")

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Independent snapshots of both partitions, taken while they still
# contain the "Salary" target column.
train_set_full, test_set_full = train_set.copy(), test_set.copy()
df_copy = train_set.copy()

# Regression target for the training rows.
train_labels = df_copy["Salary"]

# From here on `train_set` / `test_set` hold features only.
train_set = train_set.drop("Salary", axis=1)
test_set = test_set.drop("Salary", axis=1)

# fit() returns the estimator itself, so construction and fitting can be chained.
lin_reg = LinearRegression().fit(train_set, train_labels)

# Predicted salaries for the held-out rows, echoed as the cell output.
salary_pred = lin_reg.predict(test_set)
salary_pred

array([ 115790.21011287,   71498.27809463,  102596.86866063,
         75267.80422384,   55477.79204548,   60189.69970699])

## Run Model API

### Persist Model and Data

In [46]:
try:
    # joblib is distributed as its own package; scikit-learn deprecated its
    # vendored copy (sklearn.externals.joblib) in 0.21 and removed it in 0.23.
    import joblib
except ImportError:
    # Fallback for old environments that only ship the vendored copy.
    from sklearn.externals import joblib

In [47]:
# Persist the fitted model together with the features and labels it was
# trained on, each to its own pickle file in the working directory.
artifacts = (
    (lin_reg, "linear_regression_model.pkl"),
    (train_set, "training_data.pkl"),
    (train_labels, "training_labels.pkl"),
)
written = [joblib.dump(obj, filename) for obj, filename in artifacts]

# joblib.dump returns the list of file names it wrote; echo the result of
# the final dump as the cell output.
written[-1]

['training_labels.pkl']

### Predict via API

In [20]:
import requests

# JSON payload for the prediction endpoint.
# NOTE(review): the key is "yearsOfExperience" while the training column is
# "YearsExperience"; presumably the service maps one to the other — confirm.
data = {"yearsOfExperience": 8}

# Bound the wait: without a timeout, requests blocks indefinitely when the
# local service accepts the connection but never answers.
response = requests.post("http://localhost:5000/predict", json=data, timeout=10)

# Surface 4xx/5xx responses as an HTTPError here instead of as a confusing
# JSON-decoding failure on the next line.
response.raise_for_status()

# Decoded JSON body, echoed as the cell output.
response.json()

[100712.10559602463]

In [35]:
# Training rows with more than 7 and at most 8 years of experience,
# selected with a boolean mask; row order is left as it is in df_copy.
df_copy.loc[lambda rows: (rows["YearsExperience"] > 7) & (rows["YearsExperience"] <= 8)]

Unnamed: 0,YearsExperience,Salary
22,7.9,101302.0
21,7.1,98273.0


### Retrain via API

In [101]:
import json

# Serialise two extra (experience, salary) observations as a JSON array of
# records; key order inside each record is YearsExperience, then Salary.
data = json.dumps(
    [
        dict(YearsExperience=years, Salary=salary)
        for years, salary in [(12, 140000), (12.1, 142000)]
    ]
)

data

'[{"YearsExperience": 12, "Salary": 140000}, {"YearsExperience": 12.1, "Salary": 142000}]'

In [102]:
from io import StringIO

# Reload the feature frame and label series from their pickle files.
# joblib.load unpickles arbitrary objects, so only point it at files this
# notebook produced itself.
training_set = joblib.load("./training_data.pkl")
training_labels = joblib.load("./training_labels.pkl")

# Parse the JSON payload into a DataFrame. The text is wrapped in a
# file-like buffer because passing a literal JSON string to read_json is
# deprecated since pandas 2.1; older versions accept the buffer as well.
# NOTE: this rebinds `df`, which previously held the full CSV contents.
df = pd.read_json(StringIO(data))
df

Unnamed: 0,Salary,YearsExperience
0,140000,12.0
1,142000,12.1


In [111]:
# Append the new observations to the persisted training data: features and
# labels are concatenated in the same order, so rows stay paired by position.
df_training_set = pd.concat([training_set, df.drop("Salary", axis=1)])
df_training_labels = pd.concat([training_labels, df["Salary"]])

# Refit from scratch on the combined data; fit() returns the estimator, so
# `lin_reg` is the fitted model and its repr is the cell output.
lin_reg = LinearRegression().fit(df_training_set, df_training_labels)
lin_reg

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [112]:
# Fitted slope(s) of the current `lin_reg` model — one coefficient per feature column.
lin_reg.coef_

array([ 9517.91869385])