## Load data

In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame({
    'experience'      : [None, None, 'five', 'two', 'seven', 'three', 'ten', 'eleven'],
    'test_score'      : [8,8,6,10,9,7,None,7],
    'interview_score' : [9,6,7,10,6,10,7,8],
    'salary'          : [50000,45000,60000,65000,70000,62000,72000,80000]
})
df

Unnamed: 0,experience,test_score,interview_score,salary
0,,8.0,9,50000
1,,8.0,6,45000
2,five,6.0,7,60000
3,two,10.0,10,65000
4,seven,9.0,6,70000
5,three,7.0,10,62000
6,ten,,7,72000
7,eleven,7.0,8,80000


## Preprocessing

### Word to number

In [6]:
!pip install word2number

Collecting word2number
  Downloading https://files.pythonhosted.org/packages/4a/29/a31940c848521f0725f0df6b25dca8917f13a2025b0e8fcbe5d0457e45e6/word2number-1.1.zip
Building wheels for collected packages: word2number
  Building wheel for word2number (setup.py) ... [?25ldone
[?25h  Stored in directory: /home/myself/.cache/pip/wheels/46/2f/53/5f5c1d275492f2fce1cdab9a9bb12d49286dead829a4078e0e
Successfully built word2number
Installing collected packages: word2number
Successfully installed word2number-1.1


In [3]:
from word2number import w2n

In [4]:
w2n.word_to_num('two thousand')

2000

In [5]:
def word2num(x):
    try:
        k = w2n.word_to_num(x)
        return k
    except:
        print(x + ' is not a number')
    return None

In [6]:
df['experience'] = df['experience'].map(
    lambda x: word2num(x) if not pd.isnull(x) else x)
df

Unnamed: 0,experience,test_score,interview_score,salary
0,,8.0,9,50000
1,,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,,7,72000
7,11.0,7.0,8,80000


### Missing values

In [7]:
df.isna().sum()

experience         2
test_score         1
interview_score    0
salary             0
dtype: int64

In [8]:
df['experience'].fillna(0, inplace=True)
df['test_score'].fillna(df['test_score'].mean(), inplace=True)
df

Unnamed: 0,experience,test_score,interview_score,salary
0,0.0,8.0,9,50000
1,0.0,8.0,6,45000
2,5.0,6.0,7,60000
3,2.0,10.0,10,65000
4,7.0,9.0,6,70000
5,3.0,7.0,10,62000
6,10.0,7.857143,7,72000
7,11.0,7.0,8,80000


## Train model

In [9]:
X = df.drop('salary', axis=1)
y = df['salary']

print(len(X), len(y))

8 8


In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [11]:
model = LinearRegression()
model.fit(X, y)

LinearRegression()

In [12]:
y_pred = model.predict(X)
r2_score(y_pred, y)

0.9626511210293307

In [13]:
vars(model)

{'fit_intercept': True,
 'normalize': False,
 'copy_X': True,
 'n_jobs': None,
 'n_features_in_': 3,
 'coef_': array([2827.63404314, 1912.93803053, 2196.9753141 ]),
 '_residues': 33339855.691710446,
 'rank_': 3,
 'singular_': array([11.40375657,  4.13947032,  3.17037293]),
 'intercept_': 17237.3303137272}

## Save model for deployment

In [14]:
import pickle

In [15]:
with open('model', 'wb') as f:
    pickle.dump(model, f)

In [16]:
with open('model', 'rb') as f:
    model2 = pickle.load(f)
    display(vars(model2))

{'fit_intercept': True,
 'normalize': False,
 'copy_X': True,
 'n_jobs': None,
 'n_features_in_': 3,
 'coef_': array([2827.63404314, 1912.93803053, 2196.9753141 ]),
 '_residues': 33339855.691710446,
 'rank_': 3,
 'singular_': array([11.40375657,  4.13947032,  3.17037293]),
 'intercept_': 17237.3303137272}

## Hello world Flask app

In [17]:
%%writefile helloworld.py

from flask import Flask
app = Flask(__name__)

@app.route('/')
def hello():
    return "Hello World"

if __name__ == "__main__":
    app.run(debug=True, port=8889)

Writing helloworld.py


In [18]:
!python helloworld.py

 * Serving Flask app "helloworld" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
 * Running on http://127.0.0.1:8889/ (Press CTRL+C to quit)
 * Restarting with stat
 * Debugger is active!
 * Debugger PIN: 273-270-913
127.0.0.1 - - [11/Dec/2020 19:51:27] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [11/Dec/2020 19:51:27] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
^C


Go to http://127.0.0.1:8889/ and check that you get "Hello World".   
Once you're done, press the stop button in Jupyter.

## Deploy model

In [19]:
%%writefile index.html

<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>Salary prediction</title>
    <style>
        body {
            background: #f5f5f5;
        }
        main {
            max-width: 800px;
            margin: 0 auto;
            transform: translateY(30vh);
        }
        form {
            background: white;
            padding: 15px;
            box-shadow: 0 0 5px rgba(0,0,0,0.3);
        }
        form div {
            padding: 10px 0;
        }
        form label {
            width: 150px;
            display: inline-block;
        }
        button {
            background: royalblue;
            padding: 5px 15px;
            color: white;
            border: 0;
            border-radius: 3px;
            cursor: pointer;
        }
        button:hover {
            opacity: 0.8;
        }
        hr {
            border: 1px solid #eee;
            margin: 20px 0;
        }
        .info {
            background: #d9edf7;
            padding: 15px;
            margin-top: 15px;
            border: 1px solid #bce8f1;
        }
        .info:empty {
            display: none;
        }
    </style>
  </head>
  <body>
    <main>
        <form action="{{ url_for('predict') }}" method="POST">
            <div>
                <label for="experience">Experience*</label>
                <input id="experience" name="experience" required
                        type="number" min="0" max="100">
            </div>
            <div>
                <label for="test_score">Test Score*</label>
                <input id="test_score" name="test_score" required
                        type="number" min="0" max="100">
            </div>
            <div>
                <label for="interview_score">Interview Score*</label>
                <input id="interview_score" name="interview_score" required
                        type="number" min="0" max="100">
            </div>
            <hr>
            <button type="submit">Predict</button>
        </form>
        <div class="info">{{ prediction }}</div>
    </main>
  </body>
</html>

Overwriting index.html


In [20]:
%%writefile app.py

from flask import Flask, request, render_template
import numpy as np
import pickle

app   = Flask(__name__, template_folder='.')
model = pickle.load(open('model', 'rb'))

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/', methods=['POST'])
def predict():
    post = dict(request.form)

    features = [np.array(list(post.values())).astype('int')]
    predict  = int(round(model.predict(features)[0], -2))

    return render_template('index.html',
                prediction='Predicted Salary: ${:,}'.format(predict).replace(',', ' '))

if __name__ == "__main__":
    app.run(debug=True, port=8889)

Overwriting app.py


In [21]:
!python app.py

 * Serving Flask app "app" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
 * Running on http://127.0.0.1:8889/ (Press CTRL+C to quit)
 * Restarting with stat
 * Debugger is active!
 * Debugger PIN: 130-824-805
127.0.0.1 - - [11/Dec/2020 19:52:05] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [11/Dec/2020 19:52:15] "[37mPOST / HTTP/1.1[0m" 200 -
^C
