# Launching Application with Flask

![](http://flask.pocoo.org/static/logo/flask.svg)


### Basic Titanic Model


Here, we use a basic `RandomForestClassifier` to determine the results of the crash.  The goal is to pickle and reuse this later, in our Flask example.

In [27]:
import pandas as pd
import pickle

In [28]:
df = pd.read_csv('data/titanic.csv')

In [29]:
df.head()

Unnamed: 0,survived,pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [30]:
df = pd.read_csv('data/titanic.csv')
df['sex'] = df['sex'].apply(lambda x: 0 if x == 'male' else 1)
df = df[['pclass', 'sex', 'age', 'fare', 'sibsp', 'survived']].dropna()

In [31]:
X = df.drop('survived', axis = 1)
y = df['survived']

In [32]:
from sklearn.ensemble import RandomForestClassifier

In [33]:
RandomForestClassifier(n_estimators=100).fit(X, y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [34]:
PREDICTOR = RandomForestClassifier(n_estimators=100).fit(X, y)

In [35]:
PREDICTOR.predict(X.iloc[4].values.reshape(-1,5))

array([0], dtype=int64)

### Pickling

In [36]:
with open('titanic_rfc.pkl', 'wb') as picklefile:
    pickle.dump(PREDICTOR, picklefile)

In [37]:
with open('titanic_rfc.pkl', 'rb') as picklefile:
     PREDICTOR = pickle.load(picklefile)

### Spam or Ham?

In [15]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

In [17]:
df = pd.read_csv('data/smsspamcollection.txt', sep='\t', header=None)
df.columns = ['target', 'msg']
y = df['target']
X = df['msg']

In [18]:
df.head()

Unnamed: 0,target,msg
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [19]:
cvec = TfidfVectorizer(stop_words='english', max_features = 300)
X = cvec.fit_transform(X)
clf = MultinomialNB()
clf.fit(X, y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [20]:
message = 'sup duder, wanna pickle?'
mess = pd.Series(message)
X_new = cvec.transform(mess)
clf.predict(X_new)

array(['ham'], dtype='<U4')

In [21]:
from sklearn.feature_extraction.text import CountVectorizer

In [22]:
grams = CountVectorizer().fit_transform(mess)

In [23]:
grams.data

array([1, 1, 1, 1], dtype=int64)

In [24]:
from nltk.tokenize import word_tokenize
import nltk

In [26]:
tokes = word_tokenize(message)

LookupError: 
**********************************************************************
  Resource [93mpunkt[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt')
  [0m
  Searched in:
    - 'C:\\Users\\pdt/nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
    - 'C:\\ProgramData\\Anaconda3\\nltk_data'
    - 'C:\\ProgramData\\Anaconda3\\share\\nltk_data'
    - 'C:\\ProgramData\\Anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\pdt\\AppData\\Roaming\\nltk_data'
    - ''
**********************************************************************


In [None]:
tokes