In [11]:
import json
import plotly
import pandas as pd

from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

from flask import Flask
from flask import render_template, request, jsonify
from plotly.graph_objs import Bar
from sqlalchemy import create_engine
import joblib

In [12]:
def tokenize(text):
    """Split ``text`` into word tokens and normalize each one.

    Every token produced by ``word_tokenize`` is lemmatized with a
    ``WordNetLemmatizer``, then lower-cased and stripped of surrounding
    whitespace.

    Args:
        text: The string to tokenize.

    Returns:
        list: The normalized tokens, in the order ``word_tokenize`` produced them.
    """
    words = word_tokenize(text)
    wnl = WordNetLemmatizer()
    # The lemmatizer sees each token with its original casing; lower-casing
    # is applied to the lemmatizer's result, not to its input.
    return [wnl.lemmatize(word).lower().strip() for word in words]

In [13]:
# Read the 'DisasterResponse' table from the SQLite database into a DataFrame.
# Both paths in this cell are relative, so they resolve against the kernel's
# current working directory.
database_url = 'sqlite:///../data/DisasterResponse.db'
engine = create_engine(database_url)
df = pd.read_sql_table(table_name='DisasterResponse', con=engine)

# Deserialize the saved classifier.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a trusted source.
model_path = "../models/classifier.pkl"
model = joblib.load(model_path)

In [14]:
# Count of non-null 'message' entries for each genre, plus the genre labels
# in the same order as the counts.
genre_counts = df.groupby('genre')['message'].count()
genre_names = genre_counts.index.tolist()

In [16]:
# Show the message count for each genre.
genre_counts

genre
direct    10766
news      13054
social     2396
Name: message, dtype: int64

In [30]:
# Reshape df to long format: every column NOT listed in id_vars is unpivoted
# into a ('variable', 'value') pair, one row per message and column.
# The label columns listed in id_vars are therefore left out of the ranking below.
# NOTE(review): presumably they are excluded because they are metadata or broad
# parent labels — confirm.
melted = pd.melt(df, id_vars=['id', 'message', 'original', 'genre', 'related', 'direct_report', 'offer', 'request',
                             'aid_related', 'weather_related', 'other_aid', 'infrastructure_related', 'other_weather'])

# Ten 'variable' entries with the largest summed 'value'.
# 'value' is selected BEFORE summing so that the other columns carried along by
# the melt (e.g. the 'message', 'original' and 'genre' text columns) are never
# aggregated; summing the whole frame first is wasteful and, with
# numeric_only=False (the pandas >= 2.0 default), no longer drops text columns.
melted.groupby('variable')['value'].sum().nlargest(10)

variable
food                2923
earthquake          2455
storm               2443
shelter             2314
floods              2155
medical_help        2084
water               1672
buildings           1333
medical_products    1313
transport           1201
Name: value, dtype: int64

In [20]:
# Inspect the long-format frame (pandas truncates the display of large frames).
# NOTE(review): the saved output below lists 'request' and 'direct_report' under
# 'variable', which the id_vars used in the In [30] cell would exclude — it appears
# to come from an earlier definition of `melted`; re-run the notebook top to bottom
# to refresh it.
melted

Unnamed: 0,id,message,original,genre,related,variable,value
0,2,Weather update - a cold front from Cuba that c...,Un front froid se retrouve sur Cuba ce matin. ...,direct,1,request,0
1,7,Is the Hurricane over or is it not over,Cyclone nan fini osinon li pa fini,direct,1,request,0
2,8,Looking for someone but no name,"Patnm, di Maryani relem pou li banm nouvel li ...",direct,1,request,0
3,9,UN reports Leogane 80-90 destroyed. Only Hospi...,UN reports Leogane 80-90 destroyed. Only Hospi...,direct,1,request,1
4,12,"says: west side of Haiti, rest of the country ...",facade ouest d Haiti et le reste du pays aujou...,direct,1,request,0
...,...,...,...,...,...,...,...
917555,30261,The training demonstrated how to enhance micro...,,news,0,direct_report,0
917556,30262,A suitable candidate has been selected and OCH...,,news,0,direct_report,0
917557,30263,"Proshika, operating in Cox's Bazar municipalit...",,news,1,direct_report,0
917558,30264,"Some 2,000 women protesting against the conduc...",,news,1,direct_report,0


In [25]:
# List all column names of the loaded table.
df.columns

Index(['id', 'message', 'original', 'genre', 'related', 'request', 'offer',
       'aid_related', 'medical_help', 'medical_products', 'search_and_rescue',
       'security', 'military', 'child_alone', 'water', 'food', 'shelter',
       'clothing', 'money', 'missing_people', 'refugees', 'death', 'other_aid',
       'infrastructure_related', 'transport', 'buildings', 'electricity',
       'tools', 'hospitals', 'shops', 'aid_centers', 'other_infrastructure',
       'weather_related', 'floods', 'storm', 'fire', 'earthquake', 'cold',
       'other_weather', 'direct_report'],
      dtype='object')