In [35]:
import pandas as pd
from sqlalchemy import create_engine
import numpy as np
import plotly

from plotly.graph_objs import Bar
from plotly import offline

In [36]:
# Load the DisasterResponse table from the local SQLite file into a DataFrame
# and report its dimensions.
engine = create_engine("sqlite:///static/db/DisasterRes.db")
df = pd.read_sql_table(table_name="DisasterResponse", con=engine)
print(f"df.shape: {df.shape}")

df.shape: (26215, 40)


#### Genre Bar Plot

In [37]:
# Number of rows per genre, most frequent first.
df.loc[:, 'genre'].value_counts()

news      13054
direct    10766
social     2395
Name: genre, dtype: int64

In [38]:
# Bar labels: the genre names in descending-frequency order, title-cased.
x = [genre.title() for genre in df['genre'].value_counts().index]

In [39]:
# Bar heights: the per-genre counts, in the same order as the labels in `x`.
y = df['genre'].value_counts().tolist()

In [40]:
# Single bar trace for the genre plot, expressed as a plain dict.
data = [dict(type='bar', x=x, y=y)]

In [41]:
# Layout for the genre plot: no title, labelled axes.
my_layout = dict(
    title='',
    xaxis=dict(title='Genre'),
    yaxis=dict(title='Numbers'),
)

# Figure spec combining the trace list with its layout.
fig = dict(data=data, layout=my_layout)

In [42]:
# Write the genre figure to an HTML file; the call returns the output path.
offline.plot(figure_or_data=fig, filename='genre.html')

'genre.html'

#### Distribution of categories

In [50]:
# Display labels for the last 36 columns of `df`: underscores become spaces
# and each word is capitalised. The column Index is sliced directly, so no
# intermediate sub-frame is built just to read its column names.
cat_names = [col.replace('_', ' ').title() for col in df.columns[-36:]]

In [51]:
# Show the formatted labels so they can be checked by eye.
cat_names

['Related',
 'Request',
 'Offer',
 'Aid Related',
 'Medical Help',
 'Medical Products',
 'Search And Rescue',
 'Security',
 'Military',
 'Child Alone',
 'Water',
 'Food',
 'Shelter',
 'Clothing',
 'Money',
 'Missing People',
 'Refugees',
 'Death',
 'Other Aid',
 'Infrastructure Related',
 'Transport',
 'Buildings',
 'Electricity',
 'Tools',
 'Hospitals',
 'Shops',
 'Aid Centers',
 'Other Infrastructure',
 'Weather Related',
 'Floods',
 'Storm',
 'Fire',
 'Earthquake',
 'Cold',
 'Other Weather',
 'Direct Report']

In [57]:
# Keep every row but only the trailing 36 columns of `df`.
cats_df = df.iloc[:, slice(-36, None)]

In [58]:
# Preview the selected columns.
cats_df

Unnamed: 0,related,request,offer,aid_related,medical_help,medical_products,search_and_rescue,security,military,child_alone,...,aid_centers,other_infrastructure,weather_related,floods,storm,fire,earthquake,cold,other_weather,direct_report
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26210,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26211,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26212,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
26213,1,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
# Column-wise totals of `cats_df`, ordered from largest to smallest.
count_cats = (
    cats_df
    .sum(axis=0)
    .sort_values(ascending=False)
)

count_cats

related                   20093
aid_related               10860
weather_related            7297
direct_report              5075
request                    4474
other_aid                  3446
food                       2923
earthquake                 2455
storm                      2443
shelter                    2314
floods                     2155
medical_help               2084
infrastructure_related     1705
water                      1672
other_weather              1376
buildings                  1333
medical_products           1313
transport                  1201
death                      1194
other_infrastructure       1151
refugees                    875
military                    860
search_and_rescue           724
money                       604
electricity                 532
cold                        530
security                    471
clothing                    405
aid_centers                 309
missing_people              298
hospitals                   283
fire                        282
tools                       159
shops                       120
offer                       118
child_alone                   0
dtype: int64

In [62]:
# Bar labels for the category plot: the index of `count_cats` (already in
# descending-count order) with underscores replaced by spaces, title-cased.
count_cats_k = [name.replace('_', ' ').title() for name in count_cats.index]

count_cats_k

['Related',
 'Aid Related',
 'Weather Related',
 'Direct Report',
 'Request',
 'Other Aid',
 'Food',
 'Earthquake',
 'Storm',
 'Shelter',
 'Floods',
 'Medical Help',
 'Infrastructure Related',
 'Water',
 'Other Weather',
 'Buildings',
 'Medical Products',
 'Transport',
 'Death',
 'Other Infrastructure',
 'Refugees',
 'Military',
 'Search And Rescue',
 'Money',
 'Electricity',
 'Cold',
 'Security',
 'Clothing',
 'Aid Centers',
 'Missing People',
 'Hospitals',
 'Fire',
 'Tools',
 'Shops',
 'Offer',
 'Child Alone']

In [69]:
# Bar heights for the category plot, in the same order as `count_cats_k`.
# `.tolist()` converts the NumPy scalars to built-in Python ints, the same
# way the genre counts in `y` are produced, so the list is plain Python data.
count_cats_v = count_cats.tolist()

count_cats_v

[20093,
 10860,
 7297,
 5075,
 4474,
 3446,
 2923,
 2455,
 2443,
 2314,
 2155,
 2084,
 1705,
 1672,
 1376,
 1333,
 1313,
 1201,
 1194,
 1151,
 875,
 860,
 724,
 604,
 532,
 530,
 471,
 405,
 309,
 298,
 283,
 282,
 159,
 120,
 118,
 0]

In [70]:
# Single bar trace for the category plot: labels on x, counts on y.
cat_data = [dict(type='bar', x=count_cats_k, y=count_cats_v)]

# Layout for the category plot: no title, labelled axes.
cat_layout = dict(
    title='',
    xaxis=dict(title='Categories'),
    yaxis=dict(title='Numbers'),
)

# Figure spec combining the trace list with its layout (rebinds `fig`).
fig = dict(data=cat_data, layout=cat_layout)

In [71]:
# Write the category figure to an HTML file; the call returns the output path.
offline.plot(figure_or_data=fig, filename='categories.html')

'categories.html'