# <div align = "center"><h1>NLP with Disaster Tweets</h1></div>
<img src = "https://www.thestatesman.com/wp-content/uploads/2022/05/54a547740a8b1bf4b5960fcbfba825e4-1.jpg" style = "width: 100%">

# Introduction
<div class = "alert alert-info">
<b>Natural Language Processing (NLP)</b> enables computers to understand natural languages as humans do. Whether the language is spoken or written, natural language processing uses artificial intelligence to take real-world input, process it, and make sense of it in a way a computer can understand. In this kernel we'll explore the classification of tweets as disaster or non-disaster using <b>Hugging Face 🤗 Transformers</b> and <b>BERT</b>. Founded in 2016, Hugging Face 🤗 evolved from a developer of natural language processing (NLP) technology into an open-source library and community platform where popular NLP models such as <b>BERT, GPT-2, T5 and DistilBERT</b> are available.
</div>

In [None]:
!pip install text-hammer

# Imports

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
from wordcloud import WordCloud

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import folium 
from folium import plugins 

import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from collections import Counter
from collections import defaultdict
import text_hammer as th

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Input, Model
from keras.callbacks import ReduceLROnPlateau
from keras.losses import BinaryCrossentropy
from tensorflow.keras.utils import plot_model

import transformers
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments
from transformers import TFBertModel, Trainer
from tokenizers import Tokenizer
from transformers import BertTokenizer

import warnings
warnings.filterwarnings('ignore')

# Color Scheme
Setting the color scheme for the notebook

In [None]:
# Notebook-wide colour list; later cells index into it by position.
custom_colors = ['#000000', '#E31E33', '#4A53E1', '#F5AD02', '#94D5EA', '#F6F8F7']
# sns.set_palette() returns None, so build the palette object first and keep a
# usable reference to it before registering it as seaborn's default palette.
custom_palette = sns.color_palette(custom_colors)
sns.set_palette(custom_palette)
# Preview the swatches; tick labels and tick marks are hidden on the preview.
sns.palplot(custom_palette, size = 1)
plt.tick_params(axis = 'both', labelsize = 0, length = 0)

# Check For GPU

In [None]:
def kaggle_gpu():
    """Print the outcome of two TensorFlow GPU checks.

    The first check looks at the GPU device name reported by
    ``tf.test.gpu_device_name()``; the second looks at the list returned by
    ``tf.config.list_physical_devices('GPU')``. Nothing is returned.
    """
    gpu_name = tf.test.gpu_device_name()
    if 'GPU' in gpu_name:
        print('Found GPU at: {}'.format(gpu_name))
    else:
        print('GPU device not found')

    physical_gpus = tf.config.list_physical_devices('GPU')
    print('GPU availabe' if physical_gpus else 'GPU not available')

kaggle_gpu()

Looking at the input files

In [None]:
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = [os.path.join(dirname, filename) for filename in filenames]
    for full_path in full_paths:
        print(full_path)

Reading the dataframe

In [None]:
# Load the competition's train and test CSV files into dataframes.
input_dir = '/kaggle/input/nlp-getting-started'
df = pd.read_csv(input_dir + '/train.csv')
test_df = pd.read_csv(input_dir + '/test.csv')
# Last expression of the cell: shows the training frame in the notebook output.
df

In [None]:
# Report the number of missing values per column, then the grand total.
missing_per_column = df.isna().sum()
separator = '----------------------------'
print(missing_per_column)
print(separator)
print('Total Missing Values: ', missing_per_column.sum())
print(separator)

Most of the missing values in our dataframe are from the `location` and `keyword` columns

# EDA (Exploratory Data Analysis)

Visualizing the missing data in the form of a chart

In [None]:
# Heatmap of the boolean null mask: one column per dataframe column.
plt.figure(figsize = (15, 10))
null_mask = df.isna()
sns.heatmap(null_mask, cmap = 'afmhot', cbar = False, yticklabels = False)
plt.title("Visualizing the Missing Data", fontsize = 20)
plt.xticks(fontsize = 15, rotation = 35)
plt.show()

Bar graph representation of the missing values

In [None]:
# missingno bar chart for every column of the training frame, drawn in black.
black_rgb = (0, 0, 0)
msno.bar(df, figsize = (15, 10), sort = "ascending", color = black_rgb)
plt.show()

Let's take a look at the class distribution of our dataset

In [None]:
# Count plot of the `target` column, each bar labelled with its count and share.
plt.figure(figsize = (15, 12))
ax = plt.axes()
ax.set_facecolor('black')
class_palette = [custom_colors[2], custom_colors[1]]
ax = sns.countplot(x = 'target', data = df, palette = class_palette, edgecolor = 'white', linewidth = 1.2)
plt.title('Disaster Count', fontsize = 25)
plt.xlabel('Disaster', fontsize = 20)
plt.ylabel('Count', fontsize = 20)
ax.xaxis.set_tick_params(labelsize = 15)
ax.yaxis.set_tick_params(labelsize = 15)
bbox_args = dict(boxstyle = 'round', fc = '0.9')
total_rows = len(df['target'])
for p in ax.patches:
    bar_height = p.get_height()
    percentage = (bar_height / total_rows) * 100
    label = '{:.0f} = {:.2f}%'.format(bar_height, percentage)
    # Place the label slightly right of the bar's left edge and just above its top.
    ax.annotate(label, (p.get_x() + 0.25, bar_height + 60),
                color = 'black',
                bbox = bbox_args,
                fontsize = 15)
plt.show()

There is a class imbalance in the dataset, with 4342 non-disaster tweets and 3271 disaster tweets.

Let's take a look at where most of the tweets in our dataset come from:

In [None]:
# Ten most frequent values of the `location` column with their counts.
location_counts = df['location'].value_counts()
location_counts.head(10)

Bar chart representation of the locations from where the highest number of tweets originate

In [None]:
# Bar chart of the ten most frequent `location` values, each bar labelled with its count.
plt.figure(figsize = (15, 13))
ax = plt.axes()
ax.set_facecolor('black')
top_location_counts = df.location.value_counts()[:10]
ax = top_location_counts.plot(kind = 'bar', color = custom_colors[2], linewidth = 2, edgecolor = 'white')
plt.title('Location Count', fontsize = 30)
plt.xlabel('Location', fontsize = 25)
plt.ylabel('Count', fontsize = 25)
ax.xaxis.set_tick_params(labelsize = 15, rotation = 30)
ax.yaxis.set_tick_params(labelsize = 15)
bbox_args = dict(boxstyle = 'round', fc = '0.9')
for p in ax.patches:
    bar_height = p.get_height()
    label_position = (p.get_x() + 0.15, bar_height + 2)
    ax.annotate('{:.0f}'.format(bar_height), label_position,
                bbox = bbox_args,
                color = custom_colors[2],
                fontsize = 15)

Visualizing the top 10 locations where most of the tweets come from

In [None]:
# Geocode the ten most frequent `location` values and draw them on a folium map,
# one circle per location with a radius proportional to its tweet count.
top_locations = df['location'].value_counts()[:10]
new_df = pd.DataFrame()
new_df['location'] = top_locations.index
new_df['count'] = top_locations.values
geolocator = Nominatim(user_agent = 'Rahil')
# Rate-limited wrapper: at least 0.5 s between consecutive geocoding requests.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds = 0.5)
lat = {}
long = {}
for i in new_df['location']:
    location = geocode(i)
    # Free-text locations may not resolve; the geocoder then yields None and
    # the location is simply left without coordinates.
    if location is None:
        continue
    lat[i] = location.latitude
    long[i] = location.longitude
# Locations missing from the dictionaries become NaN here.
new_df['latitude'] = new_df['location'].map(lat)
new_df['longitude'] = new_df['location'].map(long)
# NOTE(review): `map` shadows the Python builtin for the rest of the notebook;
# the name is kept so any later cell that refers to it keeps working.
map = folium.Map(location = [10.0, 10.0], tiles = 'CartoDB dark_matter', zoom_start = 1.5)
markers = []
title = '''<h1 align = "center" style = "font-size: 35px"><b>Top 10 Tweet Locations</b></h1>'''
for i, r in new_df.iterrows():
    has_coordinates = pd.notna(r['latitude']) and pd.notna(r['longitude'])
    if r['count'] > 0 and has_coordinates:
        counts = r['count'] * 0.4
        folium.CircleMarker([float(r['latitude']), float(r['longitude'])], radius = float(counts), color = custom_colors[1], fill = True).add_to(map)
map.get_root().html.add_child(folium.Element(title))
map

In [None]:
# The name `stopwords` is rebound below from the imported NLTK corpus reader to
# a plain list of words. Reading the corpus through `nltk.corpus` keeps this
# cell re-runnable after that rebinding (calling `.words` on the list would fail).
non_list_stopwords = nltk.corpus.stopwords.words('english')
stopwords = list(non_list_stopwords)
stopwords[:10]

In [None]:
# Character length of every tweet, split by class (target 0 vs. target 1).
tweet_lengths = df['text'].str.len()
non_disaster_tweets_length = tweet_lengths[df['target'] == 0]
disaster_tweets_length = tweet_lengths[df['target'] == 1]
print(non_disaster_tweets_length)
print(disaster_tweets_length)

In [None]:
# Side-by-side histograms of tweet character length, one panel per class.
fig, axes = plt.subplots(1, 2, figsize = (30, 15))
fig.suptitle('Tweet Character Length', fontsize = 45)

# (axis, lengths to plot, bar colour, panel title)
histogram_panels = [
    (axes[0], non_disaster_tweets_length, custom_colors[1], 'Non-Disaster Tweets'),
    (axes[1], disaster_tweets_length, custom_colors[2], 'Disaster Tweets'),
]
for panel, lengths, bar_color, panel_title in histogram_panels:
    panel.set_facecolor('black')
    panel.hist(lengths, color = bar_color, edgecolor = 'white', linewidth = 4)
    panel.set_title(panel_title, fontsize = 40)
    panel.set_xlabel('Character Length', fontsize = 35)
    panel.set_ylabel('Frequency', fontsize = 35)
    panel.xaxis.set_tick_params(labelsize = 30)
    panel.yaxis.set_tick_params(labelsize = 30)

plt.subplots_adjust(wspace = 0.25, hspace = 0.1)
plt.show()

In [None]:
class tweet_functions:

    '''Per-tweet counts (characters, words, URLs, hashtags, mentions, stopwords).

    `column` is a pandas Series of tweets. Every method converts each value
    with `str()` and returns a Series of integer counts aligned with `column`.
    '''

    def __init__(self, column):
        self.column = column

    def count_characters(self):
        '''Number of characters in each tweet.'''
        return self.column.apply(lambda text: len(str(text)))

    def count_words(self):
        '''Number of whitespace-separated tokens in each tweet.'''
        return self.column.apply(lambda text: len(str(text).split()))

    def count_urls(self):
        '''Number of tokens in each tweet that contain "http" (case-insensitive).'''
        # The test is applied to each token, not to the whole tweet; "https"
        # already contains "http", so one substring check covers both schemes.
        return self.column.apply(
            lambda text: sum('http' in token for token in str(text).lower().split())
        )

    def count_hashtags(self):
        '''Number of "#" characters in each tweet.'''
        return self.column.apply(lambda text: str(text).count('#'))

    def count_tags(self):
        '''Number of "@" characters in each tweet.'''
        return self.column.apply(lambda text: str(text).count('@'))

    def count_stopwords(self):
        '''Number of lower-cased tokens in each tweet found in the notebook-level `stopwords`.'''
        # Build the lookup set once per call instead of scanning the list per token.
        stopword_set = set(stopwords)
        return self.column.apply(
            lambda text: sum(token in stopword_set for token in str(text).lower().split())
        )

In [None]:
# 3x2 grid of density plots comparing non-disaster and disaster tweets on six
# per-tweet counts computed by `tweet_functions`.
fig, axes = plt.subplots(nrows = 3, ncols = 2, figsize = (30, 30))

non_disaster_text = (df[df['target'] == 0])['text']
disaster_text = (df[df['target'] == 1])['text']

# (axis, tweet_functions method name, panel title, x-axis label)
density_panels = [
    (axes[0][0], 'count_characters', 'Character Count', 'Characters'),
    (axes[0][1], 'count_words', 'Word Count', 'Words'),
    (axes[1][0], 'count_urls', 'URL Count', 'URLs'),
    (axes[1][1], 'count_hashtags', 'Hashtag Count', 'Hashtags'),
    (axes[2][0], 'count_tags', 'Mention Count', 'Mentions'),
    (axes[2][1], 'count_stopwords', 'Stopword Count', 'Stopwords'),
]

for panel, method_name, panel_title, x_label in density_panels:
    non_disaster_counts = getattr(tweet_functions(non_disaster_text), method_name)()
    disaster_counts = getattr(tweet_functions(disaster_text), method_name)()

    panel.set_facecolor('black')
    sns.distplot(non_disaster_counts, ax = panel, color = custom_colors[3], label = 'Non-Disaster Tweets', kde_kws = dict(linewidth = 3.5))
    sns.distplot(disaster_counts, ax = panel, color = custom_colors[4], label = 'Disaster Tweets', kde_kws = dict(linewidth = 3.5))
    panel.set_title(panel_title, fontsize = 45)
    panel.set_xlabel(x_label, fontsize = 40)
    panel.set_ylabel('Density', fontsize = 40)
    panel.xaxis.set_tick_params(labelsize = 30)
    panel.yaxis.set_tick_params(labelsize = 30)
    panel.legend(facecolor = 'black', labelcolor = 'white', prop = {'size': 25}).get_frame().set_linewidth(2.5)

plt.subplots_adjust(hspace = 0.5)
plt.show()

# Preprocessing the Tweets

In [None]:
def remove_urls(text):
    """Return `text` with every `http://`, `https://` or `www.` link deleted.

    A link runs from its prefix up to the next whitespace character.
    """
    return re.sub(r'https?://\S+|www\.\S+', r'', text)

def remove_HTML(text):
    """Return `text` with every `<...>` span deleted (shortest match per tag)."""
    tag_pattern = '<.*?>'
    return re.sub(tag_pattern, r'', text)

# Character class of emoji / pictographic code points, compiled once.
# The previous catch-all range U+24C2..U+1F251 also covered CJK, Hangul,
# fullwidth forms and many other scripts, so ordinary non-Latin text was
# deleted; it is replaced here by the symbol blocks it was meant to reach.
_EMOJI_PATTERN = re.compile(
    '['
    '\U0001F600-\U0001F64F'  # emoticons
    '\U0001F300-\U0001F5FF'  # symbols and pictographs
    '\U0001F680-\U0001F6FF'  # transport and map symbols
    '\U0001F1E0-\U0001F1FF'  # regional indicators (flags)
    '\U00002702-\U000027B0'  # dingbats
    '\U000024C2'             # circled latin capital letter M
    '\U000025A0-\U000027BF'  # geometric shapes, miscellaneous symbols, dingbats
    '\U00002B00-\U00002BFF'  # miscellaneous symbols and arrows
    '\U0000FE00-\U0000FE0F'  # variation selectors
    '\U0001F000-\U0001F251'  # mahjong/domino/cards, enclosed alphanumerics and ideographs
    ']+'
)


def remove_emoji(text):
    """Return `text` with runs of emoji / pictographic symbols removed.

    Letters of any script (including CJK and Hangul) are left untouched.
    """
    return _EMOJI_PATTERN.sub(r'', text)

# Regex fragment for each known emoticon, mapped to a short description.
# Only the keys are used for matching. Keys are raw strings so that the regex
# escapes reach `re` unchanged. Metacharacters that are part of the emoticon
# itself are escaped (`:\$`, `\(\?_\?\)`, `\.\.`, ...); left unescaped they act
# as anchors/quantifiers and match the wrong text or nothing at all.
_EMOTICONS = {
    r":‑\)":"Happy face or smiley",
    r":\)":"Happy face or smiley",
    r":-\]":"Happy face or smiley",
    r":\]":"Happy face or smiley",
    r":-3":"Happy face smiley",
    r":3":"Happy face smiley",
    r":->":"Happy face smiley",
    r":>":"Happy face smiley",
    r"8-\)":"Happy face smiley",
    r":o\)":"Happy face smiley",
    r":-\}":"Happy face smiley",
    r":\}":"Happy face smiley",
    r":-\)":"Happy face smiley",
    r":c\)":"Happy face smiley",
    r":\^\)":"Happy face smiley",
    r"=\]":"Happy face smiley",
    r"=\)":"Happy face smiley",
    r":‑D":"Laughing, big grin or laugh with glasses",
    r":D":"Laughing, big grin or laugh with glasses",
    r"8‑D":"Laughing, big grin or laugh with glasses",
    r"8D":"Laughing, big grin or laugh with glasses",
    r"X‑D":"Laughing, big grin or laugh with glasses",
    r"XD":"Laughing, big grin or laugh with glasses",
    r"=D":"Laughing, big grin or laugh with glasses",
    r"=3":"Laughing, big grin or laugh with glasses",
    r"B\^D":"Laughing, big grin or laugh with glasses",
    r":-\)\)":"Very happy",
    r":‑\(":"Frown, sad, andry or pouting",
    r":-\(":"Frown, sad, andry or pouting",
    r":\(":"Frown, sad, andry or pouting",
    r":‑c":"Frown, sad, andry or pouting",
    r":c":"Frown, sad, andry or pouting",
    r":‑<":"Frown, sad, andry or pouting",
    r":<":"Frown, sad, andry or pouting",
    r":‑\[":"Frown, sad, andry or pouting",
    r":\[":"Frown, sad, andry or pouting",
    r":-\|\|":"Frown, sad, andry or pouting",
    r">:\[":"Frown, sad, andry or pouting",
    r":\{":"Frown, sad, andry or pouting",
    r":@":"Frown, sad, andry or pouting",
    r">:\(":"Frown, sad, andry or pouting",
    r":'‑\(":"Crying",
    r":'\(":"Crying",
    r":'‑\)":"Tears of happiness",
    r":'\)":"Tears of happiness",
    r"D‑':":"Horror",
    r"D:<":"Disgust",
    r"D:":"Sadness",
    r"D8":"Great dismay",
    r"D;":"Great dismay",
    r"D=":"Great dismay",
    r"DX":"Great dismay",
    r":‑O":"Surprise",
    r":O":"Surprise",
    r":‑o":"Surprise",
    r":o":"Surprise",
    r":-0":"Shock",
    r"8‑0":"Yawn",
    r">:O":"Yawn",
    r":-\*":"Kiss",
    r":\*":"Kiss",
    r":X":"Kiss",
    r";‑\)":"Wink or smirk",
    r";\)":"Wink or smirk",
    r"\*-\)":"Wink or smirk",
    r"\*\)":"Wink or smirk",
    r";‑\]":"Wink or smirk",
    r";\]":"Wink or smirk",
    r";\^\)":"Wink or smirk",
    r":‑,":"Wink or smirk",
    r";D":"Wink or smirk",
    r":‑P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"X‑P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"XP":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":‑Þ":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":Þ":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":b":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"d:":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"=p":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r">:P":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r":‑/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":-[.]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r">:[(\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r">:/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":[(\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r"=/":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r"=[(\\)]":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":L":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r"=L":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":S":"Skeptical, annoyed, undecided, uneasy or hesitant",
    r":‑\|":"Straight face",
    r":\|":"Straight face",
    r":\$":"Embarrassed or blushing",
    r":‑x":"Sealed lips or wearing braces or tongue-tied",
    r":x":"Sealed lips or wearing braces or tongue-tied",
    r":‑#":"Sealed lips or wearing braces or tongue-tied",
    r":#":"Sealed lips or wearing braces or tongue-tied",
    r":‑&":"Sealed lips or wearing braces or tongue-tied",
    r":&":"Sealed lips or wearing braces or tongue-tied",
    r"O:‑\)":"Angel, saint or innocent",
    r"O:\)":"Angel, saint or innocent",
    r"0:‑3":"Angel, saint or innocent",
    r"0:3":"Angel, saint or innocent",
    r"0:‑\)":"Angel, saint or innocent",
    r"0:\)":"Angel, saint or innocent",
    r":‑b":"Tongue sticking out, cheeky, playful or blowing a raspberry",
    r"0;\^\)":"Angel, saint or innocent",
    r">:‑\)":"Evil or devilish",
    r">:\)":"Evil or devilish",
    r"\}:‑\)":"Evil or devilish",
    r"\}:\)":"Evil or devilish",
    r"3:‑\)":"Evil or devilish",
    r"3:\)":"Evil or devilish",
    r">;\)":"Evil or devilish",
    r"\|;‑\)":"Cool",
    r"\|‑O":"Bored",
    r":‑J":"Tongue-in-cheek",
    r"#‑\)":"Party all night",
    r"%‑\)":"Drunk or confused",
    r"%\)":"Drunk or confused",
    r":-###\.\.":"Being sick",
    r":###\.\.":"Being sick",
    r"<:‑\|":"Dump",
    r"\(>_<\)":"Troubled",
    r"\(>_<\)>":"Troubled",
    r"\(';'\)":"Baby",
    r"\(\^\^>``":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(\^_\^;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(-_-;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(~_~;\) \(・\.・;\)":"Nervous or Embarrassed or Troubled or Shy or Sweat drop",
    r"\(-_-\)zzz":"Sleeping",
    r"\(\^_-\)":"Wink",
    r"\(\(\+_\+\)\)":"Confused",
    r"\(\+o\+\)":"Confused",
    r"\(o\|o\)":"Ultraman",
    r"\^_\^":"Joyful",
    r"\(\^_\^\)/":"Joyful",
    r"\(\^O\^\)／":"Joyful",
    r"\(\^o\^\)／":"Joyful",
    r"\(__\)":"Kowtow as a sign of respect, or dogeza for apology",
    r"_\(\._\.\)_":"Kowtow as a sign of respect, or dogeza for apology",
    r"<\(_ _\)>":"Kowtow as a sign of respect, or dogeza for apology",
    r"<m\(__\)m>":"Kowtow as a sign of respect, or dogeza for apology",
    r"m\(__\)m":"Kowtow as a sign of respect, or dogeza for apology",
    r"m\(_ _\)m":"Kowtow as a sign of respect, or dogeza for apology",
    r"\('_'\)":"Sad or Crying",
    r"\(/_;\)":"Sad or Crying",
    r"\(T_T\) \(;_;\)":"Sad or Crying",
    r"\(;_;":"Sad of Crying",
    r"\(;_:\)":"Sad or Crying",
    r"\(;O;\)":"Sad or Crying",
    r"\(:_;\)":"Sad or Crying",
    r"\(ToT\)":"Sad or Crying",
    r";_;":"Sad or Crying",
    r";-;":"Sad or Crying",
    r";n;":"Sad or Crying",
    r";;":"Sad or Crying",
    r"Q\.Q":"Sad or Crying",
    r"T\.T":"Sad or Crying",
    r"QQ":"Sad or Crying",
    r"Q_Q":"Sad or Crying",
    r"\(-\.-\)":"Shame",
    r"\(-_-\)":"Shame",
    r"\(一一\)":"Shame",
    r"\(；一_一\)":"Shame",
    r"\(=_=\)":"Tired",
    r"\(=\^\·\^=\)":"cat",
    r"\(=\^\·\·\^=\)":"cat",
    # This fragment ends with a literal tab character, as in the source table.
    r"=_\^=" "\t":"cat",
    r"\(\.\.\)":"Looking down",
    r"\(\._\.\)":"Looking down",
    r"\^m\^":"Giggling with hand covering mouth",
    r"\(\・\・\?":"Confusion",
    r"\(\?_\?\)":"Confusion",
    r">\^_\^<":"Normal Laugh",
    r"<\^!\^>":"Normal Laugh",
    r"\^/\^":"Normal Laugh",
    r"\（\*\^_\^\*）" :"Normal Laugh",
    r"\(\^<\^\) \(\^\.\^\)":"Normal Laugh",
    r"\(\^\.\^\)":"Normal Laugh",
    r"\(\^_\^\.\)":"Normal Laugh",
    r"\(\^_\^\)":"Normal Laugh",
    r"\(\^\^\)":"Normal Laugh",
    r"\(\^J\^\)":"Normal Laugh",
    r"\(\*\^\.\^\*\)":"Normal Laugh",
    r"\(\^—\^\）":"Normal Laugh",
    r"\(#\^\.\^#\)":"Normal Laugh",
    r"\（\^—\^\）":"Waving",
    r"\(;_;\)/~~~":"Waving",
    r"\(\^\.\^\)/~~~":"Waving",
    r"\(-_-\)/~~~ \(\$\·\·\)/~~~":"Waving",
    r"\(T_T\)/~~~":"Waving",
    r"\(ToT\)/~~~":"Waving",
    r"\(\*\^0\^\*\)":"Excited",
    r"\(\*_\*\)":"Amazed",
    r"\(\*_\*;":"Amazed",
    r"\(\+_\+\) \(@_@\)":"Amazed",
    r"\(\*\^\^\)v":"Laughing,Cheerful",
    r"\(\^_\^\)v":"Laughing,Cheerful",
    r"\(\(d[-_-]b\)\)":"Headphones,Listening to music",
    r'\(-"-\)':"Worried",
    r"\(ーー;\)":"Worried",
    r"\(\^0_0\^\)":"Eyeglasses",
    r"\(\＾ｖ\＾\)":"Happy",
    r"\(\＾ｕ\＾\)":"Happy",
    r"\(\^\)o\(\^\)":"Happy",
    r"\(\^O\^\)":"Happy",
    r"\(\^o\^\)":"Happy",
    r"\)\^o\^\(":"Happy",
    r":O o_O":"Surprised",
    r"o_0":"Surprised",
    r"o\.O":"Surpised",
    r"\(o\.o\)":"Surprised",
    r"oO":"Surprised",
    r"\(\*￣m￣\)":"Dissatisfied",
    r"\(‘A`\)":"Snubbed or Deflated"
}


def _guard_word_edges(fragment):
    """Stop a fragment that starts/ends with a letter or digit from matching inside a word."""
    # Only the alphanumeric side is guarded, so ':)' glued to a word ("thanks:)")
    # is still removed while 'd:' inside "said:" or ':3' inside "3:30" is not.
    prefix = r'(?<!\w)' if fragment[0].isalnum() else ''
    suffix = r'(?!\w)' if fragment[-1].isalnum() else ''
    return prefix + fragment + suffix


# Alternatives are tried left to right, so longer fragments go first; otherwise
# ':-)' would win over ':-))' and leave a stray ')'. Compiled once at import.
_EMOTICON_PATTERN = re.compile(
    '|'.join(
        _guard_word_edges(fragment)
        for fragment in sorted(_EMOTICONS, key = len, reverse = True)
    )
)


def remove_emoticons(text):
    """Return `text` with every emoticon listed in `_EMOTICONS` removed.

    Matching is case-sensitive. Emoticons made of letters or digits (``XD``,
    ``d:``, ``:3`` ...) are only removed when they are not part of a longer
    word or number.
    """
    return _EMOTICON_PATTERN.sub(r'', text)

def remove_mentions(text):
    """Return `text` with every `@handle` deleted.

    A handle is `@` followed by one or more ASCII letters, digits or underscores.
    """
    handle_pattern = '@[A-Za-z0-9_]+'
    return re.sub(handle_pattern, r'', text)

def word_lemmatizer(text):
    """Lemmatize each whitespace-separated token of `text` and re-join with single spaces."""
    wordnet = WordNetLemmatizer()
    lemmas = []
    for token in text.split():
        lemmas.append(wordnet.lemmatize(token))
    return ' '.join(lemmas)

In [None]:
def preprocess_tweets(texts):
    """Clean a Series of raw tweet strings and return the cleaned Series.

    Mention removal and contraction expansion run before punctuation is
    stripped: once '@' and apostrophes are gone, neither step can recognise
    anything.
    """
    stopword_set = set(stopwords)
    punctuation_table = str.maketrans('', '', string.punctuation)

    texts = texts.str.lower() # convert to lowercase
    texts = texts.apply(remove_urls) # remove URLs
    texts = texts.apply(remove_HTML) # remove HTML tags
    texts = texts.apply(remove_mentions) # remove mentions (needs the '@')
    texts = texts.apply(th.cont_exp) # convert i'm to i am, you're to you are, etc (needs the apostrophe)
    texts = texts.str.translate(punctuation_table) # remove punctuations
    texts = texts.apply(lambda text: ' '.join([word for word in str(text).split() if word not in stopword_set])) # remove stopwords
    texts = texts.apply(remove_emoji) # remove emojis
    # NOTE(review): most emoticon patterns contain punctuation, so this step
    # matches very little after the punctuation strip above; it is kept in its
    # original position so the set of fragments it removes does not change.
    texts = texts.apply(remove_emoticons) # remove emoticons
    texts = texts.apply(word_lemmatizer) # lemmatize words
    return texts

# Apply the same cleaning to the training and test tweets.
df['text'] = preprocess_tweets(df['text'])
print(df['text'])

test_df['text'] = preprocess_tweets(test_df['text'])
print(test_df['text'])

In [None]:
# Count every whitespace-separated token across the cleaned training tweets
# and show the ten most common ones.
counter = Counter(word for text in df['text'].values for word in text.split())
counter.most_common(10)

In [None]:
# Horizontal bar chart of the ten most frequent tokens, each bar labelled with its count.
ranked_words = sorted(counter.items(), key = lambda pair: pair[1], reverse = True)
data = dict(ranked_words[:10])
words = list(data.keys())
frequency = list(data.values())

fig, ax = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 15))
ax.set_facecolor('black')
ax = sns.barplot(x = frequency, y = words, color = '#8699A7', edgecolor = 'white', linewidth = 2)
plt.title('Word Frequency', fontsize = 35)
plt.xlabel('Frequency', fontsize = 30)
plt.ylabel('Words', fontsize = 30)
plt.xticks(size = 20)
plt.yticks(size = 20)
bbox_args = dict(boxstyle = 'round', fc = '0.9')
for p in ax.patches:
    width = p.get_width()
    # Label sits just past the end of the bar, vertically centred on it.
    label_x = 9.5 + width
    label_y = p.get_y() + 0.5 * p.get_height()
    plt.text(label_x, label_y, '{:1.0f}'.format(width),
             ha = 'center',
             va = 'center',
             color = 'black',
             bbox = bbox_args,
             fontsize = 15)
plt.show()

# Creating Ngrams
In extremely simple terms, an `ngram` is a sequence of `n` words. Let's take an example: `This is a sentence`.

<img src = "https://images.deepai.org/glossary-terms/867de904ba9b46869af29cead3194b6c/8ARA1.png">

Based on the value of `n` we can generate different `ngrams` as follows:
> - **N = 1 (Unigrams)**: `This`, `is`, `a`, `sentence`
> - **N = 2 (Bigrams)**: `This is`, `is a`, `a sentence`
> - **N = 3 (Trigrams)**: `This is a`, `is a sentence`

Ngrams find their applications in auto completion of sentences, speech recognition, machine translation and predictive text inputs.

In [None]:
def generate_ngrams(text, n_gram = 0):
    """Return the space-joined n-grams of `text`.

    The text is lower-cased and split on single spaces; empty pieces and
    entries of `non_list_stopwords` are dropped before the n-grams are formed.
    With the default `n_gram = 0` the result is an empty list.
    """
    tokens = []
    for piece in text.lower().split(' '):
        if piece == '' or piece in non_list_stopwords:
            continue
        tokens.append(piece)
    # One shifted copy of the token list per n-gram position; zipping them
    # yields every run of `n_gram` consecutive tokens.
    shifted_views = [tokens[offset:] for offset in range(n_gram)]
    return [' '.join(window) for window in zip(*shifted_views)]

def generate_ngram_dictionaries(n_gram):
    """Collect the ten most frequent n-grams for each class of `df`.

    Returns a 6-tuple: for non-disaster tweets (target 0) the top-ten
    ``{ngram: count}`` dict, its keys as a list and its values as a list,
    followed by the same three items for disaster tweets (target 1).
    """
    def top_ten_for(target):
        tallies = defaultdict(int)
        for tweet in df[df['target'] == target]['text']:
            for gram in generate_ngrams(tweet, n_gram = n_gram):
                tallies[gram] += 1
        ranked = sorted(tallies.items(), key = lambda pair: pair[1], reverse = True)
        best = dict(ranked[:10])
        return best, list(best.keys()), list(best.values())

    non_disaster_triplet = top_ten_for(0)
    disaster_triplet = top_ten_for(1)
    return non_disaster_triplet + disaster_triplet

def _draw_ngram_panel(ax, words, frequency, n_gram, class_name, bar_style, label_color, short_label, trigram_label):
    """Draw one horizontal bar panel of n-gram counts on `ax`.

    `bar_style` holds the colour keyword(s) passed to `sns.barplot`;
    `short_label` and `trigram_label` are `(x offset, font size)` pairs used for
    the count labels when `n_gram` is 1/2 and 3 respectively.
    """
    ngram_names = {1: 'Unigrams', 2: 'Bigrams', 3: 'Trigrams'}
    # Same rounded box used for bar labels elsewhere in the notebook, defined
    # locally so the function does not rely on a global set by another cell.
    label_box = dict(boxstyle = 'round', fc = '0.9')

    ax.set_facecolor('black')
    sns.barplot(x = frequency, y = words, ax = ax, edgecolor = 'white', linewidth = 2, **bar_style)
    if n_gram in ngram_names:
        ax.set_title('{} {}'.format(class_name, ngram_names[n_gram]), fontsize = 45)
    ax.set_xlabel('Count', fontsize = 40)
    ax.set_ylabel('Words', fontsize = 40)

    # Longer n-grams need smaller tick labels to fit.
    tick_size = {1: 30, 2: 20}.get(n_gram, 18)
    ax.xaxis.set_tick_params(labelsize = tick_size)
    ax.yaxis.set_tick_params(labelsize = tick_size)

    # Count labels are only drawn for unigrams, bigrams and trigrams.
    if n_gram not in ngram_names:
        return
    offset, font_size = trigram_label if n_gram == 3 else short_label
    for p in ax.patches:
        width = p.get_width()
        ax.text(offset + width, p.get_y() + 0.5 * p.get_height(), '{:1.0f}'.format(width),
                ha = 'center',
                va = 'center',
                color = label_color,
                bbox = label_box,
                fontsize = font_size)


def create_ngram_graphs(non_disaster_ngram_words, non_disaster_ngram_frequency, disaster_ngram_words, disaster_ngram_frequency, n_gram):
    """Plot non-disaster (left) and disaster (right) n-gram counts side by side.

    The word/frequency lists are parallel lists for each class; `n_gram`
    selects titles, tick sizes, label placement and the gap between panels.
    """
    fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (30, 20))

    _draw_ngram_panel(axes[0], non_disaster_ngram_words, non_disaster_ngram_frequency, n_gram,
                      class_name = 'Non-Disaster',
                      bar_style = dict(color = '#0057B1'),
                      label_color = 'blue',
                      short_label = (0.75, 25),
                      trigram_label = (0.6, 22))

    _draw_ngram_panel(axes[1], disaster_ngram_words, disaster_ngram_frequency, n_gram,
                      class_name = 'Disaster',
                      bar_style = dict(palette = [custom_colors[1]]),
                      label_color = 'red',
                      short_label = (0.8, 25),
                      trigram_label = (0.6, 18))

    # Trigram labels on the y-axis are wider, so the panels need more separation.
    if(n_gram == 1 or n_gram == 2):
        plt.subplots_adjust(wspace = 0.4)
    if(n_gram == 3):
        plt.subplots_adjust(wspace = 0.9)
        
def final_output_ngram_graphs(ngram):
    """Generate the n-gram data for ``ngram`` and draw the side-by-side bar charts.

    ``generate_ngram_dictionaries`` returns six values; the first and the
    fourth are not needed for plotting and are discarded here.
    """
    ngram_data = generate_ngram_dictionaries(ngram)
    (_,
     words_non_disaster,
     counts_non_disaster,
     _,
     words_disaster,
     counts_disaster) = ngram_data
    create_ngram_graphs(
        words_non_disaster,
        counts_non_disaster,
        words_disaster,
        counts_disaster,
        ngram,
    )

# Unigrams

In [None]:
# Draw the n-gram bar charts for n = 1 (unigrams).
final_output_ngram_graphs(1)

# Bigrams

In [None]:
# Draw the n-gram bar charts for n = 2 (bigrams).
final_output_ngram_graphs(2)

# Trigrams

In [None]:
# Draw the n-gram bar charts for n = 3 (trigrams).
final_output_ngram_graphs(3)

# Packed Bubble Chart
The packed bubble chart, also known as a bubble chart, is a means of showing relational value without regard to axes. The bubbles are packed in as tightly as possible to make efficient use of space.

In [None]:
def dictionaries_for_packed_bubble_chart(ngram):
    """Collect the inputs of the packed bubble chart for a given n-gram size.

    Returns a dict holding the n-gram lists and frequency lists of both tweet
    classes (taken from ``generate_ngram_dictionaries``) plus a fixed list of
    ten bubble colours under ``'colors'``.
    """
    # The first and fourth return values are not used by the bubble chart.
    (_,
     words_non_disaster,
     counts_non_disaster,
     _,
     words_disaster,
     counts_disaster) = generate_ngram_dictionaries(ngram)

    bubble_colors = [
        '#5A69AF', '#579E65', '#F9C784', '#FC944A', '#F24C00',
        '#00B825', '#FC944A', '#EF4026', '#F9C784', '#FC944A',
    ]

    return dict(
        non_disaster_ngrams = words_non_disaster,
        non_disaster_ngrams_frequency = counts_non_disaster,
        disaster_ngrams = words_disaster,
        disaster_ngrams_frequency = counts_disaster,
        colors = bubble_colors,
    )

In [None]:
class BubbleChart:
    """Packed bubble layout: circles sized by area, pulled towards their centre of mass.

    The bubbles start on a regular grid that cannot overlap and are then moved
    step by step towards the area-weighted centre of mass by ``collapse``.
    """

    def __init__(self, area, bubble_spacing=0):
        """Create the bubbles and place them on the initial grid.

        Parameters
        ----------
        area : array-like
            Area of each bubble.
        bubble_spacing : float, default 0
            Minimal gap kept between two bubbles.

        Raises
        ------
        ValueError
            If ``area`` is empty (NumPy cannot take the maximum of an empty array).
        """
        area = np.asarray(area)
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        # One row per bubble; the columns are x, y, radius, area.
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        # Grid pitch: the largest diameter plus the spacing, so no two grid cells overlap.
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # Smallest square grid with room for every bubble.
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    def center_of_mass(self):
        """Return the (x, y) mean of the bubble centres, weighted by bubble area."""
        return np.average(self.bubbles[:, :2], axis = 0, weights = self.bubbles[:, 3])

    def center_distance(self, bubble, bubbles):
        """Return the distance from the centre of ``bubble`` to the centre of each row in ``bubbles``."""
        return np.hypot(bubble[0] - bubbles[:, 0], bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        """Return the gap between outlines, reduced by the spacing; negative means overlap."""
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        """Return how many of ``bubbles`` overlap ``bubble`` (0 when there are none)."""
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        """Return the index of the row in ``bubbles`` closest to ``bubble``, wrapped in a list."""
        distance = self.outline_distance(bubble, bubbles)
        idx_min = np.argmin(distance)
        return idx_min if isinstance(idx_min, np.ndarray) else [idx_min]

    def collapse(self, n_iterations=50):
        """Move the bubbles towards the centre of mass.

        Parameters
        ----------
        n_iterations : int, default 50
            Number of passes over all bubbles.

        A bubble that already sits exactly on the centre of mass (always the
        case for a single bubble) has no direction to move in and is left
        where it is; normalising its zero-length direction vector would
        otherwise turn its coordinates into NaN.
        """
        for _ in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)

                # Direction from this bubble towards the centre of mass.
                dir_vec = self.com - self.bubbles[i, :2]
                dist_to_com = np.sqrt(dir_vec.dot(dir_vec))
                if dist_to_com == 0:
                    # Already on the centre of mass: nothing to normalise, nothing to move.
                    continue
                dir_vec = dir_vec / dist_to_com

                # Candidate position one step closer to the centre of mass.
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1

                else:
                    # Blocked: try to slide around the closest bubble instead.
                    for colliding in self.collides_with(new_bubble, rest_bub):

                        dir_vec = rest_bub[colliding, :2] - self.bubbles[i, :2]
                        dist_to_other = np.sqrt(dir_vec.dot(dir_vec))
                        if dist_to_other == 0:
                            # Coincident centres give no usable direction.
                            continue
                        dir_vec = dir_vec / dist_to_other

                        # Unit vector orthogonal to the line between the two centres.
                        orth = np.array([dir_vec[1], - dir_vec[0]])

                        new_point1 = (self.bubbles[i, :2] + orth * self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth * self.step_dist)

                        dist1 = self.center_distance(self.com, np.array([new_point1]))
                        dist2 = self.center_distance(self.com, np.array([new_point2]))

                        # Keep the sideways step that ends closer to the centre of mass.
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            # Fewer than 10% of the bubbles moved freely: refine the step size.
            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2

    def plot(self, ax, labels, colors):
        """Draw every bubble as a circle on ``ax`` with its label centred inside.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
            Axes to draw on.
        labels : sequence
            Label of each bubble, indexed like the bubbles.
        colors : sequence
            Colour of each bubble, indexed like the bubbles.
        """
        for i in range(len(self.bubbles)):
            circ = plt.Circle(self.bubbles[i, :2], self.bubbles[i, 2], color=colors[i])
            ax.add_patch(circ)
            ax.text(*self.bubbles[i, :2], labels[i], horizontalalignment = 'center', verticalalignment = 'center')


def final_packed_bubble_chart(ngram, displayer):
    """Draw a packed bubble chart of n-gram frequencies for one tweet class.

    Parameters
    ----------
    ngram : int
        N-gram size passed to ``dictionaries_for_packed_bubble_chart``. The
        values 1, 2 and 3 also select the label font size (30, 25 and 18).
    displayer : str
        Which class to plot: ``'Non Disaster'`` or ``'Disaster'``.

    Raises
    ------
    ValueError
        If ``displayer`` is not one of the two supported values.
    """
    # Validate first so a typo fails with a clear message instead of an unbound ``ax``.
    key_prefix_by_displayer = {'Non Disaster': 'non_disaster', 'Disaster': 'disaster'}
    if displayer not in key_prefix_by_displayer:
        raise ValueError(
            "displayer must be 'Non Disaster' or 'Disaster', got " + repr(displayer)
        )
    key_prefix = key_prefix_by_displayer[displayer]

    packed_bubble_chart_dict = dictionaries_for_packed_bubble_chart(ngram)

    # The default font size has to be in place before the bubble labels are
    # created; updating it after drawing would only affect later figures.
    # The setting is still global, as before.
    label_font_sizes = {1: 30, 2: 25, 3: 18}
    if ngram in label_font_sizes:
        plt.rcParams.update({'font.size': label_font_sizes[ngram]})

    bubble_chart = BubbleChart(area = packed_bubble_chart_dict[key_prefix + '_ngrams_frequency'], bubble_spacing = 0.001)
    bubble_chart.collapse()
    fig, ax = plt.subplots(subplot_kw = dict(aspect = 'equal'), figsize = (30, 30))
    ax.set_facecolor('black')
    bubble_chart.plot(ax, packed_bubble_chart_dict[key_prefix + '_ngrams'], colors = packed_bubble_chart_dict['colors'])

    ax.set_title('Packed Bubble Chart for ' + displayer + ' Ngrams = ' + str(ngram), fontsize = 30)
    # Circles are added as patches, so the data limits must be recomputed explicitly.
    ax.relim()
    ax.autoscale_view()
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [None]:
# Packed bubble chart of unigrams (n = 1) for the non-disaster tweets.
final_packed_bubble_chart(1, 'Non Disaster')

In [None]:
# Packed bubble chart of bigrams (n = 2) for the disaster tweets.
final_packed_bubble_chart(2, 'Disaster')

In [None]:
# Packed bubble chart of trigrams (n = 3) for the disaster tweets.
final_packed_bubble_chart(3, 'Disaster')

# Wordcloud of Tweets
Word clouds (also known as text clouds or tag clouds) work in a simple way: the more a specific word appears in a source of textual data (such as a speech, blog post, or database), the bigger and bolder it appears in the word cloud. A word cloud is a collection, or cluster, of words depicted in different sizes. The bigger and bolder the word appears, the more often it’s mentioned within a given text and the more important it is.

In [None]:
# Join the tweets with a space: with an empty separator the last word of one
# tweet is glued to the first word of the next and counted as a bogus word.
wordcloud = WordCloud(width = 1400, height = 600, background_color = 'black').generate(' '.join(df['text']))
plt.figure(figsize = (20, 10))
plt.title('Wordcloud Visualization of Tweets', fontsize = 30)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

In [None]:
# Display the DataFrame (the notebook renders the last expression of a cell).
df

In [None]:
# Remove the 'id', 'keyword' and 'location' columns from the working DataFrame.
df = df.drop(['id', 'keyword', 'location'], axis = 'columns')

In [None]:
# Display the DataFrame again to check the remaining columns.
df

In [None]:
# Load the tokenizer and the encoder from one checkpoint name so the two
# cannot drift apart.
BERT_CHECKPOINT = 'bert-large-uncased'
tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
# TFBertModel is not among the names imported at the top of the notebook as
# far as visible, so it is reached through the imported `transformers` module.
bert = transformers.TFBertModel.from_pretrained(BERT_CHECKPOINT)

In [None]:
def _encode_tweets(texts, max_length = 36):
    """Tokenize a list of tweets into TensorFlow tensors of a fixed length.

    Every sequence is truncated or padded to exactly ``max_length`` tokens.
    ``padding = 'max_length'`` is used instead of ``padding = True`` because
    the latter pads only to the longest sequence in the call, which is not
    guaranteed to equal the fixed input length of the model and may differ
    between the training and the test set.
    """
    return tokenizer(
        text = texts,
        add_special_tokens = True,
        max_length = max_length,
        truncation = True,
        padding = 'max_length',
        return_tensors = 'tf',
        return_attention_mask = True,
        verbose = True)


X_train = _encode_tweets(df['text'].tolist())

X_test = _encode_tweets(test_df['text'].tolist())

In [None]:
# Show the tokenizer output for the training set, then for the test set.
print(X_train, X_test, sep = '\n')

In [None]:
# Shapes of the token id tensor and of the attention mask, one per line.
print(X_train['input_ids'].shape, X_train['attention_mask'].shape, sep = '\n')

In [None]:
# Training labels: the 'target' column as a NumPy array.
y_train = df['target'].to_numpy()
y_train

# Creating the Model

In [None]:
# Two integer inputs with 36 positions each; their names match the keys of the
# dictionaries passed to `model.fit` and `model.predict`.
input_ids = Input(shape=(36,), dtype=tf.int32, name = 'input_ids')
attention_mask = Input(shape=(36,), dtype=tf.int32, name = 'attention_mask')

# Element 0 of the BERT output is used as features.
# NOTE(review): for TFBertModel this is presumably the per-token last hidden state -- confirm against the transformers docs.
embeddings = bert(input_ids = input_ids, attention_mask = attention_mask)[0]
layer = layers.Dropout(0.2)(embeddings)
layer = layers.Dense(1024, activation = 'relu')(layer)
layer = layers.Dense(32, activation = 'relu')(layer)
# Flatten the features so the final layer emits a single value per example.
layer = layers.Flatten()(layer)
# Sigmoid activation: the model output lies between 0 and 1, it is not a raw logit.
y = layers.Dense(1, activation = 'sigmoid')(layer)
    
model = keras.Model(inputs = [input_ids, attention_mask], outputs = y)

In [None]:
# Print the Keras summary of the assembled model.
model.summary()

In [None]:
model.compile(
    optimizer = keras.optimizers.Adam(learning_rate = 6e-6, epsilon = 1e-8, decay = 0.01, clipnorm = 1.0),
    # The output layer of the model already applies a sigmoid, so the loss
    # receives probabilities. `from_logits = True` would apply a second
    # sigmoid inside the loss and distort the loss and its gradients.
    loss = BinaryCrossentropy(from_logits = False),
    metrics = ['accuracy']
)

In [None]:
# Render a diagram of the model, including the tensor shapes of each layer.
plot_model(model, show_shapes = True)

# Training the Model

In [None]:
# Fit for 5 epochs in batches of 32, holding out 5% of the training data for
# validation. The input dictionary keys match the names of the model's Input layers.
classifier = model.fit(
    x = {name: X_train[name] for name in ('input_ids', 'attention_mask')},
    y = y_train,
    batch_size = 32,
    epochs = 5,
    validation_split = 0.05,
)

# Model Performance

In [None]:
def model_performance_graphs():
    """Plot training and validation accuracy (left) and loss (right) per epoch.

    Reads the global ``classifier`` produced by ``model.fit``.
    """
    epochs = classifier.epoch
    history = classifier.history

    fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize = (15, 8))

    # (axes, history key, legend label, title, title font size, y-axis label)
    panels = (
        (accuracy_ax, 'accuracy', 'acc', 'Accuracy vs Epochs', 20, 'Accuracy'),
        (loss_ax, 'loss', 'loss', 'Loss Curve', 18, 'Loss'),
    )
    for panel_ax, metric, legend_label, title, title_size, y_label in panels:
        # Training curve first, then the matching validation curve.
        panel_ax.plot(epochs, history[metric], label = legend_label)
        panel_ax.plot(epochs, history['val_' + metric], label = 'val_' + legend_label)
        panel_ax.set_title(title, fontsize = title_size)
        panel_ax.set_xlabel('Epochs', fontsize = 15)
        panel_ax.set_ylabel(y_label, fontsize = 15)
        panel_ax.legend()

    plt.show()


model_performance_graphs()

# Making the Predictions

In [None]:
# Work on an independent copy of the two needed columns, so adding the
# 'target' column later does not trigger pandas' SettingWithCopyWarning.
test_df = test_df[['id', 'text']].copy()
pred = model.predict({'input_ids': X_test['input_ids'],
                      'attention_mask': X_test['attention_mask']})
print(pred)
# Round the sigmoid outputs to 0/1. Only the last axis is squeezed so a
# single-row test set still yields a 1-D result, and `.numpy()` hands pandas
# a plain NumPy array instead of a TensorFlow tensor.
pred = tf.squeeze(tf.round(pred), axis = -1).numpy()
print(pred)

# Creating the Submission File

In [None]:
# Attach the predictions, cast them to integers and keep only the two
# columns written to the submission file.
test_df['target'] = pred
test_df = test_df.astype({'target': int})[['id', 'target']]
test_df.to_csv('submission.csv', index = False)
test_df

# References
> - https://www.kaggle.com/code/gunesevitan/nlp-with-disaster-tweets-eda-cleaning-and-bert#2.-Meta-Features
> - https://www.kaggle.com/code/shahules/basic-eda-cleaning-and-glove
> - https://www.kaggle.com/code/sudalairajkumar/getting-started-with-text-preprocessing/notebook
> - https://www.kaggle.com/code/aishwarya2210/prediction-of-tweets-using-bert-model
> - https://www.kaggle.com/code/xhlulu/disaster-nlp-keras-bert-using-tfhub
> - https://www.kaggle.com/code/ratan123/start-from-here-disaster-tweets-eda-basic-model#4.-Exploring-location-column

<div class="alert alert-warning" role="alert">🚧 Work in Progress 🚧</div>