# Map data visualisation

This file will be used to visualize the geolocation in various ways 

Firstly we import the python packages that will be relevant for this project

### Imports 

In [1]:
import numpy as np
import pandas as pd
import re #for regular expression and data cleaning

from geopy.geocoders import Nominatim #for geocoding

import folium #for map visualization
from folium.plugins import HeatMap #for heat map visualization
from branca.colormap import LinearColormap #for heat map visualization

now we need to extract ther data

### Data extraction

In [None]:
data = pd.read_csv("./archive/translated_dataset2.csv",)
data.head()

Now we would need to clean the data. We would need to remove the URLs, hashtags, mentions, and emojis. We would also need to remove the punctuations and convert the text to lowercase. 

### Data cleaning

In [None]:
def clean_text(text):
    text = re.sub(r'@[A-Za-z0-9]+', '', text) # remove mentions
    text = re.sub(r'#', '', text) # remove hashtags
    text = re.sub(r'\s+', ' ', text) # remove extra whitespace
    return text

data['translated_content'] = data['translated_content'].apply(clean_text)

# Transform the date column to datetime format
data['date'] = pd.to_datetime(data['date'])
data['date'] = data['date'].dt.strftime('%Y-%m-%d %H:%M:%S') # stolen from main file




We now need to find where the tweets are and fill in the missing values. This can be done with natural language processing. We are trying to see what city or specific location the tweet is mentioned in it. 

### Location extraction

In [3]:

from tqdm import tqdm

data = pd.read_csv("./archive/only_english.csv")

# Create a new column with the distance from location after finding *km or *miles in text.
data['distance'] = data['content'].apply(lambda x: re.findall(r'\d+km|\d+miles', x))

# Extract the number from the distance column and convert it to float.
data['distance'] = data['distance'].apply(lambda x: float(re.findall(r'\d+', x[0])[0]) if len(x) > 0 else None)

# Create a geolocator object with a custom user_agent
geolocator = Nominatim(user_agent="my-custom-user-agent")

# Define a function to get the coordinates of a location
def get_coordinates(row):
    try:
        # Use geolocator to get the location's coordinates
        location = geolocator.geocode(row['city_mention'])
        return pd.Series({'latitude': location.latitude, 'longitude': location.longitude})
    except:
        return pd.Series({'latitude': None, 'longitude': None})

# Remove duplicate city names within the array
seen_cities = set()
data['city_mention'] = data['city_mention'].apply(lambda x: x if x not in seen_cities else None if None else seen_cities.add(x) or x)

# Apply the get_coordinates function to the city column to create a new coordinates column
tqdm.pandas(desc="Geocoding progress")
data[['latitude', 'longitude']] = data.progress_apply(lambda row: get_coordinates(row), axis=1)

# Print the amount of tweets with location and distance.
print(f"Amount of tweets with coordinates: {len(data[data['latitude'].notnull()])}")
print(f"Amount of tweets with distance: {len(data[data['distance'].notnull()])}")

# save the data to a new csv file called "full_data.csv"
data.to_csv("./archive/english_data_with_locations.csv", index=False)

Geocoding progress:   1%|          | 94/17942 [00:46<2:27:46,  2.01it/s]

Geocoding progress:   1%|          | 97/17942 [00:47<2:28:53,  2.00it/s]

Now that we have some tweets with locations, we can plot them on a map. We will use the folium package to do this.

### Map plotting

In [3]:
# Loads the file previously saved as "full_data.csv"
data = pd.read_csv("full_data.csv")

epicenter = [37.225, 37.021]

# Creates a map at the given location and zoom level
map = folium.Map(location=epicenter, zoom_start=6)




epicenter_dot = folium.Icon(color="red")


# create an icon object
epicenter_dot = folium.Icon(icon='glyphicon glyphicon-flash', prefix='glyphicon', color='red')

# create a marker with the icon
folium.Marker(location=epicenter, icon=epicenter_dot).add_to(map)


folium.Circle(location=epicenter, radius=1000000, color="red", fill=True, fill_color="red").add_to(map)
data = data.dropna(subset=['longitude', 'latitude'])
tweet_locations = data.apply(lambda row: [row['latitude'], row['longitude']], axis=1).tolist()
heatmap_layer = HeatMap(tweet_locations, radius=10)
heatmap_layer.add_to(map)





map





The map above contains a lot of tweets from everywhere in the world. We can zoom in to see the tweets in a specific area. 

### Zooming in

In [4]:
map = folium.Map(location=epicenter, zoom_start=6)

folium.Circle(location=epicenter, radius=1000000, color="red", fill=True, fill_color="red").add_to(map)

folium.Marker(location=epicenter, icon=epicenter_dot).add_to(map)

data= data.dropna(subset=['longitude', 'latitude'])
data = data[(data['latitude'] - epicenter[0])**2 + (data['longitude'] - epicenter[1])**2 <= (1000000/111319.9)**2]
data_locations = data.apply(lambda row: [row['latitude'], row['longitude']], axis=1).tolist()

heatmap_layer = HeatMap(data_locations, radius=10)
heatmap_layer.add_to(map)

map

Currently we have only displayed the information that was within the tweets. now with a little more analysis we can see where people asked for help, and where people offered help.

### Help offered and asked

In [15]:
filtered_data = data[data['classification'] == 'Need'].dropna(subset=['latitude', 'longitude'])
filtered_data = filtered_data[(data['latitude'] - epicenter[0])**2 + (filtered_data['longitude'] - epicenter[1])**2 <= (1000000/111319.9)**2]
# Create the map
map = folium.Map(location=epicenter, zoom_start=5)

# Add a circle around the epicenter
folium.Circle(location=epicenter, radius=1000000, color="red", fill=True, fill_color="red").add_to(map)

# Add a marker for the epicenter
folium.Marker(location=epicenter, icon=epicenter_dot).add_to(map)

# Add a marker for each row in the filtered dataframe
for index, row in filtered_data.iterrows():
    folium.Marker(location=[row['latitude'], row['longitude']]).add_to(map)

map

  filtered_data = filtered_data[(data['latitude'] - epicenter[0])**2 + (filtered_data['longitude'] - epicenter[1])**2 <= (1000000/111319.9)**2]


This is the visualization of where people have asked for help.
The next visualization is where people have offered help.

### Help offered

In [14]:
filtered_data = data[data['classification'] == 'Offer'].dropna(subset=['latitude', 'longitude'])
filtered_data = filtered_data[(data['latitude'] - epicenter[0])**2 + (filtered_data['longitude'] - epicenter[1])**2 <= (1000000/111319.9)**2]
# Create the map
map = folium.Map(location=epicenter, zoom_start=5)

# Add a circle around the epicenter
folium.Circle(location=epicenter, radius=1000000, color="red", fill=True, fill_color="red").add_to(map)

# Add a marker for the epicenter
folium.Marker(location=epicenter, icon=epicenter_dot).add_to(map)

# Add a marker for each row in the filtered dataframe
for index, row in filtered_data.iterrows():
    folium.Marker(location=[row['latitude'], row['longitude']]).add_to(map)

map

  filtered_data = filtered_data[(data['latitude'] - epicenter[0])**2 + (filtered_data['longitude'] - epicenter[1])**2 <= (1000000/111319.9)**2]
