# Set up

In [113]:
# INSTALL PACKAGES
# Run this cell

! pip install plotly
! pip install geopandas



In [114]:
# IMPORT LIBRARIES
# Run this cell

import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import statistics
import seaborn as sns; sns.set(style="ticks", color_codes=True)
import geopandas as gpd
from geopandas import GeoDataFrame
import json

In [115]:
# INPUT MAPBOX ACCESS TOKEN
# Update cell with Mapbox credentials

# Option 1: Import credentials from separate file
# The JSON file must contain an "access_token" key. The context manager closes
# the file handle as soon as the contents have been read.
with open("mapbox_credentials.json") as credentials_file:
    info = json.loads(credentials_file.read())
px.set_mapbox_access_token(info["access_token"])

# Option 2: Directly copy in credentials
# px.set_mapbox_access_token("ACCESS TOKEN HERE")

# Import data

In [142]:
# IMPORT DATA
# Columns: Entity, Frequency, Entity Type, Year, Month, Day, Latitude, Longitude,
#          Country, City, News Source

DATA_PATH = "geoparse_clean.csv"
df = pd.read_csv(DATA_PATH)
# Preview the first rows
df.head()

Unnamed: 0,Entity,Frequency,Entity Type,Year,Month,Day,Latitude,Longitude,Country,City,News Source
0,Ukraine,129,GPE,2022,3,1,49.487197,31.271832,Ukraine,,aljazeera
1,Russia,117,GPE,2022,3,1,64.686314,97.745306,Russia,,aljazeera
2,United States,25,GPE,2022,3,1,39.78373,-100.445882,United States,,aljazeera
3,Moscow,23,GPE,2022,3,1,55.750446,37.617494,Russia,Moscow,aljazeera
4,Poland,8,GPE,2022,3,1,52.215933,19.134422,Poland,,aljazeera


# Select data

In [143]:
# SELECT DATA FOR WORLD MAP
# Large entities only, i.e. countries (plus entities that have no country,
# which this notebook treats as bodies of water)

country_keys = ["News Source", "Day", "Country"]
water_keys = ["News Source", "Day", "Entity"]
# Rows without a Country value
water_rows = df[df["Country"].isnull()]

# Aggregate countries by news source and day, get lat & long
# (pivot_table's default aggfunc is the mean)
country_loc_df = pd.pivot_table(df, index=country_keys, values=["Latitude", "Longitude"])
# Aggregate countries by news source and day, get frequency count
# (aggfunc="count" counts the non-null Frequency rows in each group)
country_count_df = pd.pivot_table(df, index=country_keys, values=["Frequency"], aggfunc="count")
# Aggregate bodies of water by news source and day, get lat & long
water_loc_df = pd.pivot_table(water_rows, index=water_keys, values=["Latitude", "Longitude"])
# Aggregate bodies of water by news source and day, get frequency count
water_count_df = pd.pivot_table(water_rows, index=water_keys, values=["Latitude"], aggfunc="count")
# Keep only groups that have at least one non-null latitude
# NOTE(review): presumably this keeps the counts aligned row-for-row with
# water_loc_df -- confirm both frames always have the same length.
water_count_df = water_count_df[water_count_df["Latitude"] != 0]

# Index levels are (News Source, Day, Country-or-Entity); countries come first,
# then bodies of water, in every column.
world_df = pd.DataFrame({
    "Entity": country_loc_df.index.get_level_values(2).tolist()
              + water_loc_df.index.get_level_values(2).tolist(),
    "Frequency": country_count_df["Frequency"].tolist() + water_count_df["Latitude"].tolist(),
    "Day": country_loc_df.index.get_level_values(1).tolist()
           + water_loc_df.index.get_level_values(1).tolist(),
    "Latitude": country_loc_df["Latitude"].tolist() + water_loc_df["Latitude"].tolist(),
    "Longitude": country_loc_df["Longitude"].tolist() + water_loc_df["Longitude"].tolist(),
    "News Source": country_loc_df.index.get_level_values(0).tolist()
                   + water_loc_df.index.get_level_values(0).tolist(),
})

# Add row for Fox on 3/1 and Aljazeera on 3/31 without lat/long as placeholder for Mapbox key
# (pd.concat is used because DataFrame.append was removed in pandas 2.0)
world_placeholders = pd.DataFrame([
    {"Day": 1, "Entity": "Ukraine", "News Source": "fox", "Frequency": 0},
    {"Day": 31, "Entity": "Ukraine", "News Source": "aljazeera", "Frequency": 0},
])
world_df = pd.concat([world_df, world_placeholders], ignore_index=True)

world_df.head()

Unnamed: 0,Entity,Frequency,Day,Latitude,Longitude,News Source
0,Belarus,1,1,53.425061,27.697136,aljazeera
1,France,1,1,46.106887,-0.751987,aljazeera
2,Georgia,1,1,42.3154,43.3569,aljazeera
3,Germany,1,1,51.08342,10.423447,aljazeera
4,Poland,1,1,52.215933,19.134422,aljazeera


In [144]:
# SELECT DATA FOR UKRAINE MAP
# Small entities within UKR, i.e. cities

# Keep rows located in Ukraine, excluding the country-level "Ukraine" entity itself
in_ukraine = df["Country"] == "Ukraine"
not_country_itself = df["Entity"] != "Ukraine"
ukr_df = df[in_ukraine & not_country_itself]

# Add row for Fox on 3/1 and Aljazeera on 3/31 without lat/long as placeholder for Mapbox key
# (pd.concat is used because DataFrame.append was removed in pandas 2.0)
ukr_placeholders = pd.DataFrame([
    {"Day": 1, "Country": "Ukraine", "News Source": "fox", "Frequency": 0},
    {"Day": 31, "Country": "Ukraine", "News Source": "aljazeera", "Frequency": 0},
])
ukr_df = pd.concat([ukr_df, ukr_placeholders], ignore_index=True)
ukr_df.head()

Unnamed: 0,Entity,Frequency,Entity Type,Year,Month,Day,Latitude,Longitude,Country,City,News Source
0,Kharkiv,5,GPE,2022.0,3.0,1,49.992318,36.231015,Ukraine,Kharkiv,aljazeera
1,Kyiv,4,GPE,2022.0,3.0,1,50.450034,30.524136,Ukraine,Kyiv,aljazeera
2,Kherson,3,GPE,2022.0,3.0,1,46.542172,33.407933,Ukraine,,aljazeera
3,Mariupol,3,GPE,2022.0,3.0,1,47.095765,37.549962,Ukraine,Mariupol,aljazeera
4,Kyiv,8,LOC,2022.0,3.0,1,50.450034,30.524136,Ukraine,Kyiv,aljazeera


# Dynamic bubble map

In [145]:
# DYNAMIC BUBBLE MAP

def animate_map(df,time_col,zoom):
    """Display an animated Mapbox bubble map of the rows in ``df``.

    Bubbles are placed at ("Latitude", "Longitude"), sized by "Frequency",
    coloured by "News Source" and labelled on hover with "Entity".

    Parameters
    ----------
    df : DataFrame
        Must contain the columns named above plus ``time_col``.
    time_col : str
        Column whose values define the animation frames.
    zoom : int
        Initial zoom level passed to the map.

    Returns
    -------
    None
        The figure is shown via ``fig.show()`` and not returned.
    """
    # Sorted distinct values of time_col, used as the category order for that column
    frame_order = list(np.sort(df[time_col].unique()))

    bubble_map = px.scatter_mapbox(
        df,
        lat="Latitude",
        lon="Longitude",
        size="Frequency",
        color="News Source",
        hover_name="Entity",
        animation_frame=time_col,
        category_orders={time_col: frame_order},
        mapbox_style="carto-positron",
        zoom=zoom,
    )
    bubble_map.show()

# Generate maps

## Raw data

In [139]:
# Raw data
# Every row of df, plus placeholder rows (no lat/long) for Fox on 3/1 and
# Aljazeera on 3/31. pd.concat is used because DataFrame.append was removed in
# pandas 2.0; df itself is left unmodified.
raw_placeholders = pd.DataFrame([
    {"Day": 1, "Country": "Ukraine", "News Source": "fox", "Frequency": 0},
    {"Day": 31, "Country": "Ukraine", "News Source": "aljazeera", "Frequency": 0},
])
raw_df = pd.concat([df, raw_placeholders], ignore_index=True)
animate_map(raw_df,"Day",1)

## World map

In [146]:
# World map, aggregated by country
# One animation frame per "Day" value, starting at zoom level 1
animate_map(world_df,"Day",1)

## Ukraine map

In [147]:
# Ukraine map, cities
# One animation frame per "Day" value, starting at zoom level 4
animate_map(ukr_df,"Day",4)