In [1]:
import pandas as pd 
import os

def load_data(data_path, file_name):
    """Read a CSV file into a DataFrame.

    Parameters
    ----------
    data_path : str
        Directory that contains the file.
    file_name : str
        Name of the CSV file inside ``data_path``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    # low_memory=False makes pandas read the file in one pass instead of in
    # chunks, so each column gets a single inferred dtype.
    return pd.read_csv(os.path.join(data_path, file_name), low_memory=False)

# Load the line list from the local "data" directory and preview the first rows.
data = load_data(data_path="data", file_name="COVID19_open_line_list.csv")
data.head(5)

Unnamed: 0,ID,age,sex,city,province,country,wuhan(0)_not_wuhan(1),latitude,longitude,geo_resolution,...,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44
0,1.0,30,male,"Chaohu City, Hefei City",Anhui,China,1.0,31.64696,117.7166,admin3,...,,,,,,,,,,
1,2.0,47,male,"Baohe District, Hefei City",Anhui,China,1.0,31.77863,117.3319,admin3,...,,,,,,,,,,
2,3.0,49,male,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,...,,,,,,,,,,
3,4.0,47,female,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,...,,,,,,,,,,
4,5.0,50,female,"Feidong County, Hefei City",Anhui,China,1.0,32.00123,117.5681,admin3,...,,,,,,,,,,


In [52]:
# Number of rows per country, most frequent first (missing countries are not counted).
data['country'].value_counts(sort=True, ascending=False, dropna=True)

China                   10374
South Korea               938
Japan                     711
Italy                     587
Singapore                  90
Thailand                   31
Iran                       28
Kuwait                     26
Bahrain                    20
United Arab Emirates       20
Germany                    20
France                     19
Malaysia                   17
United States              15
Australia                  15
Vietnam                    15
Spain                      13
United Kingdom             12
Switzerland                10
Sweden                      9
Canada                      9
Iraq                        6
Oman                        6
Norway                      4
Israel                      4
Romania                     3
India                       3
Philippines                 3
Croatia                     3
Lebanon                     3
Finland                     2
Pakistan                    2
Russia                      2
Georgia   

In [2]:
# Print the index range, column names, non-null counts and dtypes of the frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14126 entries, 0 to 14125
Data columns (total 45 columns):
ID                          13173 non-null float64
age                         1349 non-null object
sex                         1264 non-null object
city                        10194 non-null object
province                    12906 non-null object
country                     13148 non-null object
wuhan(0)_not_wuhan(1)       13170 non-null float64
latitude                    13147 non-null float64
longitude                   13147 non-null float64
geo_resolution              13147 non-null object
date_onset_symptoms         746 non-null object
date_admission_hospital     730 non-null object
date_confirmation           13089 non-null object
symptoms                    493 non-null object
lives_in_Wuhan              565 non-null object
travel_history_dates        503 non-null object
travel_history_location     758 non-null object
reported_market_exposure    35 non-null object
addi

In [20]:
# Remove the trailing columns that pandas auto-named "Unnamed: 33" .. "Unnamed: 44"
# (they look empty in the preview of the frame).
unnamed_columns = ["Unnamed: {}".format(position) for position in range(33, 45)]

# errors="ignore" keeps this cell re-runnable: because the drop happens in place,
# a second execution would otherwise raise KeyError for the already-removed labels.
data.drop(labels=unnamed_columns, axis=1, inplace=True, errors="ignore")

In [21]:
# Discard every row that is missing a coordinate or a confirmation date, then
# re-inspect the frame to see how many rows remain.
data.dropna(
    axis='index',
    how='any',
    subset=['latitude', 'longitude', 'date_confirmation'],
    inplace=True,
)
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13063 entries, 0 to 13152
Data columns (total 33 columns):
ID                          13063 non-null float64
age                         1316 non-null object
sex                         1223 non-null object
city                        10109 non-null object
province                    12805 non-null object
country                     13039 non-null object
wuhan(0)_not_wuhan(1)       13063 non-null float64
latitude                    13063 non-null float64
longitude                   13063 non-null float64
geo_resolution              13063 non-null object
date_onset_symptoms         727 non-null object
date_admission_hospital     724 non-null object
date_confirmation           13063 non-null object
symptoms                    471 non-null object
lives_in_Wuhan              562 non-null object
travel_history_dates        495 non-null object
travel_history_location     722 non-null object
reported_market_exposure    34 non-null object
addi

In [22]:
# Peek at the raw confirmation-date strings to see their format.
data['date_confirmation'].head(n=3)

0    22.01.2020
1    23.01.2020
2    23.01.2020
Name: date_confirmation, dtype: object

In [30]:
# Keep only the part of each confirmation date that precedes the first '-'
# (presumably the start of a date range -- TODO confirm against the raw data).
# str.split returns the whole string when no '-' is present, so no separate
# branch is needed. Iterating the column directly avoids building a full row
# with `data.iloc[ii]` twice per record, which is very slow on this frame.
dates = [raw_value.split('-')[0] for raw_value in data['date_confirmation']]

In [36]:
# Normalise every confirmation date to an 'MM/DD/YYYY' string.
# The source values are day-first ('DD.MM.YYYY', e.g. '22.01.2020'), so
# dayfirst=True is required: without it pandas reads ambiguous values such as
# '02.01.2020' month-first (February 1st instead of January 2nd).
converted = [
    pd.to_datetime(raw_date, dayfirst=True).strftime('%m/%d/%Y')
    for raw_date in dates
]

In [38]:
# Spot-check the first converted date string.
converted[0]

'01/22/2020'

In [53]:
# `converted` is built from `data` in an earlier cell; refuse to pair them up if
# they have drifted apart (e.g. cells were re-run out of order).
if len(converted) != len(data):
    raise ValueError(
        "converted has {} entries but data has {} rows; re-run the date "
        "conversion cells".format(len(converted), len(data))
    )

# Build one GeoJSON point feature per row for the time-lapse layer.
# The needed columns are iterated directly, in step with `converted`, instead of
# materialising a full row with `data.iloc[i]` for every single field access.
features = [
    {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            # Longitude comes first, then latitude.
            'coordinates': [longitude, latitude],
        },
        'properties': {
            # Rows without additional information render as "nan" in the popup.
            'popup': "<h5>" + str(info) + "</h5><br>",
            'time': confirmed_on,
            'icon': 'circle',
            'iconstyle': {
                'fillColor': 'crimson',
                'color': 'crimson',
                'fillOpacity': 0.5,
                # NOTE(review): this is the string 'false', not the boolean
                # False; a non-empty string is truthy once serialised, so the
                # stroke is probably still drawn -- confirm the intent.
                'stroke': 'false',
                'fill': True,
                'radius': 2,
                'weight': 0.5,
            },
            'style': {'weight': 0.5},
            'id': 'man'
        }
    }
    for longitude, latitude, info, confirmed_on in zip(
        data['longitude'],
        data['latitude'],
        data['additional_information'],
        converted,
    )
]

In [59]:
# import the library
import os

import folium
import pandas as pd
from folium import plugins
from folium.plugins import FloatImage

# Make an empty map, cartodbdark_matter , stamenterrain
# NOTE(review): a latitude of 100 is outside the valid -90..90 range; the view
# presumably gets clamped because max_bounds=True -- confirm the intended centre.
map_ = folium.Map(location=[100, 0],
            max_bounds=True,
            tiles="cartodbdark_matter",
            zoom_start=1.5,
            max_zoom=6,
            min_zoom=1)

# Animated layer: each feature is placed on the timeline via its 'time' property.
# The slider advances in one-day steps (period) and `duration` is set to six days.
folium.plugins.TimestampedGeoJson(
            {
                'type': 'FeatureCollection',
                'features': features
            },
            period='P1D',
            add_last_point=True,
            auto_play=False,
            loop=True,
            max_speed=5,
            min_speed=1,
            loop_button=True,
            date_options='MM/DD/YYYY',
            time_slider_drag_update=True,
            duration='P6D').add_to(map_)

# Create the output directory first so saving works on a fresh checkout.
os.makedirs("maps", exist_ok=True)
map_.save("maps/time_lapse.html")
map_

In [None]:
import os

import folium
import pandas as pd

# Make an empty map, cartodbdark_matter , stamenterrain
# NOTE(review): a latitude of 100 is outside the valid -90..90 range; the view
# presumably gets clamped because max_bounds=True -- confirm the intended centre.
folium_map = folium.Map(location=[100, 0],
            max_bounds=True,
            tiles="cartodbdark_matter",
            zoom_start=1.5,
            max_zoom=6,
            min_zoom=1)

# Draw one circle per row. The two coordinate columns are iterated directly
# instead of building a full row with `data.iloc[i]` twice per record.
# NOTE(review): folium.Circle takes its radius in metres, so radius=2 is tiny at
# these zoom levels; folium.CircleMarker (pixel radius) may have been intended.
for latitude, longitude in zip(data['latitude'], data['longitude']):
    folium.Circle(
        location=[latitude, longitude],
        popup="",
        radius=2,
        color='crimson',
        fill=True,
        # Was misspelled `fillOpacit`, so the intended opacity was never applied.
        fill_opacity=0.9,
        weight=0.5,
        fill_color='crimson'
    ).add_to(folium_map)

# Create the output directory first so saving works on a fresh checkout.
os.makedirs("maps", exist_ok=True)
folium_map.save("maps/all_map.html")
folium_map