In [None]:
import math
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import networkx as nx
import altair as alt
from datetime import timedelta
import matplotlib.pyplot as plt

In [None]:
# Load the case list. The confirmation-date column ('確定日') is read as plain
# text on purpose: it mixes 'month/day' strings with the placeholder '調査中'
# ("under investigation"), so asking read_csv to parse it as dates either does
# nothing or, if parsing ever succeeded, would break the filter and the
# explicit parsing below.
data = pd.read_csv('data_2020-03-06.csv', dtype={'確定日': str})
# Remove the cases (3 in this snapshot) with 'under investigation' instead of a date.
# .copy() yields an independent frame, so adding columns to it afterwards
# does not raise SettingWithCopyWarning.
data = data[data['確定日'] != '調査中'].copy()
# The file stores month/day only; the year 2020 is prepended before parsing.
data['date'] = data['確定日'].apply(lambda x: datetime.strptime(f'2020/{x}', '%Y/%m/%d'))

In [None]:
# Japanese -> English names for the places of residence found in the data.
# Spelling variants of one place (e.g. '和歌山県' / '和歌山') map to the same
# English name.
translations = {
    '北海道': 'Hokkaido',
    '愛知県': 'Aichi',
    '東京都': 'Tokyo',
    '神奈川県': 'Kanagawa',
    '千葉県': 'Chiba',
    '和歌山県': 'Wakayama',
    '中国（武漢市）': 'China (Wuhan)',
    '大阪府': 'Osaka',
    '熊本県': 'Kumamoto',
    '福岡県': 'Fukuoka',
    '石川県': 'Ishikawa',
    '沖縄県': 'Okinawa',
    '高知県': 'Kochi',
    '京都府': 'Kyoto',
    '調査中': 'Under investigation',
    '兵庫県': 'Hyogo',
    '中国（湖北省）': 'China (Hubei)',
    '和歌山': 'Wakayama',
    '中国（湖南省）': 'China (Hunan)',
    '岐阜県': 'Gifu',
    '埼玉県': 'Saitama',
    '奈良県': 'Nara',
    '宮城県': 'Miyagi',
    '静岡県': 'Shizuoka',
    '新潟県': 'Niigata',
    '栃木県': 'Tochigi',
    '滋賀県': 'Shiga Prefecture',
    '神奈川': 'Kanagawa',
    '長野県': 'Nagano Prefecture',
    '三重県': 'Mie Prefecture',
    '大阪市': 'Osaka',
    '大分県': 'Oita Prefecture',
    '山口県': 'Yamaguchi',
    '愛媛県': 'Ehime',
    '宮崎県': 'Miyazaki',
}
# Parallel lists in the same order as the mapping, kept under their
# original names in case other cells refer to them.
locations_jp = list(translations)
locations = list(translations.values())
# An unknown place name raises KeyError rather than being silently dropped.
data['Place of residence'] = data['居住地'].map(lambda jp_name: translations[jp_name])

In [None]:
def _parse_contacts(raw):
    """Split one raw '周囲の患者の発生※' cell into a list of contact strings.

    The placeholders 'なし' ("none") and '調査中' ("under investigation") and
    the 'No.' prefix are stripped first; the remainder is split on '、'.
    A cell holding only placeholders therefore yields [''].
    """
    cleaned = raw
    for marker in ('なし', '調査中', 'No.'):
        cleaned = cleaned.replace(marker, '')
    return cleaned.split('、')


data['contacts'] = data['周囲の患者の発生※'].map(_parse_contacts)
data.head()

## Build Graph

In [None]:
# Undirected graph with three kinds of nodes: cases (int case numbers),
# confirmation dates (ISO-format strings) and places of residence (strings).
# Edge weights: case-place 0.1, case-date 1, case-case contact 0.4.
G = nx.Graph()
for _, row in data.iterrows():
    person = int(row['旧No.'])
    contacts = row['contacts']
    date = row['date'].date().isoformat()
    loc = row['Place of residence']
    G.add_node(person)
    G.add_node(date)
    G.add_node(loc)
    G.add_edge(person, loc, weight=0.1)
    G.add_edge(person, date, weight=1)
    for c in contacts:
        if c == '':
            continue
        try:
            contact = int(c)
        except ValueError:
            # Entry is not a plain case number (free text etc.): skip it.
            # Only ValueError is caught, so unrelated failures and
            # KeyboardInterrupt are no longer swallowed.
            continue
        G.add_edge(person, contact, weight=0.4)
            

In [None]:
# Distinct confirmation dates in chronological order (datetime.date objects).
dates = data.sort_values('date')['date'].dt.date.unique()
Gdates = nx.Graph()
# Pin every date node on the y=0 line, one x unit per 10 days. Offsets are
# measured from the latest date in the data rather than from today's date,
# so the positions no longer depend on the day the notebook is run.
reference_date = max(dates, default=None)
dates_pos = {
    day.isoformat(): ((day - reference_date).days / 10., 0)
    for day in dates
}
# Chain consecutive dates together, both in the main graph and in a
# separate dates-only graph.
for previous_day, day in zip(dates[:-1], dates[1:]):
    G.add_edge(day.isoformat(), previous_day.isoformat())
    Gdates.add_edge(day.isoformat(), previous_day.isoformat())
# Nodes whose positions the layout step must keep fixed.
fixed = list(dates_pos)

## Generate layout.
We pin the date nodes to fixed positions so that the generated layout maps to time.

In [None]:
# spring_layout is randomised; a fixed seed makes the layout (and hence the
# final chart) identical on every Restart & Run All.
LAYOUT_SEED = 42
layout = nx.spring_layout(
    G,
    pos=dates_pos,      # initial positions of the date nodes
    fixed=fixed,        # date nodes are not moved by the algorithm
    k=1/math.sqrt(len(G)*0.9),
    iterations=100,
    seed=LAYOUT_SEED,
)
# Coordinates in the iteration order of `layout`, consumed by the next cell.
X = [xy[0] for xy in layout.values()]
Y = [xy[1] for xy in layout.values()]

In [None]:
# One row per graph node (index = node key), holding its layout coordinates.
node_keys = list(layout)
layout_df = pd.DataFrame({'X': X, 'Y': Y}, index=node_keys)

In [None]:
# Attach each case's layout coordinates by looking up its case number
# in the node-position table.
data_l = data.join(layout_df, on='旧No.')
# Keep only the text before ' (' so that e.g. 'China (Wuhan)' becomes 'China'.
data_l['Place of residence'] = data_l['Place of residence'].map(
    lambda place: place.partition(' (')[0]
)
data_l.head()

In [None]:
# Categorical copy of the place column whose category labels are replaced
# by consecutive integers 0..n-1.
residence = data_l['Place of residence']
location_numbers = range(residence.nunique())
data_l['location_id'] = residence.astype('category').cat.rename_categories(location_numbers)

In [None]:
# Look up one colour per case from matplotlib's 'tab20' colormap, indexed by
# the integer location id.
# NOTE(review): tab20 defines 20 colours while the translation table can
# yield more distinct places than that - confirm whether ids past the end
# of the palette matter here.
cm = plt.cm.tab20
colors = cm(data_l['location_id'])

In [None]:
# Draft colour scale listing the places in a hand-picked order. The original
# list literal contained a stray ', ,' (a SyntaxError that stopped the whole
# notebook from running) and an empty `range=[]` placeholder; the stray comma
# is removed and the unfilled range is omitted. The following cell assigns
# `color_scale` again.
color_scale = alt.Scale(
    domain=[
        'China', 'Under investigation', 'Kanagawa', 'Chiba', 'Tokyo', 'Saitama',
        'Nara', 'Osaka', 'Mie Prefecture', 'Kyoto',
        'Wakayama', 'Okinawa', 'Aichi', 'Hokkaido',
        'Fukuoka', 'Ishikawa',
        'Kumamoto', 'Tochigi', 'Nagano Prefecture', 'Gifu',
        'Shiga Prefecture', 'Shizuoka', 'Kochi', 'Niigata', 'Miyagi',
        'Hyogo',
    ],
)
# Last expression of the cell, so the distinct place names are displayed
# for comparison with the domain above.
data_l['Place of residence'].unique()

In [None]:
# Scale whose domain fixes the order of the places of residence (grouped
# geographically, with the non-prefecture entries last). The nodes chart
# reuses this domain for both shape and colour.
# 'Yamaguchi', 'Ehime' and 'Miyazaki' are added: the translation table can
# produce them, and they were missing from the explicit domain.
color_scale = alt.Scale(
    domain=[
        'Hokkaido',
        'Miyagi',
        'Tochigi', 'Saitama', 'Chiba', 'Tokyo', 'Kanagawa',
        'Niigata', 'Nagano Prefecture', 'Shizuoka',
        'Ishikawa', 'Gifu', 'Aichi',
        'Shiga Prefecture', 'Mie Prefecture', 'Nara', 'Wakayama',
        'Kyoto', 'Osaka', 'Hyogo',
        'Yamaguchi',
        'Ehime', 'Kochi',
        'Fukuoka', 'Oita Prefecture', 'Kumamoto', 'Miyazaki',
        'Okinawa', 'China', 'Under investigation',
    ],
    scheme='rainbow',
)

In [None]:
# One row per graph edge: a running edge id plus the two endpoint node keys.
edge_pairs = pd.DataFrame(G.edges)
edges_df = edge_pairs.reset_index().set_axis(['edge_id', 'from', 'to'], axis='columns')
edges_df.tail()

In [None]:
# Confirmation date of every case, indexed by case number.
case_dates = data_l.set_index('旧No.')['date']


def _attach_endpoint(endpoint):
    """Return the edge table with layout X/Y and case date of one endpoint column."""
    with_position = edges_df.join(layout_df, on=endpoint)
    return with_position.join(case_dates, on=endpoint)


from_edges = _attach_endpoint('from')
to_edges = _attach_endpoint('to')
from_edges.head(2)

In [None]:
# Stack both endpoint tables: every edge now appears twice, once with the
# coordinates/date of its 'from' node and once with those of its 'to' node.
endpoint_tables = [from_edges, to_edges]
all_edges = pd.concat(endpoint_tables)
all_edges.head()

In [None]:
# Keep only the edges whose two endpoints are both case numbers present in
# the data; this drops case-date, case-place and date-date edges.
known_cases = data['旧No.']
both_ends_known = all_edges['from'].isin(known_cases) & all_edges['to'].isin(known_cases)
valid_edges = all_edges[both_ends_known].copy().sort_values('edge_id')
valid_edges.sort_values('edge_id').head()

In [None]:
# Axis styles shared with the edges chart: a labelled, titled date axis and
# a bare vertical axis.
X_axis = alt.Axis(title='date', labels=True, domain=False, ticks=False, grid=False)
Y_axis = alt.Axis(title=None, labels=False, domain=False, ticks=False, grid=False)

# Shape and colour both follow the place ordering defined by color_scale's
# domain; the colour scheme used here is 'plasma'.
residence_domain = color_scale.domain

# One point per case: x is the confirmation date, y the layout's Y coordinate.
nodes_chart = (
    alt.Chart(data_l)
    .mark_point(size=100)
    .encode(
        x=alt.X('monthdate(date)', axis=X_axis),
        y=alt.Y('Y:Q', axis=Y_axis),
        shape=alt.Shape('Place of residence', scale=alt.Scale(domain=residence_domain)),
        color=alt.Color('Place of residence', scale=alt.Scale(domain=residence_domain, scheme='plasma')),
        tooltip=['Place of residence', 'date', '年代', '性別'],
    )
    .interactive()
)

In [None]:
# One gray line per case-to-case edge; `detail` groups the two endpoint rows
# of each edge_id into a single line.
edge_lines = alt.Chart(valid_edges).mark_line(color='gray')
edges_chart = edge_lines.encode(
    x=alt.X('monthdate(date)', axis=X_axis),
    y=alt.Y('Y:Q', axis=Y_axis),
    detail='edge_id',
)

In [None]:
# Layer the charts with the edges underneath the case points, then set the
# size and title of the combined figure.
viz = alt.layer(edges_chart, nodes_chart).properties(
    height=500,
    width=800,
    title='Japan coronavirus cases and their connections, by detection date and prefecture.',
)
viz

In [None]:
# Export the layered chart to an HTML file.
output_html = 'japan_coronavirus_network.html'
viz.save(output_html)