In [2]:
import pandas as pd
import networkx as nx
from collections import OrderedDict

### COVID 19 INDIA CROWD-SOURCED DATBASE 
Source - covid19india.org
<br>
Their data sources - https://telegra.ph/Covid-19-Sources-03-19

In [3]:
data = pd.read_csv("COVID19_raw_data.csv")

In [4]:
data = data.fillna("-1")

In [5]:
data.shape

(23041, 20)

In [6]:
columns = [data.columns]

In [7]:
columns

[Index(['Patient Number', 'State Patient Number', 'Date Announced',
        'Estimated Onset Date', 'Age Bracket', 'Gender', 'Detected City',
        'Detected District', 'Detected State', 'State code', 'Current Status',
        'Notes', 'Contracted from which Patient (Suspected)', 'Nationality',
        'Type of transmission', 'Status Change Date', 'Source_1', 'Source_2',
        'Source_3', 'Backup Notes'],
       dtype='object')]

In [8]:
data_filtered = data.loc[(data['Contracted from which Patient (Suspected)']!="-1") ]

In [9]:
data_filtered.shape

(1534, 20)

In [10]:
import re
def extractor(x):
    if x[0]=='E' and x[1]=='0':
        return [1670]
    elif x[0]=='P':
        numbers = re.findall('\d+',x)
        numbers = [int(i) for i in numbers]
        if numbers!=None:
            return numbers
        else:
            return []
    else:
        return []
    

In [11]:
data_filtered['source_cluster_flag'] = data_filtered['Contracted from which Patient (Suspected)'].apply(lambda x:extractor(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [12]:
nodes = list(data_filtered['source_cluster_flag'].index)

In [13]:
import numpy as np

In [14]:
adjacency_list = {}

In [15]:
import matplotlib.pyplot as plt
from pyvis.network import Network

In [16]:
def build_graph(adjacency_list):
    g = nx.from_dict_of_lists(adjacency_list)
    g1 = Network(height='1000',width='1000',notebook=True)
    options = '''{
      "physics": {
        "forceAtlas2Based": {
          "gravitationalConstant": -547,
          "springLength": 275,
          "avoidOverlap": 0.98
        },
        "maxVelocity": 82,
        "minVelocity": 0.87,
        "solver": "forceAtlas2Based"
      }
    }'''
    g1.set_options(options)
    g1.from_nx(g)
    return g1

In [17]:
def assign_node_color(nodes,color_map,clr):
    adjacency_list ={}
    color_nodes = OrderedDict()
    for i in nodes:
        if clr==True:
            state = data_filtered['Detected State'][i]
            if state in color_map.keys():
                adjacency_list[i+1]=data_filtered['source_cluster_flag'][i]
                assign_color = color_map[state]
                color_nodes[i+1] = assign_color
        elif clr==False:
            adjacency_list[i+1]=data_filtered['source_cluster_flag'][i]      
    return color_nodes,adjacency_list

In [18]:
def graph_color(gl,color_nodes,label):
    for i in range(len(gl.nodes)):
        try:
            gl.nodes[i]['color']=color_nodes[int(gl.nodes[i]['title'])]
        except:
            gl.nodes[i]['color']="#ffffff"
    gl.prep_notebook()
    gl.show("./interactive_graphs/covid_19_"+str(label)+".html")
def just_show_graph(gl,label):
    gl.show("./interactive_graphs/covid_19_"+str(label)+".html")
    

##### ** Nodes represent 'Patient Number' in data, download covid_19.html for interactive graph. Here central cluster - travelled to Delhi cases.


### Naive Analysis - 1

<img src="./images/naive_analysis.jpeg">
We are looking such kind of patterns where recepient becomes source of infection for others as well.
for e.g p3 got infected from p1 and then p5 and p6 got infected from p3.
<b> Higher the number, longer and denser the chain of such patterns->higher is the chance that community transmission is happening<b> 

In [19]:
color_map = {}
_,adjacency_list_full = assign_node_color(nodes,color_map,False)
gr = build_graph(adjacency_list_full)
just_show_graph(gr,"FULL")

#### examples of case described above in data

<img src="./images/full.png">

### 2) Neighbhouring State Spread Analysis

#### 1) Delhi - UP

In [182]:
color_map_up_delhi = {'Uttar Pradesh':"#ff0000",'Delhi':"#0000ff"}
color_nodes_up_delhi,adjacency_list_up_delhi = assign_node_color(nodes,color_map_up_delhi,True)
gr = build_graph(adjacency_list_up_delhi)
graph_color(gr,color_nodes_up_delhi,"UP_DELHI")

<b> red = UP , blue = Delhi <b>

<img src="./images/up_delhi.png">

#### 2) Maharshtra - Madhya Pradesh

In [None]:
color_map_mah_madh = {'Maharashtra':"#ff0000",'Madhya Pradesh':"#0000ff"}
color_nodes_mah_madh,adjacency_list_mah_madh = assign_node_color(nodes,color_map_mah_madh,True)
gr = build_graph(adjacency_list_mah_madh)
graph_color(gr,color_nodes_mah_madh,"MAHA_MP")

<b> red = Maharashtra , blue = MP <b>

<img src="./images/maha_mp.png">

#### 3)Kerala - Tamil Nadu

In [188]:
color_map_kl_tam = {'Kerala':"#ff0000",'Tamil Nadu':"#0000ff"}
color_nodes_kl_tam,adjacency_list_kl_tam = assign_node_color(nodes,color_map_kl_tam,True)
gr = build_graph(adjacency_list_kl_tam)
graph_color(gr,color_nodes_kl_tam,"KL_TAMIL")

<b> red = Kerala , blue = Tamil Nadu <b>

<img src="./images/kl_tamil.png">

#### 3) TOP 8 state-wise spread

In [196]:
color_map_misc = {"Kerala":"#ff0000",                #red
            "Tamil Nadu" : "#0000ff",               #blue
            "Maharashtra" : "#09d712",              #bright green
            "Delhi" : "#503b3b",                    #brown
            "Uttar Pradesh":"#ff83fa",               #purple
            "Madhya Pradesh" : "#6a8000",            #brwonish yellow
            "Gujarat":"#53544c",                    #greyish balck"
            "Rajasthan":"#2cdede",                   #cyan
            "Karnataka":"#f2a3c0"}                   #pink

color_nodes_misc,adjacency_list_misc = assign_node_color(nodes,color_map_misc,True)
gr = build_graph(adjacency_list_misc)
graph_color(gr,color_nodes_misc,"TOP_8_STATES")

<img src="./images/TOP_8.png">