## Omicron is spreading all over the world!

On 26 November 2021, WHO designated the variant B.1.1.529 a variant of concern, named Omicron, on the advice of WHO’s Technical Advisory Group on Virus Evolution (TAG-VE).  This decision was based on the evidence presented to the TAG-VE that Omicron has several mutations that may have an impact on how it behaves, for example, on how easily it spreads or the severity of illness it causes. Here is a summary of what is currently known.  <br/>
https://www.who.int/news/item/28-11-2021-update-on-omicron

# Variants of Concern synonyms
https://covariants.org/

* omicron: B.1.1.529
* alpha: B.1.1.7 
* beta: B.1.351 
* gamma: P.1 
* delta: B.1.617.2 
* kappa: B.1.617.1 
* epsilon: B.1.427, B.1.429
* eta: B.1.525 
* iota: B.1.526
* lambda: C.37
* mu: B.1.621

In [1]:
import sys
!{sys.executable} -m pip install bar_chart_race

Collecting bar_chart_race
  Downloading bar_chart_race-0.1.0-py3-none-any.whl (156 kB)
[K     |████████████████████████████████| 156 kB 287 kB/s 
Installing collected packages: bar-chart-race
Successfully installed bar-chart-race-0.1.0


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import datetime as datetime
from datetime import timedelta
from plotly.subplots import make_subplots
import bar_chart_race as bcr
from IPython.display import HTML
import warnings
warnings.filterwarnings("ignore")
from IPython.display import Video
from tqdm import tqdm

# Data preparation

In [3]:
# Read the variant sequencing table from the Kaggle input directory and display it.
csv_path = '../input/omicron-covid19-variant-daily-cases/covid-variants.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,location,date,variant,num_sequences,perc_sequences,num_sequences_total
0,Angola,2020-07-06,Alpha,0,0.0,3
1,Angola,2020-07-06,B.1.1.277,0,0.0,3
2,Angola,2020-07-06,B.1.1.302,0,0.0,3
3,Angola,2020-07-06,B.1.1.519,0,0.0,3
4,Angola,2020-07-06,B.1.160,0,0.0,3
...,...,...,...,...,...,...
100411,Zimbabwe,2021-11-01,Omicron,0,0.0,6
100412,Zimbabwe,2021-11-01,S:677H.Robin1,0,0.0,6
100413,Zimbabwe,2021-11-01,S:677P.Pelican,0,0.0,6
100414,Zimbabwe,2021-11-01,others,0,0.0,6


### num_sequences : number of sequences of the variant
### perc_sequences : percentage of sequences of the variant
### num_sequences_total : total number of sequences examined

In [4]:
# Show the column labels of the loaded frame.
data.columns

Index(['location', 'date', 'variant', 'num_sequences', 'perc_sequences',
       'num_sequences_total'],
      dtype='object')

In [5]:
# Keep only the columns used below; .copy() detaches the subset from `data`
# so later column assignments do not touch the original frame.
kept_columns = ['date', 'variant', 'location', 'perc_sequences']
data2 = data[kept_columns].copy()
data2

Unnamed: 0,date,variant,location,perc_sequences
0,2020-07-06,Alpha,Angola,0.0
1,2020-07-06,B.1.1.277,Angola,0.0
2,2020-07-06,B.1.1.302,Angola,0.0
3,2020-07-06,B.1.1.519,Angola,0.0
4,2020-07-06,B.1.160,Angola,0.0
...,...,...,...,...
100411,2021-11-01,Omicron,Zimbabwe,0.0
100412,2021-11-01,S:677H.Robin1,Zimbabwe,0.0
100413,2021-11-01,S:677P.Pelican,Zimbabwe,0.0
100414,2021-11-01,others,Zimbabwe,0.0


In [6]:
# Print the distinct variant labels, then the distinct locations.
for column_name in ('variant', 'location'):
    print(data2[column_name].unique())

['Alpha' 'B.1.1.277' 'B.1.1.302' 'B.1.1.519' 'B.1.160' 'B.1.177' 'B.1.221'
 'B.1.258' 'B.1.367' 'B.1.620' 'Beta' 'Delta' 'Epsilon' 'Eta' 'Gamma'
 'Iota' 'Kappa' 'Lambda' 'Mu' 'Omicron' 'S:677H.Robin1' 'S:677P.Pelican'
 'others' 'non_who']
['Angola' 'Argentina' 'Aruba' 'Australia' 'Austria' 'Bahrain' 'Bangladesh'
 'Belgium' 'Belize' 'Benin' 'Bosnia and Herzegovina' 'Botswana' 'Brazil'
 'Brunei' 'Bulgaria' 'Cambodia' 'Cameroon' 'Canada' 'Chile' 'Colombia'
 'Costa Rica' 'Croatia' 'Curacao' 'Cyprus' 'Czechia' 'Denmark' 'Djibouti'
 'Dominican Republic' 'Ecuador' 'Egypt' 'Estonia' 'Ethiopia' 'Fiji'
 'Finland' 'France' 'Gambia' 'Georgia' 'Germany' 'Ghana' 'Greece'
 'Guatemala' 'Hong Kong' 'Hungary' 'Iceland' 'India' 'Indonesia' 'Iran'
 'Iraq' 'Ireland' 'Israel' 'Italy' 'Jamaica' 'Japan' 'Jordan' 'Kazakhstan'
 'Kenya' 'Kosovo' 'Kuwait' 'Latvia' 'Lebanon' 'Liechtenstein' 'Lithuania'
 'Luxembourg' 'Madagascar' 'Malawi' 'Malaysia' 'Maldives' 'Malta'
 'Mauritius' 'Mexico' 'Moldova' 'Monaco' 'Mongo

In [7]:
# Parse the year-first (YYYY-MM-DD) date strings in one vectorised call.
# The explicit format replaces a per-row apply with dayfirst=True, which was
# slow and misleading for dates written year-first.
data2["date"] = pd.to_datetime(data2["date"], format="%Y-%m-%d")
# Replace any missing values with 0.
data2 = data2.fillna(0)

In [8]:
# Omicron rows from the last 15 days covered by the data.
# Both conditions go into one mask: indexing an already filtered frame with a
# full-length mask makes pandas reindex the mask and emit a warning.
cutoff = data2['date'].max() - pd.Timedelta(days=15)
recent_omicron = data2[(data2['date'] > cutoff) & (data2['variant'] == 'Omicron')]

# One row per location, ranked by Omicron share.
# NOTE: .max() is computed per column, so `date` and `perc_sequences` are each
# the maximum within the window and may come from different rows.
data3 = (
    recent_omicron
    .groupby('location', as_index=False)
    .max()
    .sort_values('perc_sequences', ascending=False)
)
data3[0:40]

Unnamed: 0,location,date,variant,perc_sequences
63,Zambia,2021-12-27,Omicron,100.0
21,Iran,2021-12-27,Omicron,100.0
2,Bangladesh,2022-01-05,Omicron,100.0
59,Ukraine,2021-12-27,Omicron,100.0
42,Peru,2021-12-27,Omicron,100.0
41,Pakistan,2022-01-05,Omicron,100.0
40,Oman,2021-12-27,Omicron,100.0
38,Nigeria,2021-12-27,Omicron,100.0
30,Malta,2021-12-27,Omicron,100.0
27,Malawi,2021-12-27,Omicron,100.0


In [9]:
# Bar chart of the first 40 rows of data3 (Omicron share per location).
# Title typo fixed: "ranknig" -> "ranking".
fig = px.bar(
    data3[0:40],
    x='location',
    y='perc_sequences',
    title="The latest ranking of Omicron perc_sequences",
)
fig.show()

In [10]:
# Omicron rows dated after 2021-11-14, in chronological order.
# The two conditions are combined into one mask instead of being chained,
# which would apply a full-length mask to an already filtered frame.
is_omicron = data2['variant'] == 'Omicron'
after_cutoff = data2['date'] > '2021-11-14'
data4 = data2[is_omicron & after_cutoff].sort_values('date', ascending=True)

# Countries compared in the per-country plots and videos.
countries = ['United States', 'Brazil', 'India', 'Japan', 'South Africa', 'United Kingdom']

In [11]:
# Line chart of Omicron share over time, one trace per entry in `countries`.
# A loop replaces six copy-pasted add_trace calls that differed only in the index.
fig = make_subplots(specs=[[{"secondary_y": False}]])
for country in countries:
    country_rows = data4[data4['location'] == country]
    fig.add_trace(
        go.Scatter(
            x=country_rows['date'],
            y=country_rows['perc_sequences'],
            name=country,
        ),
        secondary_y=False,
    )

fig.update_layout(autosize=False, width=700, height=500, title_text="Omicron perc_sequences by country")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="perc_sequences", secondary_y=False)
fig.show()

In [12]:
# One frame per country, all variants included.
# The frames are built from `countries` rather than repeated string literals so
# their order cannot drift from that list; a later cell pairs the derived
# frames with `countries` by position. The unpacking raises if `countries`
# no longer has exactly six entries.
(data3us, data3bz, data3in,
 data3jp, data3sa, data3uk) = [data2[data2.location == country] for country in countries]

# Dataframe for Bar Chart Race

In [13]:
def _dates_by_variant(country_frame):
    """Index by (date, variant), unstack, and keep the perc_sequences columns."""
    indexed = country_frame.set_index(['date', 'variant'])
    return indexed.unstack()['perc_sequences']


# Apply the same reshaping to each per-country frame.
data4us, data4bz, data4in, data4jp, data4sa, data4uk = map(
    _dates_by_variant,
    (data3us, data3bz, data3in, data3jp, data3sa, data3uk),
)

# Create Bar Chart Race


In [14]:
# Per-country frames collected in a list. The order must match `countries`,
# because the rendering loop pairs dataset[i] with countries[i].
dataset=[data4us,data4bz,data4in,data4jp,data4sa,data4uk]

In [15]:
def make_bcr(df, name):
    """Render a bar chart race for ``df`` and save it as an MP4 file.

    The video is written to the relative path ``BCR_CovidVariants_<name>.mp4``
    and titled ``Covid Variants in <name>``.

    Parameters
    ----------
    df
        Data passed unchanged to ``bcr.bar_chart_race``.
    name : str
        Label concatenated into the output file name and the chart title.
    """
    output_file = 'BCR_CovidVariants_' + name + '.mp4'
    chart_title = 'Covid Variants in ' + name

    def blank_summary(values, ranks):
        # The summary text is the empty string; only its placement and font are set.
        return {'x': .99, 'y': .18, 's': '',
                'ha': 'right', 'size': 8, 'family': 'Courier New'}

    # period_fmt and perpendicular_bar_func are not passed, so the library
    # defaults apply to them.
    options = {
        'df': df,
        'filename': output_file,
        'orientation': 'h',
        'sort': 'desc',
        'n_bars': 5,
        'fixed_order': False,
        'fixed_max': False,
        'steps_per_period': 10,
        'interpolate_period': False,
        'label_bars': True,
        'bar_size': .95,
        'period_label': {'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
        'period_summary_func': blank_summary,
        'period_length': 1000,
        'figsize': (5, 3),
        'dpi': 500,
        'cmap': 'dark24',
        'title': chart_title,
        'title_size': 10,
        'bar_label_size': 7,
        'tick_label_size': 5,
        'shared_fontdict': {'color': '.1'},
        'scale': 'linear',
        'writer': None,
        'fig': None,
        'bar_kwargs': {'alpha': .7},
        'filter_column_colors': True,
    }
    bcr.bar_chart_race(**options)

In [16]:
# Render one video per country. The frames and names are paired by position
# with zip instead of indexing through a hard-coded range(6).
frames_and_names = list(zip(dataset, countries))
for country_frame, country in tqdm(frames_and_names):
    make_bcr(country_frame, country)

100%|██████████| 6/6 [05:30<00:00, 55.02s/it]


In [17]:
# Embed the video for countries[0]. The relative file name matches the one
# make_bcr writes, replacing a hard-coded Kaggle working directory.
Video("BCR_CovidVariants_"+countries[0]+".mp4", width=600, height=400, embed=True)

In [18]:
# Embed the video for countries[1]. The relative file name matches the one
# make_bcr writes, replacing a hard-coded Kaggle working directory.
Video("BCR_CovidVariants_"+countries[1]+".mp4", width=600, height=400, embed=True)

In [19]:
# Embed the video for countries[2]. The relative file name matches the one
# make_bcr writes, replacing a hard-coded Kaggle working directory.
Video("BCR_CovidVariants_"+countries[2]+".mp4", width=600, height=400, embed=True)

In [20]:
# Embed the video for countries[3]. The relative file name matches the one
# make_bcr writes, replacing a hard-coded Kaggle working directory.
Video("BCR_CovidVariants_"+countries[3]+".mp4", width=600, height=400, embed=True)

In [21]:
# Embed the video for countries[4]. The relative file name matches the one
# make_bcr writes, replacing a hard-coded Kaggle working directory.
Video("BCR_CovidVariants_"+countries[4]+".mp4", width=600, height=400, embed=True)

In [22]:
# Embed the video for countries[5]. The relative file name matches the one
# make_bcr writes, replacing a hard-coded Kaggle working directory.
Video("BCR_CovidVariants_"+countries[5]+".mp4", width=600, height=400, embed=True)