Processed 29 December 2021. Data are [here](https://www.covid19.admin.ch/en/epidemiologic/virus-variants?variants=VariantB11529,VariantB16172,VariantP1,VariantB1351,VariantB117,VariantC37,VariantB16171,VariantP2,VariantB1525,VariantB1526,VariantB11318).  Documentation is [here](https://www.covid19.admin.ch/api/data/documentation). Code is [here](https://github.com/jean-luc-jucker/variants_20211229).

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

# The rise and fall of Covid Variants
---

In [2]:
raw = pd.read_csv(os.path.join('bag_covid_19_data_csv_29_December_2021/data', 'COVID19Variants_wgs.csv'))
raw.date = pd.to_datetime(raw.date, infer_datetime_format=True)
data = raw[['geoRegion', 'variant_type', 'date', 'prct_mean7d', 'version']]
print(data.info())
data

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125375 entries, 0 to 125374
Data columns (total 5 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   geoRegion     125375 non-null  object        
 1   variant_type  125375 non-null  object        
 2   date          125375 non-null  datetime64[ns]
 3   prct_mean7d   5486 non-null    float64       
 4   version       125375 non-null  object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 4.8+ MB
None


Unnamed: 0,geoRegion,variant_type,date,prct_mean7d,version
0,CHFL,B.1.1.529,2020-09-28,,2021-12-29_09-48-32
1,CHFL,B.1.1.7,2020-09-28,,2021-12-29_09-48-32
2,CHFL,B.1.1.7 & E484K,2020-09-28,,2021-12-29_09-48-32
3,CHFL,B.1.351,2020-09-28,,2021-12-29_09-48-32
4,CHFL,B.1.525,2020-09-28,,2021-12-29_09-48-32
...,...,...,...,...,...
125370,CHFL,C.37,2021-12-20,,2021-12-29_10-04-36
125371,CHFL,other_lineages,2021-12-20,,2021-12-29_10-04-36
125372,CHFL,P.1,2021-12-20,,2021-12-29_10-04-36
125373,CHFL,P.2,2021-12-20,,2021-12-29_10-04-36


In [3]:
# Subset
df = data.loc[(data['geoRegion']=='CHFL') &
              (data['version']=='2021-12-29_10-04-36') &
              (data['date']>'2020-11-30')
              , :][['date', 'variant_type', 'prct_mean7d']]
df

Unnamed: 0,date,variant_type,prct_mean7d
120370,2020-12-01,B.1.1.318,0.0
120371,2020-12-01,B.1.1.529,0.0
120372,2020-12-01,B.1.1.7,0.1
120373,2020-12-01,B.1.351,0.0
120374,2020-12-01,B.1.525,0.0
...,...,...,...
125370,2021-12-20,C.37,
125371,2021-12-20,other_lineages,
125372,2021-12-20,P.1,
125373,2021-12-20,P.2,


In [4]:
# pivot for use in datawrapper

In [5]:
df=df.pivot(index='date', columns='variant_type')
df

Unnamed: 0_level_0,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d,prct_mean7d
variant_type,B.1.1.318,B.1.1.529,B.1.1.7,B.1.351,B.1.525,B.1.526,B.1.617.1,B.1.617.2,C.37,P.1,P.2,all_sequenced,other_lineages
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
2020-12-01,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,99.9
2020-12-02,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,99.9
2020-12-03,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,99.9
2020-12-04,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,99.9
2020-12-05,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,99.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-16,0.0,46.3,0.0,0.0,0.0,0.0,0.0,53.3,0.0,0.0,0.0,100.0,0.5
2021-12-17,0.0,57.8,0.0,0.0,0.0,0.0,0.0,41.8,0.0,0.0,0.0,100.0,0.4
2021-12-18,,,,,,,,,,,,,
2021-12-19,,,,,,,,,,,,,


In [6]:
df.to_csv('clean_subset.csv', index_label='date')