https://www.kaggle.com/arashnic/data-on-covid19-variants-in-the-eueea-data  
Country: [String]  
country_code: 2-letter ISO country code [String]  
year_week: yyyy-Www  
Source: Data source, either GISAID EpiCoV database or TESSy. [String]  
new_cases: Weekly number of new confirmed cases. Set to zero in the event that countries have negative case counts due to retrospective correction of data. [Numeric]  
number_sequenced: Weekly number of sequences carried out [Numeric]  
percent_cases_sequenced: 100 x number_sequenced / new_cases. [Numeric]  
valid_denominator: Always TRUE for GISAID data. For TESSy data, FALSE if there are discrepancies in the data reported for a given week, such as where the sum of number_detections_variant across all variants exceeds number_sequenced (aggregate data), or where no sequences have been reported that are coded as 'wild type' (case-based data). [Boolean]  
variant: Name of the variant: each VOC (variant of concern), Other, or UNK (unknown) [String]  
number_detections_variant: Number of detections reported of the variant [Numeric]  
percent_variant: 100 x number_detections_variant / number_sequenced. No value given if valid_denominator == FALSE [Numeric]

In [None]:
from datetime import datetime
from os import path
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import scipy
#Functions
def replaceDate(inputString):
    """Convert a ``year_week`` string into a ``datetime``.

    Two layouts are recognised, told apart by the length of the text:

    * shorter than 10 characters: a year and a week number separated by a
      hyphen (e.g. ``"2020-40"``). It is parsed with ``"%Y %W %w"`` and a
      weekday of 0, so the result is the Sunday that closes that
      Monday-based week.
    * 10 characters or longer: a calendar date ``"YYYY-MM-DD"``.

    Args:
        inputString: The date text to parse.

    Returns:
        The parsed ``datetime.datetime``.

    Raises:
        ValueError: If the text matches neither layout.
    """
    spaced = inputString.replace("-", " ")
    # Measure the whole string. Indexing [0] yields one character whose
    # length is always 1, which made the full-date branch unreachable.
    if len(inputString) < 10:
        return datetime.strptime(spaced + " 0", "%Y %W %w")
    return datetime.strptime(spaced, "%Y %m %d")

def createGraph(input_df):
    """Plot the share of each variant over time and save the chart as a JPEG.

    One series is drawn for each of the variants ``B.1.1.7``, ``B.1.351``,
    ``Other`` and ``P.1``, using the ``variant``, ``year_week`` and
    ``percent_variant`` columns. The chart is written to
    ``graphs/<name>_Percent_Variance.jpg``, where ``<name>`` is the value in
    the first row and first column of ``input_df``.

    Args:
        input_df: Rows to plot. The first column is presumably the country
            name (verify against the caller). ``year_week`` must already
            hold datetime values.

    Returns:
        None. An empty ``input_df`` produces no file.
    """
    if input_df.empty:
        # Nothing to name the chart after and nothing to draw.
        return

    country_name = input_df.iloc[0, 0]
    out_dir = Path("graphs/")
    # savefig does not create missing directories.
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / (country_name + "_Percent_Variance.jpg")

    fig, ax = plt.subplots()
    ax.set_title("Percentage of Variant Observed over Time in " + country_name)
    ax.set_xlabel("Date")
    ax.set_ylabel("Percentage of Variant")
    ax.minorticks_on()
    ax.set_axisbelow(True)
    ax.grid(which="major", color="blue", linestyle="-", linewidth=1)
    ax.grid(which="minor", color="black", linestyle=":", linewidth=0.5)
    ax.tick_params(axis="x", labelrotation=45)

    for variant_name in ("B.1.1.7", "B.1.351", "Other", "P.1"):
        subset = input_df.loc[input_df["variant"] == variant_name]
        if subset.empty:
            continue
        # Each series uses its own dates. Sharing one variant's dates breaks
        # as soon as the variants have a different number of rows.
        ax.plot(
            subset["year_week"],
            subset["percent_variant"],
            "o",
            label=variant_name,
        )

    if ax.lines:
        ax.legend()
    # Retained from the original: switches pyplot's interactive mode off.
    plt.ioff()
    fig.savefig(out_path)
    plt.close(fig)
    return

In [2]:
# Reuse the cached GISAID-only table when it exists. Otherwise build it from
# the raw export (drop unused columns, order by week then variant, discard
# non-GISAID rows) and cache the result for the next run.
if path.exists("filtered_df.csv"):
    v_df = pd.read_csv("filtered_df.csv", header=0)
else:
    v_df = (
        pd.read_csv("variants.csv", header=0)
        .drop(columns=["country_code", "valid_denominator"])
        .sort_values(by=["year_week", "variant"])
        .loc[lambda frame: frame["source"] == "GISAID"]
        .drop(columns=["source"])
    )
    v_df.to_csv("filtered_df.csv", index=False)

# The cache stores the week as text, so it is parsed on every run.
v_df["year_week"] = v_df["year_week"].apply(replaceDate)

In [3]:
# Draw one chart per distinct country, passing createGraph only that
# country's rows.
for x in v_df["country"].unique():
    selected_df = v_df[v_df["country"] == x]
    createGraph(selected_df)