# Explore GISAID data

This notebook is used to update `FINE_COUNTRIES` in `pyrocov/mutrans.py`.

In [1]:
import pickle
import torch
from collections import Counter

In [2]:
# Load the preprocessed GISAID columns.
# NOTE: pickle can execute arbitrary code while loading; only open files you trust.
with open("results/gisaid.columns.pkl", "rb") as handle:
    columns = pickle.load(handle)

# Report the number of rows (length of the "day" column) and the available columns.
num_rows = len(columns["day"])
print(f"loaded {num_rows} rows")
print(list(columns.keys()))

loaded 1425131 rows
['lineage', 'virus_name', 'accession_id', 'collection_date', 'location', 'add_location', 'day']


In [3]:
# Count rows per location prefix: split each location string on "/", skip
# entries with fewer than two components, and key the tally on the first
# (up to) three whitespace-stripped components.
split_locations = (location.split("/") for location in columns["location"])
counts = Counter(
    tuple(piece.strip() for piece in pieces[:3])
    for pieces in split_locations
    if len(pieces) >= 2
)

In [4]:
# Collect the second component (index 1) of every location prefix that has
# at least 5000 rows, de-duplicated and sorted alphabetically.
fine_countries = sorted(
    {prefix[1] for prefix, total in counts.items() if total >= 5000}
)
listing = "\n".join(fine_countries)
print(listing)

Australia
Canada
Denmark
France
Germany
Iceland
Italy
Japan
Luxembourg
Portugal
Spain
Sweden
USA
United Kingdom


In [9]:
from pyrocov.mutrans import get_fine_countries

# Print, in sorted order, what pyrocov's get_fine_countries returns for the
# same columns, for comparison with the list computed in this notebook.
library_fine_countries = sorted(get_fine_countries(columns))
print("\n".join(library_fine_countries))

Australia
Belgium
Brazil
Canada
Denmark
France
Germany
India
Italy
Lithuania
Netherlands
Norway
Poland
Slovenia
South Africa
Spain
Sweden
Switzerland
USA
United Kingdom


In [15]:
# Tally the full, unsplit location strings, then list every one that mentions
# "United Kingdom", most frequent first, as "<count>\t<location>".
locations = Counter(columns["location"])
uk_rows = [
    f"{total}\t{place}"
    for place, total in locations.most_common()
    if "United Kingdom" in place
]
print("\n".join(uk_rows))

316448	Europe / United Kingdom / England
36773	Europe / United Kingdom / Scotland
33397	Europe / United Kingdom / Wales
4746	Europe / United Kingdom / Northern Ireland
171	Europe / United Kingdom / England / South Yorkshire
109	Europe / United Kingdom / England / London
11	Europe / United Kingdom / England / Derbyshire
2	Europe / United Kingdom / England / Yorkshire / Sheffield
2	Europe / United Kingdom
1	Europe / United Kingdom / England / Northamtonshire
1	Europe / United Kingdom / England / Nottinghamhisre
1	Europe / United Kingdom / Wales / Cardiff
1	Europe / United Kingdom / England / Warwickshire
1	Europe / United Kingdom / London
1	Europe / United Kingdom / Scotland / Fraserburg
