## Monkey Pox

In [27]:
## Import libraries

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
%matplotlib inline
import os


### Import Data from git

In [28]:
df_import = pd.read_csv('https://raw.githubusercontent.com/globaldothealth/monkeypox/main/latest.csv')
# df_import = pd.read_csv('monkeypox_data.csv')
df_import

Unnamed: 0,ID,Status,Location,City,Country,Age,Gender,Date_onset,Date_confirmation,Symptoms,...,Travel_history_location,Travel_history_country,Genomics_Metadata,Confirmation_method,Source,Source_II,Date_entry,Date_last_modified,Source_III,Country_ISO3
0,1,confirmed,Guy's and St Thomas Hospital London,London,England,,,2022-04-29,2022-05-06,rash,...,Lagos and Delta States,Nigeria,West African Clade,RT-PCR,https://www.gov.uk/government/news/monkeypox-c...,https://www.who.int/emergencies/disease-outbre...,2022-05-18,2022-05-18,,GBR
1,2,confirmed,Guy's and St Thomas Hospital London,London,England,,,2022-05-05,2022-05-12,rash,...,,,West African Clade,RT-PCR,https://www.gov.uk/government/news/monkeypox-c...,,2022-05-18,2022-05-18,,GBR
2,3,confirmed,London,London,England,,,2022-04-30,2022-05-13,vesicular rash,...,,,West African Clade,RT-PCR,https://www.gov.uk/government/news/monkeypox-c...,,2022-05-18,2022-05-18,,GBR
3,4,confirmed,London,London,England,,male,,2022-05-15,vesicular rash,...,,,West African Clade,,https://www.gov.uk/government/news/monkeypox-c...,,2022-05-18,2022-05-18,,GBR
4,5,confirmed,London,London,England,,male,,2022-05-15,vesicular rash,...,,,West African Clade,,https://www.gov.uk/government/news/monkeypox-c...,,2022-05-18,2022-05-18,,GBR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1135,1136,discarded,Istanbul,,Turkey,,,,,,...,,,,,https://www.hurriyetdailynews.com/tests-negati...,,2022-06-05,2022-06-05,,TUR
1136,1137,discarded,Istanbul,,Turkey,,,,,,...,,,,,https://www.hurriyetdailynews.com/tests-negati...,,2022-06-05,2022-06-05,,TUR
1137,1138,discarded,Istanbul,,Turkey,,,,,,...,,,,,https://www.hurriyetdailynews.com/tests-negati...,,2022-06-05,2022-06-05,,TUR
1138,1139,discarded,Istanbul,,Turkey,,,,,,...,,,,,https://www.hurriyetdailynews.com/tests-negati...,,2022-06-05,2022-06-05,,TUR


### Data Cleaning

In [29]:
df_import.columns

Index(['ID', 'Status', 'Location', 'City', 'Country', 'Age', 'Gender',
       'Date_onset', 'Date_confirmation', 'Symptoms', 'Hospitalised (Y/N/NA)',
       'Date_hospitalisation', 'Isolated (Y/N/NA)', 'Date_isolation',
       'Outcome', 'Contact_comment', 'Contact_ID', 'Contact_location',
       'Travel_history (Y/N/NA)', 'Travel_history_entry',
       'Travel_history_start', 'Travel_history_location',
       'Travel_history_country', 'Genomics_Metadata', 'Confirmation_method',
       'Source', 'Source_II', 'Date_entry', 'Date_last_modified', 'Source_III',
       'Country_ISO3'],
      dtype='object')

In [30]:
df = df_import[['ID','Date_confirmation', 'Location', 'City', 'Country','Status', 'Symptoms', 'Country_ISO3']]
df

Unnamed: 0,ID,Date_confirmation,Location,City,Country,Status,Symptoms,Country_ISO3
0,1,2022-05-06,Guy's and St Thomas Hospital London,London,England,confirmed,rash,GBR
1,2,2022-05-12,Guy's and St Thomas Hospital London,London,England,confirmed,rash,GBR
2,3,2022-05-13,London,London,England,confirmed,vesicular rash,GBR
3,4,2022-05-15,London,London,England,confirmed,vesicular rash,GBR
4,5,2022-05-15,London,London,England,confirmed,vesicular rash,GBR
...,...,...,...,...,...,...,...,...
1135,1136,,Istanbul,,Turkey,discarded,,TUR
1136,1137,,Istanbul,,Turkey,discarded,,TUR
1137,1138,,Istanbul,,Turkey,discarded,,TUR
1138,1139,,Istanbul,,Turkey,discarded,,TUR


In [31]:
# I want only value which is confirmed. In this dataset, only the confirmed case has date. So, I just drop na in the Date_confirmation column.
# only_con_df = df[df["Date_confirmation"].notna()] # old doesnt work anymore
only_con_df = df[df["Status"] == "confirmed"]

In [32]:
# remove SettingWithCopyWarning: from showing
pd.options.mode.chained_assignment = None

In [34]:
only_con_df['case'] = only_con_df.loc[:, 'Status']
only_con_df['case'] = only_con_df['case'].replace('confirmed', '1').astype('int')
only_con_df

Unnamed: 0,ID,Date_confirmation,Location,City,Country,Status,Symptoms,Country_ISO3,case
0,1,2022-05-06,Guy's and St Thomas Hospital London,London,England,confirmed,rash,GBR,1
1,2,2022-05-12,Guy's and St Thomas Hospital London,London,England,confirmed,rash,GBR,1
2,3,2022-05-13,London,London,England,confirmed,vesicular rash,GBR,1
3,4,2022-05-15,London,London,England,confirmed,vesicular rash,GBR,1
4,5,2022-05-15,London,London,England,confirmed,vesicular rash,GBR,1
...,...,...,...,...,...,...,...,...,...
1120,1121,2022-06-02,Quebec,,Canada,confirmed,,CAN,1
1121,1122,2022-06-02,Quebec,,Canada,confirmed,,CAN,1
1122,1123,2022-06-02,Quebec,,Canada,confirmed,,CAN,1
1123,1124,2022-06-04,New South Wales,Sydney,Australia,confirmed,,AUS,1


In [35]:
only_con_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 921 entries, 0 to 1139
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   ID                 921 non-null    int64 
 1   Date_confirmation  920 non-null    object
 2   Location           377 non-null    object
 3   City               296 non-null    object
 4   Country            921 non-null    object
 5   Status             921 non-null    object
 6   Symptoms           94 non-null     object
 7   Country_ISO3       921 non-null    object
 8   case               921 non-null    int32 
dtypes: int32(1), int64(1), object(7)
memory usage: 68.4+ KB


In [36]:
only_con_df['Date_confirmation'] = pd.to_datetime(only_con_df['Date_confirmation'])
only_con_df

Unnamed: 0,ID,Date_confirmation,Location,City,Country,Status,Symptoms,Country_ISO3,case
0,1,2022-05-06,Guy's and St Thomas Hospital London,London,England,confirmed,rash,GBR,1
1,2,2022-05-12,Guy's and St Thomas Hospital London,London,England,confirmed,rash,GBR,1
2,3,2022-05-13,London,London,England,confirmed,vesicular rash,GBR,1
3,4,2022-05-15,London,London,England,confirmed,vesicular rash,GBR,1
4,5,2022-05-15,London,London,England,confirmed,vesicular rash,GBR,1
...,...,...,...,...,...,...,...,...,...
1120,1121,2022-06-02,Quebec,,Canada,confirmed,,CAN,1
1121,1122,2022-06-02,Quebec,,Canada,confirmed,,CAN,1
1122,1123,2022-06-02,Quebec,,Canada,confirmed,,CAN,1
1123,1124,2022-06-04,New South Wales,Sydney,Australia,confirmed,,AUS,1


In [37]:
# The current data set doesnt have total confirm case for each country.

Total_case = only_con_df.groupby('Country', as_index=False).agg(Date_confirmation=('Date_confirmation', 'last'),case=('case',
                                                           lambda x: sum(x == x.iloc[-1])))

Total_case

Unnamed: 0,Country,Date_confirmation,case
0,Argentina,2022-05-27,2
1,Australia,2022-06-04,6
2,Austria,2022-05-23,1
3,Belgium,2022-06-03,17
4,Canada,2022-06-02,80
5,Czech Republic,2022-06-02,6
6,Denmark,2022-05-24,2
7,England,2022-06-03,214
8,Finland,2022-06-02,2
9,France,2022-06-03,51


In [38]:
# sum_case = []
# counter = 0


# for i in only_con_df["Country"]:
# 	if 'England' in i:
# 		counter += 1
# 	sum_case.append(counter)

In [122]:
# Count the symptoms for each country and find the similarity ratio
sym_df = df[df["Date_confirmation"].notna()]
sym_df['Symptoms'] = sym_df['Symptoms'].str.lower()

sym_df = (sym_df.set_index('Country')['Symptoms']
       .str.split(',', expand=True)
       .apply(lambda x: x.str.strip())
       .stack()
       .rename('Country')
       .reset_index(name='Symptoms'))
print(sym_df)

count_sym_df = sym_df.groupby(['Symptoms','Country']).size().reset_index(name='Symptoms_count')
count_sym_df = count_sym_df.sort_values(by=['Symptoms','Symptoms_count'])
count_sym_df

            Country  level_1        Symptoms
0           England        0            rash
1           England        0            rash
2           England        0  vesicular rash
3           England        0  vesicular rash
4           England        0  vesicular rash
..              ...      ...             ...
124  Czech Republic        3        headache
125  Czech Republic        4    skin lesions
126          Norway        0           fever
127          Norway        1     muscle pain
128          Norway        2            rash

[129 rows x 3 columns]


Unnamed: 0,Symptoms,Country,Symptoms_count
0,blisters,Finland,1
1,blisters,Italy,1
2,chills,Czech Republic,1
3,cough,Germany,1
4,fatigue,Czech Republic,1
5,fever,Argentina,1
6,fever,Austria,1
8,fever,Czech Republic,1
10,fever,Norway,1
11,fever,Switzerland,1


In [123]:
count_sym_df

Unnamed: 0,Symptoms,Country,Symptoms_count
0,blisters,Finland,1
1,blisters,Italy,1
2,chills,Czech Republic,1
3,cough,Germany,1
4,fatigue,Czech Republic,1
5,fever,Argentina,1
6,fever,Austria,1
8,fever,Czech Republic,1
10,fever,Norway,1
11,fever,Switzerland,1


In [128]:
to_merge = df[['Country', 'Country_ISO3']]
to_merge

Unnamed: 0,Country,Country_ISO3
0,England,GBR
1,England,GBR
2,England,GBR
3,England,GBR
4,England,GBR
...,...,...
1135,Turkey,TUR
1136,Turkey,TUR
1137,Turkey,TUR
1138,Turkey,TUR


In [134]:
to_merge = to_merge.drop_duplicates(['Country','Country_ISO3'])[['Country','Country_ISO3']]
to_merge.head()


Unnamed: 0,Country,Country_ISO3
0,England,GBR
7,Portugal,PRT
27,Spain,ESP
35,United States,USA
38,Canada,CAN


In [133]:
count_sym_df = pd.merge(left=count_sym_df, right=to_merge, how="left", on="Country")
count_sym_df

Unnamed: 0,Symptoms,Country,Symptoms_count,Country_ISO3
0,blisters,Finland,1,FIN
1,blisters,Italy,1,ITA
2,chills,Czech Republic,1,CZE
3,cough,Germany,1,DEU
4,fatigue,Czech Republic,1,CZE
5,fever,Argentina,1,ARG
6,fever,Austria,1,AUT
7,fever,Czech Republic,1,CZE
8,fever,Norway,1,NOR
9,fever,Switzerland,1,CHE


In [59]:
count_sym_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35 entries, 0 to 34
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Symptoms        35 non-null     object
 1   Country         35 non-null     object
 2   Symptoms_count  35 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 1.1+ KB


In [60]:
sort_df = only_con_df.sort_values(by=['Country','Date_confirmation'])
sort_df

Unnamed: 0,ID,Date_confirmation,Location,City,Country,Status,Symptoms,Country_ISO3,case
193,194,2022-05-27,,Buenos Aires,Argentina,confirmed,"pustules, fever",ARG,1
446,447,2022-05-27,Buenos Aires,,Argentina,confirmed,ulcerative lesions,ARG,1
106,107,2022-05-20,Sydney,Sydney,Australia,confirmed,,AUS,1
107,108,2022-05-20,The Alfred Hospital,Melbourne,Australia,confirmed,genital rash,AUS,1
950,951,2022-06-02,New South Wales,Sydney,Australia,confirmed,,AUS,1
...,...,...,...,...,...,...,...,...,...
1104,1105,2022-06-03,"New York City, New York",New York City,United States,confirmed,,USA,1
1105,1106,2022-06-03,"New York City, New York",New York City,United States,confirmed,,USA,1
1077,1078,NaT,"Oʻahu, Hawaii",,United States,confirmed,,USA,1
350,351,2022-05-26,,,Wales,confirmed,,GBR,1


In [61]:
sort_df['sum_case'] = sort_df.groupby((sort_df['Country'] != sort_df['Country'].shift(1)).cumsum()).cumcount()+1
sort_df

Unnamed: 0,ID,Date_confirmation,Location,City,Country,Status,Symptoms,Country_ISO3,case,sum_case
193,194,2022-05-27,,Buenos Aires,Argentina,confirmed,"pustules, fever",ARG,1,1
446,447,2022-05-27,Buenos Aires,,Argentina,confirmed,ulcerative lesions,ARG,1,2
106,107,2022-05-20,Sydney,Sydney,Australia,confirmed,,AUS,1,1
107,108,2022-05-20,The Alfred Hospital,Melbourne,Australia,confirmed,genital rash,AUS,1,2
950,951,2022-06-02,New South Wales,Sydney,Australia,confirmed,,AUS,1,3
...,...,...,...,...,...,...,...,...,...,...
1104,1105,2022-06-03,"New York City, New York",New York City,United States,confirmed,,USA,1,25
1105,1106,2022-06-03,"New York City, New York",New York City,United States,confirmed,,USA,1,26
1077,1078,NaT,"Oʻahu, Hawaii",,United States,confirmed,,USA,1,27
350,351,2022-05-26,,,Wales,confirmed,,GBR,1,1


### Visualization

In [62]:
from datetime import date

date = date.today()

In [149]:
fig1 = px.bar(Total_case.sort_values('case', ascending=True), x='Country', y='case', text='case')

fig1.update_layout(
    width=2372,
    height=800,
    title_text=f'Monkey Pox: {date}'
)

fig1.show()

In [150]:
fig2 = px.scatter(sort_df, x='Date_confirmation',y='sum_case', color='Country')

fig2.update_layout(
    width=2372,
    height=800,
    title_text=f'Monkey Pox: {date}'
)

fig2.show()

In [151]:
fig3 = px.line(sort_df, x='Date_confirmation',y='sum_case', color='Country')

fig3.update_layout(
    width=2372,
    height=800,
    title_text=f'Monkey Pox: {date}'
)

fig3.show()

In [152]:
fig4 = px.line(sort_df[sort_df['Country'] == 'England'], x='Date_confirmation',y='sum_case', color='Country')

fig4.update_layout(
    width=2372,
    height=800,
    title_text=f'Monkey Pox UK: {date}'
)
fig4.show()

In [156]:
fig5 = px.scatter(count_sym_df.sort_values('Symptoms_count', ascending=False), x='Country', y='Symptoms', text='Symptoms')

fig5.update_traces(textposition='bottom center')

fig5.update_layout(
    width=2372,
    height=550,
    title_text=f'Monkey Pox Symptoms Per Country: {date}'
)

fig5.show()

In [148]:
fig6 = px.bar(count_sym_df, x="Symptoms", y="Symptoms_count", text='Country_ISO3',
                color='Country',
                labels={'Symptoms_count':'Symptoms Occurrences'})

fig6.update_layout(
    width=2372,
    height=800,
    title_text=f'Monkey Pox Count Symptoms Per Country: {date}'
)

fig6.show()

### Export

In [157]:
fig1.write_image(f"Graph/fig1.png")
fig2.write_image(f"Graph/fig2.png")
fig3.write_image(f"Graph/fig3.png")
fig4.write_image(f"Graph/fig4.png")
fig5.write_image(f"Graph/fig5.png")
fig6.write_image(f"Graph/fig6.png")

In [144]:
for i in range(1 , 7):
   print(i)

1
2
3
4
5
6


In [147]:
for i in range(1 , 7):
    f'fig{i}.write_image',(f"Graph/fig{i}.png")

In [70]:
for i in range(1 , 7):
    with open(f"Graph/fig{i}.png", "w") as file:
            fig1.write_image(f"fig{i}")

FileNotFoundError: [Errno 2] No such file or directory: 'html/fig1.png'

In [None]:
with open(f"Graph/fig6.png", "w") as file:
        fig6.write_image(f"fig6")