In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the WHO daily COVID-19 table.
# pandas' default NA markers include the literal string "NA", which is also
# the ISO alpha-2 code of Namibia. Restricting NA parsing to empty cells keeps
# that code in 'Country_code' instead of turning it into NaN (which would
# silently remove the country from every groupby on that column).
df = pd.read_csv(
    'WHO-COVID-19-global-data.csv',
    keep_default_na=False,
    na_values=[''],
)
df.head()

In [None]:
# Column dtypes and non-null counts of the freshly loaded table.
df.info()

In [None]:
# Distinct country codes present before any rows are removed.
df['Country_code'].unique()

In [None]:
# (rows, columns) before the blank-code rows are dropped.
df.shape

We exclude the rows labelled 'Other' (those with a blank country code), because they do not correspond to a clearly defined country.

In [None]:
# Rows whose country code is a single blank character are removed from df in place.
condition = df['Country_code'].eq(' ')
rows_to_drop = df.index[condition.to_numpy()]

df.drop(index=rows_to_drop, inplace=True)

In [None]:
# (rows, columns) after the blank-code rows were dropped.
df.shape

In [None]:
# Distinct country codes that remain; the blank code should no longer be listed.
df['Country_code'].unique()

In [None]:
# Parse the 'Date_reported' column into datetime values
df['Date_reported'] = pd.to_datetime(df['Date_reported'])

# Sum 'New_cases' over all rows sharing a reporting date -> {date: total}
cases_by_date = df['New_cases'].groupby(df['Date_reported']).sum().to_dict()

In [None]:
import seaborn as sns

# Scatter plot of the per-date totals held in cases_by_date (date -> summed new cases).
fig, ax = plt.subplots(figsize=(50, 25))

# The dict views are materialised as lists: some pandas versions refuse
# dict_keys / dict_values as column data, and lists behave the same everywhere.
sns.scatterplot(
    x=list(cases_by_date.keys()),
    y=list(cases_by_date.values()),
    ax=ax,  # draw on the axes created above rather than the implicit current axes
)

ax.set_xlabel('Date reported')
ax.set_ylabel('New cases')
ax.set_title("Distribution of new cases")
plt.show()

In [None]:
# Sum 'New_cases' over all rows sharing a 'Country_code' (Series indexed by code)
count_cases_in_countries = df['New_cases'].groupby(df['Country_code']).sum()

count_cases_in_countries

In [None]:
import geopandas as gpd
import pycountry

# Lookup built from pycountry: three-letter (alpha-3) code -> two-letter (alpha-2) code
iso_to_alpha2 = {c.alpha_3: c.alpha_2 for c in pycountry.countries}

# Load the world map geometries shipped with geopandas.
# NOTE(review): geopandas.datasets is deprecated in geopandas 0.14 and removed
# in 1.0 — confirm the pinned geopandas version still provides it.
world_map = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Translate the map's alpha-3 codes; codes missing from the lookup become NaN
world_map['iso_a2'] = world_map['iso_a3'].map(iso_to_alpha2)

# Attach the per-code case totals by looking each alpha-2 code up in
# count_cases_in_countries; codes without a total become NaN
world_map['New_cases'] = world_map['iso_a2'].map(count_cases_in_countries)

# Create a new figure and axis object
fig, ax = plt.subplots(figsize=(100, 20))

# Choropleth: each country is coloured by its case total. Without
# missing_kwds geopandas does not draw rows whose value is NaN, which leaves
# holes in the map; they are drawn in grey instead.
world_map.plot(
    column='New_cases',
    ax=ax,
    legend=True,
    legend_kwds={'label': 'Number of Cases'},
    missing_kwds={'color': 'lightgrey'},
)

# Set axis labels and title
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('COVID-19 Cases by Country')

# Display the map
plt.show()

In [None]:
# Load the population table from CSV and preview its first rows.
counties_population = pd.read_csv('List_of_countries_and_dependencies_by_population_1.csv')

counties_population.head()

In [None]:
# Remove, in place, the columns that no later cell reads.
unused_columns = [
    'Unnamed: 0',
    'Unnamed: 6',
    'Date',
    'Source (official or from\nthe United Nations)',
]
counties_population.drop(columns=unused_columns, inplace=True)

In [None]:
# Show the population table after the unused columns were removed.
counties_population

In [None]:
# Distinct values of the WHO table's 'Country' column.
countries_list = df["Country"].unique()
print(countries_list)

In [None]:
# Total 'New_cases' per country name, as a plain dict.
# A single grouped sum replaces the per-country boolean filter, which rescanned
# the whole frame once for every country (and made the zero-initialisation
# loop redundant). sort=False keeps the countries in order of first appearance.
cases_count = df.groupby('Country', sort=False)['New_cases'].sum().to_dict()

# Country names as spelled in the WHO table, shown as a set for comparison
old_countries = cases_count.keys()
set(old_countries)

In [None]:
# Show the per-country case totals.
cases_count

In [None]:
# Names as spelled in the population table's 'Country / Dependency' column.
new_countries = set(counties_population['Country / Dependency'].unique())
new_countries

In [None]:
# WHO 'Country' spelling -> spelling used in the population table's
# 'Country / Dependency' column. Names that are not listed are used unchanged.
key_changes = {'American Samoa' : 'American Samoa (US)',
              'Bermuda': 'Bermuda (UK)',
              'Bolivia (Plurinational State of)': 'Bolivia',
              'British Virgin Islands': 'British Virgin Islands (UK)',
              'Brunei Darussalam': 'Brunei',
              'Cabo Verde': 'Cape Verde',
              'Cayman Islands': 'Cayman Islands (UK)',
              'Czechia': 'Czech Republic',
              'Democratic Republic of the Congo': 'DR Congo',
              'Greenland': 'Greenland (Denmark)',
              'Iran (Islamic Republic of)' : 'Iran',
              "Democratic People's Republic of Korea": 'North Korea',
              'occupied Palestinian territory, including east Jerusalem': 'Palestine',
              'Russian Federation': 'Russia',
              'Republic of Korea': 'South Korea',
              # NOTE(review): the leading space in ' Moldova' looks accidental and
              # presumably prevents a match with the population table. The
              # positional 'iso_a2' assignment further down depends on exactly
              # which rows matched, so check both together before changing it.
              'Republic of Moldova': ' Moldova',
              'Syrian Arab Republic': 'Syria',
              # The first key is the UTF-8 name mis-decoded as Latin-1; the
              # correctly decoded spelling is accepted too, so the match does
              # not depend on how the CSV was read.
              'TÃ¼rkiye': 'Turkey',
              'Türkiye': 'Turkey',
              # Identity entry: the name is the same in both tables.
              'Turkmenistan': 'Turkmenistan',
              'The United Kingdom': 'United Kingdom',
              'United States of America': 'United States',
              'Viet Nam': 'Vietnam'}

In [None]:
def replace_country_names(original_dict, key_mapping):
    """Return a new dict with the keys of ``original_dict`` renamed.

    Each key that appears in ``key_mapping`` is replaced by its mapped name;
    every other key is carried over as is. Values are copied untouched and
    ``original_dict`` itself is not modified. When two keys end up with the
    same name, the one that comes later in ``original_dict`` wins.
    """
    return {
        key_mapping.get(name, name): total
        for name, total in original_dict.items()
    }

# Rename the keys of cases_count according to key_changes
new_dict = replace_country_names(cases_count, key_changes)

new_dict

In [None]:
# Look each population-table name up in new_dict; names with no entry get 0 cases.
counties_population['disease_cases'] = counties_population['Country / Dependency'].map(new_dict).fillna(0)

In [None]:
# Show the population table with the new 'disease_cases' column.
counties_population

In [None]:
# Names of the rows whose 'disease_cases' value is 0.
no_cases_mask = counties_population['disease_cases'].eq(0)
counties_population.loc[no_cases_mask, 'Country / Dependency']

None of these are important (large) countries, so we can delete them.

In [None]:
# Store the sum of all other rows' case counts in row 0.
# NOTE(review): row 0 is presumably the 'World' aggregate row — confirm.
# Row 0 itself is left out of the sum so that re-running this cell does not
# add the previously stored total to itself.
counties_population.loc[0, 'disease_cases'] = (
    counties_population['disease_cases'].drop(index=0).sum()
)

In [None]:
# Keep only the rows with a non-zero case count. .copy() makes the result an
# independent frame, so the column assignments in later cells operate on it
# directly instead of raising SettingWithCopyWarning on a filtered slice.
counties_population = counties_population[counties_population['disease_cases'] != 0].copy()
counties_population

In [None]:
# Renumber the remaining rows in place (the old index is discarded),
# then show dtypes and non-null counts.
counties_population.reset_index(drop=True, inplace=True)
counties_population.info()

In [None]:
def convert_to_int(formatted_string):
    """Parse a comma-grouped number such as ``'1,234,567'`` into an ``int``.

    Values that are already integers are accepted as well, so applying the
    conversion to an already converted column (for example when the cell is
    re-run) returns the same numbers instead of raising ``AttributeError``.

    Args:
        formatted_string: Text with optional ``,`` thousands separators, or
            an integer.

    Returns:
        The parsed integer.

    Raises:
        ValueError: If the value is not an integer once the commas are removed.
    """
    # str() lets ints pass through; int() itself ignores surrounding whitespace.
    return int(str(formatted_string).replace(',', ''))

# Convert every 'Population' entry with convert_to_int, element by element.
counties_population['Population'] = counties_population['Population'].map(convert_to_int)

In [None]:
# Show the table after the 'Population' conversion.
counties_population

In [None]:
disease_cases = counties_population['disease_cases']

# Cases as a percentage of the row's own population, stored as text like '12.34%'
share_of_population = disease_cases.div(counties_population['Population']).mul(100).round(2)
counties_population['Presentege_sick_in_country'] = share_of_population.astype(str) + '%'

# Cases as a percentage of the case count held in row 0, same text format
share_of_row0_cases = disease_cases.div(counties_population.loc[0, 'disease_cases']).mul(100).round(2)
counties_population['Presentege_sick_in_world'] = share_of_row0_cases.astype(str) + '%'

counties_population

In [None]:
# Name (as spelled in the population table) -> two-letter country code.
# 'World' maps to an empty string because it has no country code.
# The codes are later matched against pycountry's alpha-2 codes for the map.
country_codes = {
    'World': '',
    'China': 'CN',
    'India': 'IN',
    'United States': 'US',
    'Indonesia': 'ID',
    'Pakistan': 'PK',
    'Nigeria': 'NG',
    'Brazil': 'BR',
    'Bangladesh': 'BD',
    'Russia': 'RU',
    'Mexico': 'MX',
    'Japan': 'JP',
    'Philippines': 'PH',
    'Ethiopia': 'ET',
    'Egypt': 'EG',
    'Vietnam': 'VN',
    'DR Congo': 'CD',
    'Turkey': 'TR',
    'Iran': 'IR',
    'Germany': 'DE',
    'Thailand': 'TH',
    'France': 'FR',
    'United Kingdom': 'GB',
    'South Africa': 'ZA',
    'Italy': 'IT',
    'Myanmar': 'MM',
    'Colombia': 'CO',
    'Kenya': 'KE',
    'South Korea': 'KR',
    'Spain': 'ES',
    'Argentina': 'AR',
    'Algeria': 'DZ',
    'Iraq': 'IQ',
    'Uganda': 'UG',
    'Sudan': 'SD',
    'Ukraine': 'UA',
    'Canada': 'CA',
    'Poland': 'PL',
    'Morocco': 'MA',
    'Uzbekistan': 'UZ',
    'Afghanistan': 'AF',
    'Peru': 'PE',
    'Angola': 'AO',
    'Malaysia': 'MY',
    'Mozambique': 'MZ',
    'Saudi Arabia': 'SA',
    'Yemen': 'YE',
    'Ghana': 'GH',
    'Nepal': 'NP',
    'Madagascar': 'MG',
    'Australia': 'AU',
    'Cameroon': 'CM',
    'Niger': 'NE',
    'Mali': 'ML',
    'Syria': 'SY',
    'Burkina Faso': 'BF',
    'Sri Lanka': 'LK',
    'Malawi': 'MW',
    'Chile': 'CL',
    'Kazakhstan': 'KZ',
    'Zambia': 'ZM',
    'Romania': 'RO',
    'Ecuador': 'EC',
    'Somalia': 'SO',
    'Netherlands': 'NL',
    'Senegal': 'SN',
    'Guatemala': 'GT',
    'Chad': 'TD',
    'Cambodia': 'KH',
    'Zimbabwe': 'ZW',
    'Guinea': 'GN',
    'South Sudan': 'SS',
    'Rwanda': 'RW',
    'Burundi': 'BI',
    'Benin': 'BJ',
    'Bolivia': 'BO',
    'Tunisia': 'TN',
    'Papua New Guinea': 'PG',
    'Belgium': 'BE',
    'Haiti': 'HT',
    'Jordan': 'JO',
    'Cuba': 'CU',
    'Czech Republic': 'CZ',
    'Sweden': 'SE',
    'Dominican Republic': 'DO',
    'Greece': 'GR',
    'Portugal': 'PT',
    'Azerbaijan': 'AZ',
    'Tajikistan': 'TJ',
    'Israel': 'IL',
    'Honduras': 'HN',
    'Hungary': 'HU',
    'United Arab Emirates': 'AE',
    'Belarus': 'BY',
    'Austria': 'AT',
    'Switzerland': 'CH',
    'Sierra Leone': 'SL',
    'Togo': 'TG',
    'Paraguay': 'PY',
    'Kyrgyzstan': 'KG',
    'Libya': 'LY',
    'El Salvador': 'SV',
    'Serbia': 'RS',
    'Nicaragua': 'NI',
    'Bulgaria': 'BG',
    'Congo': 'CG',
    'Denmark': 'DK',
    'Central African Republic': 'CF',
    'Finland': 'FI',
    'Norway': 'NO',
    'Lebanon': 'LB',
    'Palestine': 'PS',
    'Singapore': 'SG',
    'Slovakia': 'SK',
    'Costa Rica': 'CR',
    'New Zealand': 'NZ',
    'Ireland': 'IE',
    'Oman': 'OM',
    'Kuwait': 'KW',
    'Liberia': 'LR',
    'Mauritania': 'MR',
    'Panama': 'PA',
    'Croatia': 'HR',
    'Eritrea': 'ER',
    'Georgia': 'GE',
    'Uruguay': 'UY',
    'Mongolia': 'MN',
    'Bosnia and Herzegovina': 'BA',
    'Qatar': 'QA',
    'Armenia': 'AM',
    'Lithuania': 'LT',
    'Jamaica': 'JM',
    'Albania': 'AL',
    'Namibia': 'NA',
    'Gambia': 'GM',
    'Botswana': 'BW',
    'Lesotho': 'LS',
    'Gabon': 'GA',
    'Slovenia': 'SI',
    'Latvia': 'LV',
    'North Macedonia': 'MK',
    'Guinea-Bissau': 'GW',
    'Equatorial Guinea': 'GQ',
    'Bahrain': 'BH',
    'Estonia': 'EE',
    'Trinidad and Tobago': 'TT',
    'Mauritius': 'MU',
    'Eswatini': 'SZ',
    'Djibouti': 'DJ',
    'Cyprus': 'CY',
    'Fiji': 'FJ',
    'Bhutan': 'BT',
    'Comoros': 'KM',
    'Guyana': 'GY',
    'Solomon Islands': 'SB',
    'Luxembourg': 'LU',
    'Montenegro': 'ME',
    'Suriname': 'SR',
    'Cape Verde': 'CV',
    'Malta': 'MT',
    'Belize': 'BZ',
    'Brunei': 'BN',
    'Maldives': 'MV',
    'Bahamas': 'BS',
    'Iceland': 'IS',
    'Vanuatu': 'VU',
    'Barbados': 'BB',
    'Samoa': 'WS',
    'Saint Lucia': 'LC',
    'Kiribati': 'KI',
    'Grenada': 'GD',
    'Saint Vincent and the Grenadines': 'VC',
    'Antigua and Barbuda': 'AG',
    'Seychelles': 'SC',
    'Tonga': 'TO',
    'Andorra': 'AD',
    'Cayman Islands (UK)': 'KY',
    'Dominica': 'DM',
    'Bermuda (UK)': 'BM',
    'Greenland (Denmark)': 'GL',
    'American Samoa (US)': 'AS',
    'Saint Kitts and Nevis': 'KN',
    'Liechtenstein': 'LI',
    'Marshall Islands': 'MH',
    'Monaco': 'MC',
    'San Marino': 'SM',
    'British Virgin Islands (UK)': 'VG',
    'Palau': 'PW',
    'Cook Islands': 'CK',
    'Nauru': 'NR',
    'Tuvalu': 'TV',
    'Niue': 'NU'
}

# Look each row's name up in country_codes instead of assigning the dict's
# values by position: a positional assignment only works while the table has
# exactly as many rows as the dict has entries, in exactly the same order,
# and silently mislabels countries when the order drifts. Names missing from
# country_codes get NaN.
counties_population['iso_a2'] = counties_population['Country / Dependency'].map(country_codes)

counties_population

In [None]:
# List the column labels now present in the table.
counties_population.columns

In [None]:
# Two-column subset: the formatted 'Presentege_sick_in_world' text and the country code.
precentage_sick = counties_population[['Presentege_sick_in_world', 'iso_a2']]

In [None]:
# Show the full table again.
counties_population

In [None]:
# Case count of every row as a percentage of the population stored in row 0.
# NOTE(review): the denominator is row 0's population (presumably the 'World'
# row), not each country's own population, although the map cell stores the
# result in a column named 'percentege_in_country' — confirm which ratio is
# intended.
percentage = (counties_population['disease_cases'] / counties_population.loc[0, 'Population']) * 100
percentage = percentage.round(2)

df_for_map = pd.DataFrame({
    'iso_a2': counties_population['iso_a2'],
    'percentage': percentage
})

# Drop the first row and renumber the rest. reset_index() returns a new frame,
# so its result has to be assigned for the renumbering to take effect.
df_for_map = df_for_map.iloc[1:].reset_index(drop=True)

# One value per country code (the mean collapses any duplicate codes)
df_for_map_series = df_for_map.groupby('iso_a2')['percentage'].mean()

df_for_map_series

In [None]:
# Lookup built from pycountry: three-letter (alpha-3) code -> two-letter (alpha-2) code
iso_to_alpha2 = {c.alpha_3: c.alpha_2 for c in pycountry.countries}

# Load the world map geometries shipped with geopandas.
# NOTE(review): geopandas.datasets is deprecated in geopandas 0.14 and removed
# in 1.0 — confirm the pinned geopandas version still provides it.
world_map = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Translate the map's alpha-3 codes; codes missing from the lookup become NaN
world_map['iso_a2'] = world_map['iso_a3'].map(iso_to_alpha2)

# Attach the percentages by looking each alpha-2 code up in df_for_map_series;
# codes without a value become NaN
world_map['percentege_in_country'] = world_map['iso_a2'].map(df_for_map_series)

# Create a new figure and axis object
fig, ax = plt.subplots(figsize=(100, 20))

# Choropleth: each country is coloured by its percentage. Without
# missing_kwds geopandas does not draw rows whose value is NaN, which leaves
# holes in the map; they are drawn in grey instead.
world_map.plot(
    column='percentege_in_country',
    ax=ax,
    legend=True,
    legend_kwds={'label': 'percentege_in_country'},
    missing_kwds={'color': 'lightgrey'},
)

# Set axis labels and title
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('COVID-19 Cases presentege sick in the world')

# Display the map
plt.show()