# Data Visualization Exercise 😊

In [None]:
import pandas as pd
from pandas import DataFrame
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the countries dataset from the CSV file addressed relative to the
# current working directory.
csv_path = "./countries-of-the-world.csv"
data = pd.read_csv(csv_path)

In [None]:
# Data cleaning starts here: preview the first five rows of the frame.
data.head(n=5)

#### Renaming columns

In [None]:
# Pair every original column header with the name that replaces it.
rename_pairs = [
    ("Area (sq. mi.)", "Area_(sq. mi.)"),
    ("Pop. Density (per sq. mi.)", "Pop_Density_(per sq. mi.)"),
    ("Coastline (coast/area ratio)", "Coastline_(coast/area ratio)"),
    ("Net migration", "Net_migration"),
    ("Infant mortality (per 1000 births)", "Infant_mortality_(per 1000 births)"),
    ("GDP ($ per capita)", "GDP_($ per capita)"),
    ("Literacy (%)", "Literacy_(%)"),
    ("Phones (per 1000)", "Phones_(per 1000)"),
    ("Arable (%)", "Arable_(%)"),
    ("Crops (%)", "Crops_(%)"),
    ("Other (%)", "Other_(%)"),
]
columns_to_rename: dict = dict(rename_pairs)

# Apply the mapping and rebind `data` to the relabelled frame.
data = data.rename(columns=columns_to_rename)

# Show the resulting column labels.
print(data.columns)

##### Checking for Null and NaN fields and filling them with 0

In [None]:
# Report, per column, whether it contains at least one missing value.
data.isna().any()

In [None]:
# Collect the labels of every column that holds at least one missing value.
has_missing = data.isna().any()
columns_with_nan = data.columns[has_missing].tolist()

# Overwrite the missing entries in those columns with 0.
filled_columns = data[columns_with_nan].fillna(value=0)
data[columns_with_nan] = filled_columns

# Re-run the per-column missing-value check on the updated frame.
data.isna().any()

##### Checking for Duplicated Rows

In [None]:
# Select the rows flagged by `data.duplicated()` and print them.
duplicate_mask = data.duplicated()
duplicated_rows = data.loc[duplicate_mask]
print(duplicated_rows)

##### Checking Data Type

In [None]:
# Display the dtype of each column.
data.dtypes

In [None]:
# Cast the population-density column to strings.
density_column = 'Pop_Density_(per sq. mi.)'
data[density_column] = data[density_column].astype(str)

In [None]:
# Columns whose values are cast to strings so that commas can be replaced
# with periods before the final cast to float.
columns_to_convert = [
    'Pop_Density_(per sq. mi.)', 'Coastline_(coast/area ratio)', 'Net_migration',
    'Infant_mortality_(per 1000 births)', 'Literacy_(%)', 'Phones_(per 1000)',
    'Arable_(%)', 'Crops_(%)', 'Other_(%)', 'Climate', 'Birthrate', 'Deathrate',
    'Agriculture', 'Industry', 'Service'
]

for column in columns_to_convert:
    # regex=False makes the comma a literal match: this is plain text
    # substitution, and being explicit keeps the call independent of the
    # pandas-version default for `regex`.
    as_text = data[column].astype(str)
    data[column] = as_text.str.replace(',', '.', regex=False).astype(float)

# Convert integer columns
data['Population'] = data['Population'].astype(int)
data['Area_(sq. mi.)'] = data['Area_(sq. mi.)'].astype(int)
data['GDP_($ per capita)'] = data['GDP_($ per capita)'].astype(int)

# Display the dtypes after conversion.
data.dtypes

In [None]:
# Preview the first 50 rows of the frame.
data.head(n=50)

In [None]:
# Done with data cleaning

##### Question 1: Bar chart visualizing the population of the first 20 countries in the dataset.

In [None]:
# Take the leading 20 rows and pull out the two columns being plotted.
first_20_rows = data.iloc[:20]
X_countries = first_20_rows['Country']
Y_population = first_20_rows['Population']

# One bar per country, bar height given by its population.
plt.figure(figsize=(17, 13))
plt.bar(x=X_countries, height=Y_population)

# Label the chart, then rotate the x tick labels by 90 degrees.
plt.title('Population of First 20 Countries')
plt.ylabel("Population")
plt.xlabel("Country")
plt.xticks(rotation=90)

plt.show()

##### Question 2: Line graph of Phones (per 1000) against the population data

In [None]:
# Sort by population first: plt.plot joins points in row order, so unsorted
# x values make the line double back on itself across the axis.
by_population = data.sort_values('Population')

plt.figure(figsize=(17, 13))
plt.plot(
    by_population['Population'],
    by_population['Phones_(per 1000)'],
    marker='o',
)
plt.title('Phones (per 1000) vs Population')
plt.xlabel('Population')
plt.ylabel('Phones (per 1000)')
plt.show()

##### Question 3: Literacy rate against population

In [None]:
# Sort by population first: plt.plot joins points in row order, so unsorted
# x values make the line double back on itself across the axis.
by_population = data.sort_values('Population')

plt.figure(figsize=(17, 12))
plt.plot(
    by_population['Population'],
    by_population['Literacy_(%)'],
    marker='o',
)
plt.title('Literacy rate vs Population')
plt.xlabel('Population')
plt.ylabel('Literacy rate')
plt.show()

##### Question 4: Plot the data on infant mortality, birthrate and deathrate.

In [None]:
# Print the current column labels.
print(data.columns)

In [None]:
plt.figure(figsize=(23, 15))

# (column, legend label) for each rate; drawn in this order against the
# country names on the x-axis.
rate_lines = (
    ('Infant_mortality_(per 1000 births)', 'Infant Mortality'),
    ('Birthrate', 'Birthrate'),
    ('Deathrate', 'Deathrate'),
)
for rate_column, legend_label in rate_lines:
    plt.plot(data['Country'], data[rate_column], label=legend_label)

# Label the chart, rotate the x tick labels by 90 degrees, add the legend.
plt.title('Infant Mortality, Birthrate, and Deathrate')
plt.ylabel('Rate')
plt.xlabel('Country')
plt.xticks(rotation=90)
plt.legend()

plt.show()

##### Question 5: GDP data against Agriculture, Industry and Service

In [None]:
plt.figure(figsize=(17, 12))

# One scatter series per sector column, each plotted against GDP per capita;
# the column name doubles as the legend label.
gdp_values = data['GDP_($ per capita)']
for sector in ('Agriculture', 'Industry', 'Service'):
    plt.scatter(gdp_values, data[sector], label=sector)

# Label the chart and add the legend.
plt.title('GDP vs Agriculture, Industry, and Service')
plt.ylabel('Percentage')
plt.xlabel('GDP ($ per capita)')
plt.legend()

plt.show()