In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')
import folium

In [None]:
# List every file available under the Kaggle input directory, one full path per line.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# Data Loading

In [None]:
# Load the raw country-level table from the Kaggle dataset.
df_raw = pd.read_csv('/kaggle/input/countries-of-the-world-2023/world-data-2023.csv')

# Columns that later cells treat as numbers (correlations, scatter plots,
# continuous colour scales, min-max scaling, map coordinates).
NUMERIC_COLUMNS = [
    'Density\n(P/Km2)',
    'Agricultural Land( %)',
    'Armed Forces size',
    'Birth Rate',
    'CPI',
    'Forested Area (%)',
    'GDP',
    'Life expectancy',
    'Maternal mortality ratio',
    'Physicians per thousand',
    'Population',
    'Tax revenue (%)',
    'Total tax rate',
    'Unemployment rate',
    'Latitude',
    'Longitude',
]

# Work on a copy so the untouched file contents stay available in `df_raw`.
df = df_raw.copy()
for col in NUMERIC_COLUMNS:
    # NOTE(review): presumably some of these columns arrive as formatted text
    # (currency symbols, thousands separators, percent signs) -- confirm with
    # df_raw.dtypes. Columns that are already numeric are left untouched.
    if col in df.columns and not pd.api.types.is_numeric_dtype(df[col]):
        stripped = df[col].astype(str).str.replace(r'[$,%\s]', '', regex=True)
        # Anything that still is not a number (including missing values) becomes NaN.
        df[col] = pd.to_numeric(stripped, errors='coerce')

# Data Info

In [None]:
# Preview the top five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Keep the DataFrame's column index in `column_names`, then echo it as the
# cell output (a bare last expression is rendered by the notebook).
column_names = df.columns
column_names

In [None]:
# Inspect the distinct values present in the 'CPI' column.
df.loc[:, 'CPI'].unique()

# Exploratory Data Analysis

In [None]:
# Interactive world map with one marker per country.
# The map object is called `world_map` rather than `map` so the builtin
# `map()` stays usable in later cells. 'CartoDB positron' is used because the
# Stamen tilesets are no longer bundled with current folium releases.
world_map = folium.Map(location=[0, 0], zoom_start=3.3, tiles='CartoDB positron')

# Only rows with both coordinates present can be placed on the map.
located = df.dropna(subset=['Latitude', 'Longitude'])

# Markers for each country; the popup shows the country name.
for lat, lon, country in zip(located['Latitude'], located['Longitude'], located['Country']):
    folium.Marker([lat, lon], popup=country).add_to(world_map)

# Bare last expression so the notebook renders the map.
world_map

In [None]:
# Choropleth of GDP: countries are matched by name and shaded on the Plasma scale.
fig = px.choropleth(
    data_frame=df,
    locations='Country',
    locationmode='country names',
    color='GDP',
    hover_name='Country',
    color_continuous_scale=px.colors.sequential.Plasma,
    title='Global GDP',
)
fig.show()

In [None]:
# Choropleth of Population: countries are matched by name and shaded on the Plasma scale.
fig = px.choropleth(
    data_frame=df,
    locations='Country',
    locationmode='country names',
    color='Population',
    hover_name='Country',
    color_continuous_scale=px.colors.sequential.Plasma,
    title='Global Population',
)
fig.show()

In [None]:
# Pairwise correlation matrix of the numeric columns.
# The frame also holds text columns (e.g. 'Country'); calling corr() on them
# raises on pandas >= 2.0, so restrict to numeric dtypes explicitly.
correlation_matrix = df.select_dtypes(include='number').corr()

In [None]:
# Annotated heatmap of the correlation matrix, drawn on an explicit axes.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(16, 12))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5, ax=heatmap_ax)
heatmap_ax.set_title('Correlation Heatmap for Numeric Variables')
plt.show()

# Clustered version of the same matrix.
# clustermap is a figure-level function: it creates its own figure, so the
# size is passed to it directly (a preceding plt.figure() would only leave an
# empty extra figure) and the title is set on that figure rather than on
# whichever axes happens to be current.
cluster_grid = sns.clustermap(
    correlation_matrix,
    annot=True,
    cmap='plasma',
    linewidths=0.5,
    figsize=(16, 12),
)
# y > 1 keeps the title above the column dendrogram.
cluster_grid.ax_heatmap.figure.suptitle('Correlation Heatmap for Numeric Variables', y=1.02)
plt.show()



In [None]:
# One (x column, y column, panel title) entry per panel, listed row by row
# for the 5 x 2 grid below.
scatter_panels = [
    ('GDP', 'Physicians per thousand', 'Scatter plot of GDP vs Physicians per Thousand'),
    ('GDP', 'Maternal mortality ratio', 'Scatter plot of GDP vs Maternal Mortality Ratio'),
    ('GDP', 'Tax revenue (%)', 'Scatter plot of GDP vs Tax Revenue (%)'),
    ('Total tax rate', 'Life expectancy', 'Scatter plot of Life Expectancy vs Total Tax Rate'),
    ('GDP', 'Unemployment rate', 'Scatter plot of GDP vs Unemployment Rate'),
    ('Armed Forces size', 'Life expectancy', 'Scatter plot of Life Expectancy vs Armed Forces Size'),
    ('Density\n(P/Km2)', 'GDP', 'Scatter plot of GDP vs Population Density'),
    ('Forested Area (%)', 'GDP', 'Scatter plot of GDP vs Forested Area (%)'),
    ('Birth Rate', 'GDP', 'Scatter plot of GDP vs Birth Rate'),
    ('Agricultural Land( %)', 'Life expectancy', 'Scatter plot of Life Expectancy vs Agricultural Land (%)'),
]

# Grid of axes; `ax.flat` walks it in row-major order, matching the list above.
fig, ax = plt.subplots(5, 2, figsize=(20, 30))

for panel_ax, (x_col, y_col, panel_title) in zip(ax.flat, scatter_panels):
    sns.scatterplot(x=x_col, y=y_col, data=df, ax=panel_ax)
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# Lower-triangle correlation heatmap with a custom diverging colormap.
# Restrict to numeric columns first: corr() on text columns (e.g. 'Country')
# raises on pandas >= 2.0.
corr = df.select_dtypes(include='number').corr()

# Boolean mask that hides the upper triangle (diagonal included), since the
# matrix is symmetric and those cells repeat the lower half.
mask = np.triu(np.ones_like(corr, dtype=bool))

f, ax = plt.subplots(figsize=(11, 9))
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Colours are centred on 0 and the scale is capped at 0.3 via vmax.
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
    ax=ax,
)
ax.set_title('Correlation Heatmap (lower triangle)')
plt.show()

In [None]:
# Parallel-coordinates view of four indicators, each min-max scaled to [0, 1]
# so the axes are comparable; lines are coloured by scaled life expectancy.
kpi_df = df[['GDP', 'Population', 'Life expectancy', 'CPI']]

kpi_min = kpi_df.min()
kpi_span = kpi_df.max() - kpi_min
kpi_df_normalized = (kpi_df - kpi_min) / kpi_span

# Carry the country name alongside the scaled indicators.
kpi_df_normalized['Country'] = df['Country']

fig = px.parallel_coordinates(
    kpi_df_normalized,
    color="Life expectancy",
    labels={
        "GDP": "GDP",
        "Population": "Population",
        "Life expectancy": "Life Expectancy",
        "CPI": "CPI",
    },
    color_continuous_scale=px.colors.diverging.Tealrose,
    color_continuous_midpoint=0.5,
)
fig.show()

In [None]:
# More geographic heatmaps using Plotly.
# GDP by country (same arguments as the earlier 'Global GDP' choropleth cell).
fig = px.choropleth(
    df,
    title='Global GDP',
    locations='Country',
    locationmode='country names',
    hover_name='Country',
    color='GDP',
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.show()

In [None]:
# Population by country (same arguments as the earlier 'Global Population' choropleth cell).
fig = px.choropleth(
    df,
    title='Global Population',
    locations='Country',
    locationmode='country names',
    hover_name='Country',
    color='Population',
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig.show()

In [None]:
# 3D scatter: GDP (x) vs Population (y) vs Life expectancy (z), one colour per country.
fig = px.scatter_3d(
    data_frame=df,
    x='GDP',
    y='Population',
    z='Life expectancy',
    color='Country',
    opacity=0.7,
)
# Use the same marker size for every trace.
fig.update_traces(marker={'size': 5})
fig.show()

In [None]:
# 3D scatter: GDP (x) vs Population (y) vs CPI (z), one colour per country.
fig = px.scatter_3d(
    data_frame=df,
    x='GDP',
    y='Population',
    z='CPI',
    color='Country',
)
# Use the same marker size for every trace.
fig.update_traces(marker={'size': 5})
fig.show()