github: [ptonso](https://github.com/ptonso)

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np


In [2]:
df = pd.read_csv("https://gist.githubusercontent.com/aishwarya8615/89d9f36fc014dea62487f7347864d16a/raw/8629d284e13976dcb13bb0b27043224b9266fffa/Life_Expectancy_Data.csv")

df.head()

Unnamed: 0,Country,Continent,Year,Status,Life_expectancy,Adult_Mortality,infant_deaths,Alcohol,percentage_expenditure,Hepatitis_B,...,Polio,Total_expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income_composition_of_resources,Schooling
0,Afghanistan,Asia,2015,Developing,65.0,263,62,0.01,71.279624,65.0,...,6.0,8.16,65,0.1,584.25921,33736494,17.2,17.3,0.479,10.1
1,Afghanistan,Asia,2014,Developing,59.9,271,64,0.01,73.523582,62.0,...,58.0,8.18,62,0.1,612.696514,327582,17.5,17.5,0.476,10.0
2,Afghanistan,Asia,2013,Developing,59.9,268,66,0.01,73.219243,64.0,...,62.0,8.13,64,0.1,631.744976,31731688,17.7,17.7,0.47,9.9
3,Afghanistan,Asia,2012,Developing,59.5,272,69,0.01,78.184215,67.0,...,67.0,8.52,67,0.1,669.959,3696958,17.9,18.0,0.463,9.8
4,Afghanistan,Asia,2011,Developing,59.2,275,71,0.01,7.097109,68.0,...,68.0,7.87,68,0.1,63.537231,2978599,18.2,18.2,0.454,9.5


In [3]:
status_counts = df['Status'].value_counts().reset_index()
status_counts.columns = ['Status', 'Count']

print(status_counts.head())

fig = px.pie(status_counts, values='Count', names='Status', title='Distribution of Status')
fig.show()

       Status  Count
0  Developing   2013
1   Developed    448


In [4]:

exp = df['Life_expectancy ']


mean = exp.mean()
variance = exp.var()
std_dev = np.sqrt(variance)

fig = px.histogram(exp, title='Life Expectancy Histogram')

hist_data = np.histogram(exp, bins=30)
y_max = hist_data[0].max()


fig.add_shape(
    go.layout.Shape(
        type="line",
        x0=mean, x1=mean,
        y0=0, y1=y_max,
        line=dict(color="Red", dash="dash"),
    )
)

fig.add_shape(
    go.layout.Shape(
        type="line",
        x0=mean - std_dev, x1=mean - std_dev,
        y0=0, y1=y_max,
        line=dict(color="Green", dash="dash"),
    )
)

fig.add_shape(
    go.layout.Shape(
        type="line",
        x0=mean + std_dev, x1=mean + std_dev,
        y0=0, y1=y_max,
        line=dict(color="Green", dash="dash"),
    )
)

fig.add_annotation(
    go.layout.Annotation(
        x=mean,
        y=y_max,
        text=f"Mean: {mean:.2f}",
        showarrow=True,
        arrowhead=1,
        ax=40,
        ay=-40
    )
)

fig.add_annotation(
    go.layout.Annotation(
        x=mean + std_dev,
        y=y_max * 0.9,
        text=f"+1 SD: {mean + std_dev:.2f}",
        showarrow=False,
    )
)

fig.add_annotation(
    go.layout.Annotation(
        x=mean - std_dev,
        y=y_max * 0.9,
        text=f"-1 SD: {mean - std_dev:.2f}",
        showarrow=False,
    )
)

print(exp.describe())
print(exp.var())
fig.show()


count    2461.000000
mean       69.464567
std         9.639385
min        36.300000
25%        63.400000
50%        72.300000
75%        76.000000
max        89.000000
Name: Life_expectancy , dtype: float64
92.91774400980499


In [5]:

countries_of_interest = ['Ireland', 'Finland', 'Netherlands', 'Zambia', 'Angola']
exp_filt = df[df['Country'].isin(countries_of_interest)]

fig = px.line(exp_filt, x='Year', y='Life_expectancy ', color='Country', title='Life Expectancy of Interest Countries Over Time')


exp_2014 = exp_filt[exp_filt['Year'] == 2014]
max_life_exp_country = exp_2014.loc[exp_2014['Life_expectancy '].idxmax(), 'Country']
min_life_exp_country = exp_2014.loc[exp_2014['Life_expectancy '].idxmin(), 'Country']

print(max_life_exp_country)
print(min_life_exp_country)


fig.show()

Finland
Angola


In [6]:


fig = px.box(exp_filt, x='Country', y='Schooling', title="Schooling boxplot for interest Countries")
fig.show()


In [7]:


df2_list = pd.read_html('https://www.worldometers.info/world-population/population-by-country/')
df2 = df2_list[0]

df2.head()


Unnamed: 0,#,Country (or dependency),Population (2024),Yearly Change,Net Change,Density (P/Km²),Land Area (Km²),Migrants (net),Fert. Rate,Med. Age,Urban Pop %,World Share
0,1,India,1450935791,0.89 %,12866195,488,2973190,-630830,2.0,28,37 %,17.78 %
1,2,China,1419321278,-0.23 %,-3263655,151,9388211,-318992,1.0,40,66 %,17.39 %
2,3,United States,345426571,0.57 %,1949236,38,9147420,1286132,1.6,38,82 %,4.23 %
3,4,Indonesia,283487931,0.82 %,2297864,156,1811570,-38469,2.1,30,59 %,3.47 %
4,5,Pakistan,251269164,1.52 %,3764669,326,770880,-1401173,3.5,20,34 %,3.08 %


In [8]:

df2['Yearly Change'] = df2['Yearly Change'].str.replace('%', '').astype(float) / 100

df2_numeric = df2.select_dtypes(include=[np.number])
corr_matrix = df2_numeric.corr()

fig = px.imshow(corr_matrix)
fig.show()

min_corr = corr_matrix.abs().where(lambda x: np.triu(np.ones(x.shape), k=1).astype(bool)).stack().idxmin()

print(f"\nThe pair of variables with the least correlation: {min_corr}")
print(f"Correlation value: {corr_matrix.loc[min_corr]}")


KeyError: 'Yearly Change'