<a href="https://colab.research.google.com/github/feernandarodg/feernandarodg/blob/main/FinalCorte_expo0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1. Libraries**

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# **2. Import File**

* Upload the file to Colab
* Right click on the file and copy the path


In [2]:
# read the file

## Save the path
p = '/content/Economic.xlsx'

# Read ans create the dataframe
d = pd.read_excel(p)
d.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290 entries, 0 to 289
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country Name                   290 non-null    object 
 1   Country Code                   290 non-null    object 
 2   Year                           290 non-null    int64  
 3   GDP Per Capita                 290 non-null    float64
 4   Education Expenditure (% GDP)  290 non-null    float64
 5   Health Expenditure (% GDP)     290 non-null    float64
 6   R&D                            290 non-null    float64
dtypes: float64(4), int64(1), object(2)
memory usage: 16.0+ KB


In [3]:
# Explore
d.head()

Unnamed: 0,Country Name,Country Code,Year,GDP Per Capita,Education Expenditure (% GDP),Health Expenditure (% GDP),R&D
0,Algeria,DZA,2013,5519.777576,17.646034,6.035763,544258300.0
1,Algeria,DZA,2014,5516.229463,16.469202,6.547214,554779900.0
2,Algeria,DZA,2015,4197.419971,15.202961,6.978492,430671800.0
3,Algeria,DZA,2016,3967.20066,16.11104,6.607498,415246100.0
4,Algeria,DZA,2017,4134.936099,17.556049,6.279384,908862200.0


In [4]:
# Explore
d.tail()

Unnamed: 0,Country Name,Country Code,Year,GDP Per Capita,Education Expenditure (% GDP),Health Expenditure (% GDP),R&D
285,Zambia,ZMB,2018,1475.199836,17.118719,5.056766,17852880.0
286,Zambia,ZMB,2019,1268.120941,15.29187,5.319501,15815400.0
287,Zambia,ZMB,2020,956.831747,12.37802,5.617884,12288430.0
288,Zambia,ZMB,2021,1137.344395,11.51414,5.200252,15027620.0
289,Zambia,ZMB,2022,1487.907764,10.447814,5.200252,20209350.0


# **3.Cleaning**

## Column names

In [5]:
# Country Name to Country
d.rename(columns = {'Country Name':'Country'}, inplace = True)

# Country Code to Code
d.rename(columns = {'Country Code':'Code'}, inplace = True)

# GDP Per Capita to GDPPC
d.rename(columns = {'GDP Per Capita':'GDPPC'}, inplace = True)

# Education Expenditure (% GDP) to EducationEx
d.rename(columns = {'Education Expenditure (% GDP)':'EducationEx'}, inplace = True)

# Health Expenditure (% GDP) to HealthEx
d.rename(columns = {'Health Expenditure (% GDP)':'HealthEx'}, inplace = True)

d.head()


Unnamed: 0,Country,Code,Year,GDPPC,EducationEx,HealthEx,R&D
0,Algeria,DZA,2013,5519.777576,17.646034,6.035763,544258300.0
1,Algeria,DZA,2014,5516.229463,16.469202,6.547214,554779900.0
2,Algeria,DZA,2015,4197.419971,15.202961,6.978492,430671800.0
3,Algeria,DZA,2016,3967.20066,16.11104,6.607498,415246100.0
4,Algeria,DZA,2017,4134.936099,17.556049,6.279384,908862200.0


In [6]:
# Duplicate the dataframe
d2 = d

In [7]:
# Duplicate year as string / how to create a column
d2['Year_str'] = d2['Year'].astype(str)
d2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290 entries, 0 to 289
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Country      290 non-null    object 
 1   Code         290 non-null    object 
 2   Year         290 non-null    int64  
 3   GDPPC        290 non-null    float64
 4   EducationEx  290 non-null    float64
 5   HealthEx     290 non-null    float64
 6   R&D          290 non-null    float64
 7   Year_str     290 non-null    object 
dtypes: float64(4), int64(1), object(3)
memory usage: 18.2+ KB


In [8]:
# Indexing
idx = pd.MultiIndex.from_product([d2['Country'].unique(), range(d2.Year.min(), d2.Year.max()+1)],
                                 names = ['Country', 'Year'])

## Attach the index
d2 = d2.set_index(['Country', 'Year'])

d2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Code,GDPPC,EducationEx,HealthEx,R&D,Year_str
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Algeria,2013,DZA,5519.777576,17.646034,6.035763,544258300.0,2013
Algeria,2014,DZA,5516.229463,16.469202,6.547214,554779900.0,2014
Algeria,2015,DZA,4197.419971,15.202961,6.978492,430671800.0,2015
Algeria,2016,DZA,3967.20066,16.11104,6.607498,415246100.0,2016
Algeria,2017,DZA,4134.936099,17.556049,6.279384,908862200.0,2017


In [9]:
d2['GDPPC'].unique()

array([ 5519.77757552,  5516.22946322,  4197.41997102,  3967.20065952,
        4134.93609898,  4171.79530904,  4021.98360797,  3354.15730265,
        3700.31119461,  4273.92218312,  5101.98387641,  5059.08044129,
        3100.83068531,  1709.51553405,  2283.21423256,  2487.50099555,
        2142.23875713,  1502.95075415,  1903.71740496,  2998.50115811,
        6436.60306321,  6844.03299551,  5869.73777207,  6411.55166555,
        6705.34106168,  6947.81784114,  6691.16105117,  5875.07060571,
        7238.79609591,  7737.65468113,   762.30378033,   767.37134425,
         632.12668584,   665.78632853,   711.18454394,   779.20276806,
         772.16687755,   833.24433611,   893.07719599,   832.88426567,
         241.54766566,   257.81855747,   289.35962714,   242.53952736,
         244.14542214,   232.06061658,   216.97297086,   216.82741745,
         221.15780341,   238.44187566,  3757.65995326,  3739.2782789 ,
        3169.07891541,  3312.69675667,  3534.34358304,  3860.45481136,
      

##Delete NaN

In [10]:
# Display if there ara NaN
d2.isna()

Unnamed: 0_level_0,Unnamed: 1_level_0,Code,GDPPC,EducationEx,HealthEx,R&D,Year_str
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Algeria,2013,False,False,False,False,False,False
Algeria,2014,False,False,False,False,False,False
Algeria,2015,False,False,False,False,False,False
Algeria,2016,False,False,False,False,False,False
Algeria,2017,False,False,False,False,False,False
...,...,...,...,...,...,...,...
Zambia,2018,False,False,False,False,False,False
Zambia,2019,False,False,False,False,False,False
Zambia,2020,False,False,False,False,False,False
Zambia,2021,False,False,False,False,False,False


In [11]:
# Delete NaN
d2 = d2.dropna()
d2.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 290 entries, ('Algeria', 2013) to ('Zambia', 2022)
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Code         290 non-null    object 
 1   GDPPC        290 non-null    float64
 2   EducationEx  290 non-null    float64
 3   HealthEx     290 non-null    float64
 4   R&D          290 non-null    float64
 5   Year_str     290 non-null    object 
dtypes: float64(4), object(2)
memory usage: 15.9+ KB


In [12]:
# By the flies
d2 = d2.replace('..',np.NaN)
d2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Code,GDPPC,EducationEx,HealthEx,R&D,Year_str
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Algeria,2013,DZA,5519.777576,17.646034,6.035763,544258300.0,2013
Algeria,2014,DZA,5516.229463,16.469202,6.547214,554779900.0,2014
Algeria,2015,DZA,4197.419971,15.202961,6.978492,430671800.0,2015
Algeria,2016,DZA,3967.20066,16.11104,6.607498,415246100.0,2016
Algeria,2017,DZA,4134.936099,17.556049,6.279384,908862200.0,2017


In [13]:
morocco = d2.filter(like = 'Morocco', axis = 0)
morocco.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Code,GDPPC,EducationEx,HealthEx,R&D,Year_str
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Morocco,2013,MAR,3423.882024,14.346839,5.687469,714301200.0,2013
Morocco,2014,MAR,3478.41475,14.346839,5.861554,735232700.0,2014
Morocco,2015,MAR,3183.747478,4.64598,5.570224,681434400.0,2015
Morocco,2016,MAR,3178.058724,14.346839,5.499204,688588100.0,2016
Morocco,2017,MAR,3336.528682,13.66193,5.392125,731589800.0,2017


In [14]:
y20 = d2.filter(like = '2020', axis=0)
y20.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Code,GDPPC,EducationEx,HealthEx,R&D,Year_str
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Algeria,2020,DZA,3354.157303,16.549191,6.32118,378166100.0,2020
Angola,2020,AGO,1502.950754,6.46723,2.911835,16222940.0,2020
Botswana,2020,BWA,5875.070606,15.503146,6.186646,66271100.0,2020
Burkina Faso,2020,BFA,833.244336,21.111717,6.719326,44518380.0,2020
Burundi,2020,BDI,216.827417,20.741949,6.501417,4209194.0,2020


## **4. Pie Chart**

In [15]:
### Sort the data frame
y20_rd = y20.sort_values('GDPPC', ascending=False)

### Extract 30 countries
y20_rd_10 = y20_rd.iloc[0:10]

In [16]:
fig5 = px.pie(y20_rd_10, values = 'GDPPC', names = y20_rd_10.index.get_level_values(0))
fig5.show()

ANALYSIS:GDP per capita is a measure of the average income of a person in a country, and is calculated by dividing the country's total GDP by the population.
The graph shows that Seychelles has the highest GDP per capita in the world, followed by Mauritius, Gabon, Botswana and Namibia. These countries are all rich in natural resources, such as oil, natural gas, and diamonds.
The countries with the lowest GDP per capita in the world are Algeria, Cabo verde, Cote d'lvoire.
In general, African countries have a lower GDP per capita than developed countries. This is due to a number of factors, such as poverty, inequality and political instability. However, there are some African countries that are experiencing significant economic growth.
Countries with higher GDP per capita generally have an economy based on tourism, fishing and agriculture or are rich in natural resources, such as oil and natural gas.
The graph also shows that Africa's GDP per capita has increased in recent years. However, growth has been uneven, with North African countries experiencing faster growth than Sub-Saharan countries.

To increase Africa's GDP per capita, the factors that contribute to economic inequality need to be addressed. This includes improving education, infrastructure and access to markets

## **5. Box Plot**

In [17]:
fig6 = px.box(d2, x = 'Year_str', y = 'R&D')
fig6.show()

ANALYSIS: The graph shows the number of R&D projects in Africa over the years. The number of projects has been increasing every year, reaching a maximum of 908 million in 2017
This increase in the number of R&D projects is a positive sign of Africa's commitment to innovation. The continent is experiencing rapid economic and demographic growth.
There are a number of factors that may contribute to this increase in the number of R&D projects in Africa. One of the most important factors is the increasing investment in R&D by African governments. Governments are increasingly recognizing the importance of R&D for economic and social development, and are increasing their investments in this field.
Another factor contributing to the increase is the growth of African technology companies. These companies are increasingly investing in R&D to develop new products and services that can compete in global markets.
Finally, the increase in the number of R&D projects in Africa is also driven by growing international cooperation. African governments and businesses are increasingly working with their counterparts in other countries to develop joint R&D projects.
However, R&D spending in Africa remains relatively low compared to other regions of the world. There are a number of factors that contribute to low R&D spending in Africa. One of the most important factors is poverty. Most African countries are poor and have a young and growing population.

##**6. A little more complex graph**

In [18]:
## Create a list to select countries
c_list = ['Algeria', 'Angola', 'Burundi', 'Chad', 'Mozambique', 'Sudan']

## Select from the dataframe (unidexed)
d3 = d2.reset_index()
afri = d3[d3['Country'].isin(c_list)]

afri.head()

Unnamed: 0,Country,Year,Code,GDPPC,EducationEx,HealthEx,R&D,Year_str
0,Algeria,2013,DZA,5519.777576,17.646034,6.035763,544258300.0,2013
1,Algeria,2014,DZA,5516.229463,16.469202,6.547214,554779900.0,2014
2,Algeria,2015,DZA,4197.419971,15.202961,6.978492,430671800.0,2015
3,Algeria,2016,DZA,3967.20066,16.11104,6.607498,415246100.0,2016
4,Algeria,2017,DZA,4134.936099,17.556049,6.279384,908862200.0,2017


In [19]:
## GRAPH
fig7 = px.line(afri, x = 'HealthEx', y = 'GDPPC', text = 'Year_str', color = 'Country')
fig7.update_traces(textposition = 'top center')
fig7.show()



ANALYSIS: The growth in health spending is due to a number of factors, including population growth, growth in the economy and increasing awareness of the importance of health.
The graph shows that health spending in Africa has grown steadily in recent years. The most notable are Algeria and Angola, since they are countries with rapid economic growth, which has generated more resources available to invest in health. Both countries have made health a political priority, allocating significant resources to this sector and have a developed infrastructure.
The countries that did not do well are Chad, Mozmbique and Sudan. There are a number of factors that may explain this lower-than-average health spending. These factors include:
Poverty: Chad, Mozambique and Sudan are poor countries, with a GDP per capita below the African average. This means they have fewer resources available to invest in health.
As mentioned above, political instability, as this factor can hinder the planning and execution of health policies.
And the lack of access to health care, since Chad, Mozambique and Sudan usually have poor health infrastructure, making it difficult to access health care for people living in rural or remote areas.


#**7. Animation**

In [20]:
d = d.dropna()
d.info

<bound method DataFrame.info of      Country Code  Year        GDPPC  EducationEx  HealthEx           R&D  \
0    Algeria  DZA  2013  5519.777576    17.646034  6.035763  5.442583e+08   
1    Algeria  DZA  2014  5516.229463    16.469202  6.547214  5.547799e+08   
2    Algeria  DZA  2015  4197.419971    15.202961  6.978492  4.306718e+08   
3    Algeria  DZA  2016  3967.200660    16.111040  6.607498  4.152461e+08   
4    Algeria  DZA  2017  4134.936099    17.556049  6.279384  9.088622e+08   
..       ...  ...   ...          ...          ...       ...           ...   
285   Zambia  ZMB  2018  1475.199836    17.118719  5.056766  1.785288e+07   
286   Zambia  ZMB  2019  1268.120941    15.291870  5.319501  1.581540e+07   
287   Zambia  ZMB  2020   956.831747    12.378020  5.617884  1.228843e+07   
288   Zambia  ZMB  2021  1137.344395    11.514140  5.200252  1.502762e+07   
289   Zambia  ZMB  2022  1487.907764    10.447814  5.200252  2.020935e+07   

    Year_str  
0       2013  
1       2014 

In [21]:
px.scatter(d, x = 'HealthEx', y = 'GDPPC',
           size = 'EducationEx', hover_name = 'Country',
           size_max = 40,
           animation_group = 'Country', animation_frame = 'Year')

In [22]:
## Create a list to select countries
c_list = ['Algeria', 'Angola', 'Burundi', 'Chad', 'Mozambique', 'Sudan',
          'Nigeria', 'Morocco', 'Mali', 'Madagascar' ]

## Select from the dataframe (unidexed)
afri = d[d['Country'].isin(c_list)]

afri.head()

Unnamed: 0,Country,Code,Year,GDPPC,EducationEx,HealthEx,R&D,Year_str
0,Algeria,DZA,2013,5519.777576,17.646034,6.035763,544258300.0,2013
1,Algeria,DZA,2014,5516.229463,16.469202,6.547214,554779900.0,2014
2,Algeria,DZA,2015,4197.419971,15.202961,6.978492,430671800.0,2015
3,Algeria,DZA,2016,3967.20066,16.11104,6.607498,415246100.0,2016
4,Algeria,DZA,2017,4134.936099,17.556049,6.279384,908862200.0,2017


In [23]:
px.scatter(afri, x = 'HealthEx', y = 'GDPPC',
           size = 'EducationEx', hover_name = 'Country', color = 'Country',
           size_max = 40,
           animation_group = 'Country', animation_frame = 'Year')

## 7.1.Animation from Scratch

In [24]:
### rows    = years
### columns = country
### values  = net

l2 = afri.pivot (index = 'Year',
                  columns = 'Country',
                  values = 'HealthEx')

l2 = l2.reset_index()
l2['Year_str'] = l2['Year'].astype(str)
l2.head()

Country,Year,Algeria,Angola,Burundi,Chad,Madagascar,Mali,Morocco,Mozambique,Nigeria,Sudan,Year_str
0,2013,6.035763,2.732827,8.537285,4.066453,3.738969,3.964087,5.687469,6.12556,3.420693,6.964875,2013
1,2014,6.547214,2.434129,7.210815,4.273169,4.428337,4.481583,5.861554,6.332318,3.348404,5.676197,2014
2,2015,6.978492,2.605795,6.290226,4.523952,4.972244,4.111434,5.570224,6.716633,3.58195,7.267686,2015
3,2016,6.607498,2.71315,7.171379,5.050778,5.256974,3.773776,5.499204,7.281345,3.647737,5.483687,2016
4,2017,6.279384,2.793838,7.280694,4.557227,4.844316,3.674258,5.392125,7.836802,3.747625,5.923124,2017


## 4.2 Create the frame

In [25]:
# The frame
fig = go.Figure(
    layout = go.Layout(
        updatemenus = [dict(type = 'buttons', direction = 'right', x = 0.9, y = 1.16),],
        xaxis = dict(range = [2013, 2017],
                     autorange = False, tickwidth = 2,
                     title_text = 'Year'),
        yaxis = dict(range = [0 ,20],
                     autorange = False,
                     title_text = ''),
        title = 'Health expenditures in African countries',
        title_font_size = 30,
        title_x = 0.5
    )
)
fig.show()

In [26]:
## Add Traces
init = 1


### Madagascar
fig.add_trace(
    go.Scatter(
        x = l2.Year[:init],
        y = l2.Madagascar[:init],
        name = 'Madagascar',
        line = dict(color='black'),
        mode = 'lines'
    )
)



### Morocco
fig.add_trace(
    go.Scatter(
        x = l2.Year[:init],
        y = l2.Morocco[:init],
        name = 'Morocco',
        line = dict(color='red'),
        mode = 'lines'
    )
)


### Nigeria
fig.add_trace(
    go.Scatter(
        x = l2.Year[:init],
        y = l2.Nigeria[:init],
        name = 'Nigeria',
        line = dict(color='blue'),
        mode = 'lines'
    )
)

In [27]:
## Frames
frames = [
    go.Frame(
        data = [
            go.Scatter(x=l2.Year[:k], y=l2.Madagascar[:k]),
            go.Scatter(x=l2.Year[:k], y=l2.Morocco[:k]),
            go.Scatter(x=l2.Year[:k], y=l2.Nigeria[:k])
        ]
    )
    for k in range(init,len(l2)+1)
]


## Animation
fig.update(frames=frames)

In [28]:
## Play button
fig.update_layout(
    updatemenus = [
        dict(
            buttons = list([
                dict(
                label = 'Play',
          method = 'animate',
          args = [None, {'frame':{'duration':800}}]
          )
       ]
            )
        )
    ]
)

ANALYSIS: Health care spending is an important measure of the health of a population. Higher health care spending generally indicates better population health, as it translates into greater access to health care, better health outcomes, and longer life expectancy.
In Africa, healthcare spending is significantly lower than in developed countries. This is due to a number of factors, including:
Low income: Most African countries are low income, which limits the amount of resources that can be allocated to healthcare.
Income inequality: Income inequality is high in many African countries, meaning that a large proportion of the population lives in poverty and cannot afford private healthcare.
Health problems: Africa has a number of major health problems, such as infectious diseases, malaria and tuberculosis.
By 2013, Morocco invested more in health spending compared to the other two countries, but by 2015, Madagascar began to get closer and Nigeria presented a straight line where it neither went down nor went up, which means that it has not changed significantly. in recent years, which could indicate that government resources are being very limited, which increases the probability of dying prematurely or suffering from chronic diseases.