In [30]:
import numpy as np
import pandas as pd
import matplotlib as plt
import plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

In [41]:
# Load the raw World Bank total-population export into a DataFrame.
path_pobl_mund = "./Data/TOTAL_poblacion_mundial_world_bank_API_SP.POP.TOTL_DS2_en_csv_v2_5358404.csv"
pobl_mund_df = pd.read_csv(
    filepath_or_buffer=path_pobl_mund,
    skiprows=4,  # the leading rows are not useful and cause parsing problems
)
pobl_mund_df.head(n=2)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,Unnamed: 66
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54608.0,55811.0,56682.0,57475.0,58178.0,58782.0,...,102880.0,103594.0,104257.0,104874.0,105439.0,105962.0,106442.0,106585.0,106537.0,
1,Africa Eastern and Southern,AFE,"Population, total",SP.POP.TOTL,130692579.0,134169237.0,137835590.0,141630546.0,145605995.0,149742351.0,...,567891875.0,583650827.0,600008150.0,616377331.0,632746296.0,649756874.0,667242712.0,685112705.0,702976832.0,


In [68]:
# Show floats with thousands separators and one decimal instead of
# scientific notation.
pd.set_option("display.float_format", "{:,.1f}".format)

In [74]:
# Keep only the row for China and remove the descriptive columns (plus the
# trailing "Unnamed: 66" column, which appears empty in the preview above),
# leaving just the per-year population columns.
# `.loc[...]` followed by `.drop(...)` already yields a new DataFrame, so no
# explicit copy of the full source frame is needed beforehand.
china_pop_df = pobl_mund_df.loc[pobl_mund_df["Country Name"] == "China"].drop(
    columns=[
        "Unnamed: 66",
        "Country Code",
        "Indicator Code",
        "Indicator Name",
        "Country Name",
    ]
)
china_pop_df

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
40,667070000.0,660330000.0,665770000.0,682335000.0,698355000.0,715185000.0,735400000.0,754550000.0,774510000.0,796025000.0,...,1354190000.0,1363240000.0,1371860000.0,1379860000.0,1387790000.0,1396215000.0,1402760000.0,1407745000.0,1411100000.0,1412360000.0


In [75]:
# The frames are small from here on, so lift the cap on displayed rows.
pd.options.display.max_rows = None

In [76]:
# Reshape from wide (one column per year) to long (one row per year).
# `melt` returns a new DataFrame, so `china_pop_df` is left untouched.
china_pop_df_largo = china_pop_df.melt(
    var_name="Year",
    value_name="Poblacion total",
)
china_pop_df_largo


Unnamed: 0,Year,Poblacion total
0,1960,667070000.0
1,1961,660330000.0
2,1962,665770000.0
3,1963,682335000.0
4,1964,698355000.0
5,1965,715185000.0
6,1966,735400000.0
7,1967,754550000.0
8,1968,774510000.0
9,1969,796025000.0


In [77]:
# Visual sanity check of the long-format frame: total population by year.
fig_pob_total = px.line(
    data_frame=china_pop_df_largo,
    x="Year",
    y="Poblacion total",
    title="Evolución de la población",
)
fig_pob_total.show()


In [81]:
# Year-over-year rate of change of the total population, in percent.
# Work on a copy so the long-format frame keeps only its original columns.
total_var_interanual_china = china_pop_df_largo.copy()
total_var_interanual_china["Var interanual"] = 100 * (
    total_var_interanual_china["Poblacion total"].pct_change()
)
total_var_interanual_china

Unnamed: 0,Year,Poblacion total,Var interanual
0,1960,667070000.0,
1,1961,660330000.0,-1.0
2,1962,665770000.0,0.8
3,1963,682335000.0,2.5
4,1964,698355000.0,2.3
5,1965,715185000.0,2.4
6,1966,735400000.0,2.8
7,1967,754550000.0,2.6
8,1968,774510000.0,2.6
9,1969,796025000.0,2.8


In [82]:
# The first year has no previous value to compare against, so its change is
# NaN; replace that NaN with 0.
total_var_interanual_china["Var interanual"] = (
    total_var_interanual_china["Var interanual"].fillna(value=0)
)
total_var_interanual_china


Unnamed: 0,Year,Poblacion total,Var interanual
0,1960,667070000.0,0.0
1,1961,660330000.0,-1.0
2,1962,665770000.0,0.8
3,1963,682335000.0,2.5
4,1964,698355000.0,2.3
5,1965,715185000.0,2.4
6,1966,735400000.0,2.8
7,1967,754550000.0,2.6
8,1968,774510000.0,2.6
9,1969,796025000.0,2.8
