In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import seaborn as sns
#import missingno as msno

# matplotlib
from matplotlib import pyplot as plt
from matplotlib.dates import date2num, num2date
from matplotlib import dates as mdates
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch

# scipy specifics
from scipy import stats as sps
from scipy.interpolate import interp1d

In [21]:
# Download the MINSA positive-cases CSV and load it into `test`.
from contextlib import closing  # works for both the Python 2 and Python 3 urlopen

try:
    from urllib.request import Request, urlopen  # Python 3
except ImportError:
    from urllib2 import Request, urlopen  # Python 2

req = Request('https://cloud.minsa.gob.pe/s/Y8w3wHsEdYQSZRp/download')
# A browser-like User-Agent is sent with the request
# (presumably the server rejects the default Python agent -- TODO confirm).
req.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0')

# Close the HTTP response as soon as pandas has consumed it, even if parsing
# fails; previously the connection was left open for the life of the kernel.
with closing(urlopen(req)) as content:
    # The file is ';'-separated; 'EN INVESTIGACIÓN' placeholders are read as NaN.
    test = pd.read_csv(content, sep=";", na_values='EN INVESTIGACIÓN')
print(test)

         FECHA_CORTE                              UUID DEPARTAMENTO  \
0           20210703  7320cabdc1aaca6c59014cae76a134e6   LAMBAYEQUE   
1           20210703  cecdbf10074dbc011ae05b3cbd320a6f         LIMA   
2           20210703  71ecb6bccb248b0bb2ac72ed51b5e979         LIMA   
3           20210703  566af4276cbe9359abe93f9aa86396c3         LIMA   
4           20210703  027561e9d126e7c283d79c02cede562d         LIMA   
...              ...                               ...          ...   
2065108     20210703  0295e3f769c42a483a4567aa384dd18c         LIMA   
2065109     20210703  a7d0f2742d66fa2653d2542ec53b5adc     AREQUIPA   
2065110     20210703  efaf0e5497b15883203fb4214c0ccf1b   LAMBAYEQUE   
2065111     20210703  dac870c3531fdeaf891db7c8c88527f8  LA LIBERTAD   
2065112     20210703  c0fb1b2eb3c9225b3ede3db6677c5c02         LIMA   

         PROVINCIA    DISTRITO METODODX  EDAD       SEXO  FECHA_RESULTADO  
0        FERREÑAFE   FERREÑAFE       PR  35.0   FEMENINO       20200329

In [22]:
# Parse FECHA_RESULTADO (YYYYMMDD numbers in the raw file) into datetime64.
#
# The column contains missing values, so pandas may load it as float64; a plain
# astype(str) then produces text such as '20200329.0' / 'nan', which does not
# match '%Y%m%d'. Going through nullable integers yields clean 8-digit strings,
# and missing entries are masked so they become NaT.
#
# The dtype guard makes the cell safe to re-run once the column is converted.
if not pd.api.types.is_datetime64_any_dtype(test['FECHA_RESULTADO']):
    fecha_num = pd.to_numeric(test['FECHA_RESULTADO'])
    fecha_txt = fecha_num.astype('Int64').astype(str)
    test['FECHA_RESULTADO'] = pd.to_datetime(
        fecha_txt.where(fecha_num.notna()), format='%Y%m%d'
    )
test

Unnamed: 0,FECHA_CORTE,UUID,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO
0,20210703,7320cabdc1aaca6c59014cae76a134e6,LAMBAYEQUE,FERREÑAFE,FERREÑAFE,PR,35.0,FEMENINO,2020-03-29
1,20210703,cecdbf10074dbc011ae05b3cbd320a6f,LIMA,LIMA,CHORRILLOS,PR,36.0,MASCULINO,2020-03-30
2,20210703,71ecb6bccb248b0bb2ac72ed51b5e979,LIMA,LIMA,LIMA,PR,1.0,FEMENINO,2020-03-30
3,20210703,566af4276cbe9359abe93f9aa86396c3,LIMA,LIMA,LIMA,PR,65.0,FEMENINO,2020-03-30
4,20210703,027561e9d126e7c283d79c02cede562d,LIMA,LIMA,LIMA,PR,32.0,FEMENINO,2020-03-30
...,...,...,...,...,...,...,...,...,...
2065108,20210703,0295e3f769c42a483a4567aa384dd18c,LIMA,LIMA,LIMA,PCR,67.0,MASCULINO,2021-05-29
2065109,20210703,a7d0f2742d66fa2653d2542ec53b5adc,AREQUIPA,AREQUIPA,CAYMA,PCR,48.0,MASCULINO,2021-05-29
2065110,20210703,efaf0e5497b15883203fb4214c0ccf1b,LAMBAYEQUE,CHICLAYO,CHICLAYO,PCR,58.0,MASCULINO,2021-05-29
2065111,20210703,dac870c3531fdeaf891db7c8c88527f8,LA LIBERTAD,TRUJILLO,TRUJILLO,PCR,46.0,MASCULINO,2021-05-28


In [23]:
# Number of missing values in each column of the cases table.
test.isna().sum()

FECHA_CORTE             0
UUID                    0
DEPARTAMENTO            0
PROVINCIA          105843
DISTRITO           105843
METODODX                0
EDAD                  344
SEXO                    1
FECHA_RESULTADO      2023
dtype: int64

In [24]:
# Province population lookup table, read from a CSV next to the notebook.
ruta_poblacion = 'poblacion_provincia.csv'
poblacion_csv = pd.read_csv(ruta_poblacion)
poblacion_csv

Unnamed: 0,UBIGEO,PROVINCIA,POBLACION
0,10100,CHACHAPOYAS,63188
1,10200,BAGUA,84672
2,10300,BONGARA,26830
3,10400,CONDORCANQUI,51344
4,10500,LUYA,47827
...,...,...,...
191,240300,ZARUMILLA,56038
192,250100,CORONEL PORTILLO,447733
193,250200,ATALAYA,61049
194,250300,PADRE ABAD,77044


In [25]:
# 'split' orientation: the 'data' entry holds one list per row of the table.
poblacion_dict = poblacion_csv.to_dict(orient='split')
poblacion_dict['data']

[[10100, 'CHACHAPOYAS', 63188],
 [10200, 'BAGUA', 84672],
 [10300, 'BONGARA', 26830],
 [10400, 'CONDORCANQUI', 51344],
 [10500, 'LUYA', 47827],
 [10600, 'RODRIGUEZ DE MENDOZA', 33651],
 [10700, 'UTCUBAMBA', 119294],
 [20100, 'HUARAZ', 185276],
 [20200, 'AIJA', 6433],
 [20300, 'ANTONIO RAYMONDI', 13950],
 [20400, 'ASUNCION', 7710],
 [20500, 'BOLOGNESI', 24012],
 [20600, 'CARHUAZ', 50007],
 [20700, 'CARLOS FERMIN FITZCARRALD', 18496],
 [20800, 'CASMA', 57256],
 [20900, 'CORONGO', 8017],
 [21000, 'HUARI', 63264],
 [21100, 'HUARMEY', 33066],
 [21200, 'HUAYLAS', 56557],
 [21300, 'MARISCAL LUZURIAGA', 21787],
 [21400, 'OCROS', 7224],
 [21500, 'PALLASCA', 24371],
 [21600, 'POMABAMBA', 26675],
 [21700, 'RECUAY', 18085],
 [21800, 'SANTA', 474053],
 [21900, 'SIHUAS', 28630],
 [22000, 'YUNGAY', 55769],
 [30100, 'ABANCAY', 120116],
 [30200, 'ANDAHUAYLAS', 150758],
 [30300, 'ANTABAMBA', 11781],
 [30400, 'AYMARAES', 24570],
 [30500, 'COTABAMBAS', 55208],
 [30600, 'CHINCHEROS', 46544],
 [30700, 'GRAU

In [26]:
# Collect the third field (index 2) of every row: the population figure.
poblacion = [fila[2] for fila in poblacion_dict['data']]

poblacion

[63188,
 84672,
 26830,
 51344,
 47827,
 33651,
 119294,
 185276,
 6433,
 13950,
 7710,
 24012,
 50007,
 18496,
 57256,
 8017,
 63264,
 33066,
 56557,
 21787,
 7224,
 24371,
 26675,
 18085,
 474053,
 28630,
 55769,
 120116,
 150758,
 11781,
 24570,
 55208,
 46544,
 21759,
 1175765,
 61708,
 43690,
 34743,
 97458,
 16426,
 54851,
 12797,
 317801,
 32482,
 8341,
 97205,
 75277,
 51838,
 29139,
 9909,
 9292,
 19866,
 17063,
 388170,
 83167,
 83916,
 151714,
 29357,
 123948,
 83913,
 203724,
 145770,
 51678,
 47114,
 22638,
 38602,
 1129854,
 511019,
 24000,
 63131,
 71582,
 34754,
 106476,
 70143,
 62059,
 167910,
 26644,
 47579,
 101735,
 70043,
 121265,
 37503,
 53901,
 14588,
 33883,
 18182,
 85995,
 315799,
 53247,
 32427,
 16372,
 52095,
 138275,
 29160,
 50086,
 36987,
 17114,
 18705,
 445752,
 262110,
 78472,
 14832,
 174016,
 595183,
 59138,
 167385,
 88405,
 22757,
 239105,
 91849,
 40041,
 57604,
 1118724,
 123480,
 15982,
 86411,
 30987,
 85091,
 112970,
 85092,
 168670,
 55868

In [27]:
# Collect the second field (index 1) of every row: the province name.
provincia = [fila[1] for fila in poblacion_dict['data']]

provincia

['CHACHAPOYAS',
 'BAGUA',
 'BONGARA',
 'CONDORCANQUI',
 'LUYA',
 'RODRIGUEZ DE MENDOZA',
 'UTCUBAMBA',
 'HUARAZ',
 'AIJA',
 'ANTONIO RAYMONDI',
 'ASUNCION',
 'BOLOGNESI',
 'CARHUAZ',
 'CARLOS FERMIN FITZCARRALD',
 'CASMA',
 'CORONGO',
 'HUARI',
 'HUARMEY',
 'HUAYLAS',
 'MARISCAL LUZURIAGA',
 'OCROS',
 'PALLASCA',
 'POMABAMBA',
 'RECUAY',
 'SANTA',
 'SIHUAS',
 'YUNGAY',
 'ABANCAY',
 'ANDAHUAYLAS',
 'ANTABAMBA',
 'AYMARAES',
 'COTABAMBAS',
 'CHINCHEROS',
 'GRAU',
 'AREQUIPA',
 'CAMANA',
 'CARAVELI',
 'CASTILLA',
 'CAYLLOMA',
 'CONDESUYOS',
 'ISLAY',
 'LA UNION',
 'HUAMANGA',
 'CANGALLO',
 'HUANCA SANCOS',
 'HUANTA',
 'LA MAR',
 'LUCANAS',
 'PARINACOCHAS',
 'PAUCAR DEL SARA SARA',
 'SUCRE',
 'VICTOR FAJARDO',
 'VILCAS HUAMAN',
 'CAJAMARCA',
 'CAJABAMBA',
 'CELENDIN',
 'CHOTA',
 'CONTUMAZA',
 'CUTERVO',
 'HUALGAYOC',
 'JAEN',
 'SAN IGNACIO',
 'SAN MARCOS',
 'SAN MIGUEL',
 'SAN PABLO',
 'SANTA CRUZ',
 'CALLAO',
 'CUSCO',
 'ACOMAYO',
 'ANTA',
 'CALCA',
 'CANAS',
 'CANCHIS',
 'CHUMBIVILCAS',


In [28]:
# Province name -> population lookup, pairing the two parallel lists by position.
res = dict(zip(provincia, poblacion))
res

{'CHACHAPOYAS': 63188,
 'BAGUA': 84672,
 'BONGARA': 26830,
 'CONDORCANQUI': 51344,
 'LUYA': 47827,
 'RODRIGUEZ DE MENDOZA': 33651,
 'UTCUBAMBA': 119294,
 'HUARAZ': 185276,
 'AIJA': 6433,
 'ANTONIO RAYMONDI': 13950,
 'ASUNCION': 7710,
 'BOLOGNESI': 24012,
 'CARHUAZ': 50007,
 'CARLOS FERMIN FITZCARRALD': 18496,
 'CASMA': 57256,
 'CORONGO': 8017,
 'HUARI': 63264,
 'HUARMEY': 33066,
 'HUAYLAS': 56557,
 'MARISCAL LUZURIAGA': 21787,
 'OCROS': 7224,
 'PALLASCA': 24371,
 'POMABAMBA': 26675,
 'RECUAY': 18085,
 'SANTA': 474053,
 'SIHUAS': 28630,
 'YUNGAY': 55769,
 'ABANCAY': 120116,
 'ANDAHUAYLAS': 150758,
 'ANTABAMBA': 11781,
 'AYMARAES': 24570,
 'COTABAMBAS': 55208,
 'CHINCHEROS': 46544,
 'GRAU': 21759,
 'AREQUIPA': 1175765,
 'CAMANA': 61708,
 'CARAVELI': 43690,
 'CASTILLA': 34743,
 'CAYLLOMA': 97458,
 'CONDESUYOS': 16426,
 'ISLAY': 54851,
 'LA UNION': 12797,
 'HUAMANGA': 317801,
 'CANGALLO': 32482,
 'HUANCA SANCOS': 8341,
 'HUANTA': 97205,
 'LA MAR': 75277,
 'LUCANAS': 51838,
 'PARINACOCHAS':

In [29]:
# Look up each record's province in `res`; provinces absent from the lookup
# (including missing PROVINCIA values) end up as NaN.
poblacion_por_fila = test['PROVINCIA'].map(res)
test['POBLACION'] = poblacion_por_fila

In [30]:
# Columns that contain at least one missing value ...
null_columns = test.columns[test.isna().any()]
# ... displayed for the rows whose SEXO is missing.
test.loc[test["SEXO"].isna(), null_columns]

Unnamed: 0,PROVINCIA,DISTRITO,EDAD,SEXO,FECHA_RESULTADO,POBLACION
1611478,LIMA,LIMA,0.0,,2021-03-13,9674755.0


In [31]:
# Slice of records sharing province, district and result date with the row
# whose SEXO is missing (LIMA / LIMA / 2021-03-13).
find_provincia = test['PROVINCIA'].eq('LIMA')
find_distrito = test['DISTRITO'].eq('LIMA')
find_fecha = test['FECHA_RESULTADO'].eq('2021-03-13')

data_exploratoria = test.loc[find_provincia & find_distrito & find_fecha]
data_exploratoria

Unnamed: 0,FECHA_CORTE,UUID,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO,POBLACION
1715,20210703,80add061b48b602a09bf21af5d35982a,LIMA,LIMA,LIMA,PR,25.0,FEMENINO,2021-03-13,9674755.0
702544,20210703,85487d33b0ecd925f6bc9aabc0ed9f8f,LIMA,LIMA,LIMA,PR,29.0,FEMENINO,2021-03-13,9674755.0
742643,20210703,86930b986640153a658ab64b157c200c,LIMA,LIMA,LIMA,PR,35.0,MASCULINO,2021-03-13,9674755.0
745736,20210703,fd1fbc489f3f9e7021aed7061cb1250d,LIMA,LIMA,LIMA,PR,49.0,FEMENINO,2021-03-13,9674755.0
753035,20210703,f01bb9bb39ce59059794607c7bbcc1f2,LIMA,LIMA,LIMA,PR,48.0,FEMENINO,2021-03-13,9674755.0
...,...,...,...,...,...,...,...,...,...,...
1614777,20210703,24d999c1b7b3a99b4e57071ba54ec864,LIMA,LIMA,LIMA,PCR,40.0,MASCULINO,2021-03-13,9674755.0
1617652,20210703,b80966b88d7aec0b760f4f42b976906e,LIMA,LIMA,LIMA,PCR,57.0,MASCULINO,2021-03-13,9674755.0
1658343,20210703,2b7c8df732137fda4523e76905795ba1,LIMA,LIMA,LIMA,PCR,36.0,MASCULINO,2021-03-13,9674755.0
1679161,20210703,9ec7274aa40590eed05b2ee513eef16f,LIMA,LIMA,LIMA,PCR,50.0,FEMENINO,2021-03-13,9674755.0


In [32]:
# Non-null count of every column for each SEXO value in the exploratory slice.
conteo_por_sexo = data_exploratoria.groupby("SEXO").count()
conteo_por_sexo

Unnamed: 0_level_0,FECHA_CORTE,UUID,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,FECHA_RESULTADO,POBLACION
SEXO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
FEMENINO,88,88,88,88,88,88,88,88,88
MASCULINO,86,86,86,86,86,86,86,86,86


In [33]:
# Replace missing SEXO values with the fixed label 'FEMENINO'.
# NOTE(review): presumably chosen because FEMENINO was the slight majority
# in the matching LIMA / 2021-03-13 slice -- confirm this is intended.
sexo_rellenado = test['SEXO'].fillna(value='FEMENINO')
test['SEXO'] = sexo_rellenado

In [34]:
# Recompute the columns that still contain missing values ...
null_columns = test.columns[test.isna().any()]
# ... and show them for the rows whose PROVINCIA is missing.
test.loc[test["PROVINCIA"].isna(), null_columns]

Unnamed: 0,PROVINCIA,DISTRITO,EDAD,FECHA_RESULTADO,POBLACION
8,,,41.0,2020-03-30,
10,,,24.0,2020-03-30,
14,,,1.0,2020-03-30,
17,,,43.0,2020-03-30,
22,,,62.0,2020-03-30,
...,...,...,...,...,...
2065015,,,65.0,2021-05-28,
2065018,,,49.0,2021-05-29,
2065047,,,71.0,2021-05-29,
2065049,,,28.0,2021-05-29,


In [35]:
# Make sure FECHA_RESULTADO is datetime64; entries that cannot be parsed become NaT.
#
# `dayfirst=True` was dropped: the dates in this dataset are year-first
# (YYYYMMDD in the raw file, ISO once parsed), so a day-first hint is ignored
# for already-converted values and could swap day and month if this cell were
# ever fed ambiguous text.
test['FECHA_RESULTADO'] = pd.to_datetime(test['FECHA_RESULTADO'], errors='coerce')
test['FECHA_RESULTADO']

0         2020-03-29
1         2020-03-30
2         2020-03-30
3         2020-03-30
4         2020-03-30
             ...    
2065108   2021-05-29
2065109   2021-05-29
2065110   2021-05-29
2065111   2021-05-28
2065112   2021-05-29
Name: FECHA_RESULTADO, Length: 2065113, dtype: datetime64[ns]

In [36]:
# Province population expressed in units of 10,000 inhabitants.
test['POBLACION_DIEZMIL'] = test['POBLACION'].div(10000)

test

Unnamed: 0,FECHA_CORTE,UUID,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO,POBLACION,POBLACION_DIEZMIL
0,20210703,7320cabdc1aaca6c59014cae76a134e6,LAMBAYEQUE,FERREÑAFE,FERREÑAFE,PR,35.0,FEMENINO,2020-03-29,107241.0,10.7241
1,20210703,cecdbf10074dbc011ae05b3cbd320a6f,LIMA,LIMA,CHORRILLOS,PR,36.0,MASCULINO,2020-03-30,9674755.0,967.4755
2,20210703,71ecb6bccb248b0bb2ac72ed51b5e979,LIMA,LIMA,LIMA,PR,1.0,FEMENINO,2020-03-30,9674755.0,967.4755
3,20210703,566af4276cbe9359abe93f9aa86396c3,LIMA,LIMA,LIMA,PR,65.0,FEMENINO,2020-03-30,9674755.0,967.4755
4,20210703,027561e9d126e7c283d79c02cede562d,LIMA,LIMA,LIMA,PR,32.0,FEMENINO,2020-03-30,9674755.0,967.4755
...,...,...,...,...,...,...,...,...,...,...,...
2065108,20210703,0295e3f769c42a483a4567aa384dd18c,LIMA,LIMA,LIMA,PCR,67.0,MASCULINO,2021-05-29,9674755.0,967.4755
2065109,20210703,a7d0f2742d66fa2653d2542ec53b5adc,AREQUIPA,AREQUIPA,CAYMA,PCR,48.0,MASCULINO,2021-05-29,1175765.0,117.5765
2065110,20210703,efaf0e5497b15883203fb4214c0ccf1b,LAMBAYEQUE,CHICLAYO,CHICLAYO,PCR,58.0,MASCULINO,2021-05-29,862709.0,86.2709
2065111,20210703,dac870c3531fdeaf891db7c8c88527f8,LA LIBERTAD,TRUJILLO,TRUJILLO,PCR,46.0,MASCULINO,2021-05-28,1118724.0,111.8724


In [37]:
# Order the records chronologically and keep those dated on or after `start_date`.
start_date = "2021-06-21"
salidasxsemanas = test.sort_values(by='FECHA_RESULTADO')

# Only a lower bound is applied, so `between_two_dates` is the same mask as
# `after_start_date`. To restrict to a single province as well, AND the mask
# with salidasxsemanas["PROVINCIA"] == "<name>" (e.g. "LORETO").
after_start_date = salidasxsemanas["FECHA_RESULTADO"].ge(start_date)
between_two_dates = after_start_date

filtered_dates = salidasxsemanas[between_two_dates]

filtered_dates

Unnamed: 0,FECHA_CORTE,UUID,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO,POBLACION,POBLACION_DIEZMIL
1431836,20210703,6d84adae616af71b65c4dfff715df9c0,TACNA,TACNA,CORONEL GREGORIO ALBARRACIN L.,AG,37.0,MASCULINO,2021-06-21,346192.0,34.6192
1177002,20210703,0a0bf59a896dfb4e5f7e6e262926adec,CAJAMARCA,SAN IGNACIO,SAN IGNACIO,AG,46.0,MASCULINO,2021-06-21,145770.0,14.5770
2042015,20210703,fbb63a9768b18a72fd641ee5c70b516e,AREQUIPA,AREQUIPA,AREQUIPA,PCR,47.0,MASCULINO,2021-06-21,1175765.0,117.5765
893141,20210703,be0fb62ab6bda13f49848f41da71409d,SAN MARTIN,SAN MARTIN,MORALES,PR,47.0,FEMENINO,2021-06-21,218074.0,21.8074
1431837,20210703,d5d4e376979061734e11e17b10c0fbe7,TACNA,JORGE BASADRE,LOCUMBA,AG,38.0,MASCULINO,2021-06-21,12549.0,1.2549
...,...,...,...,...,...,...,...,...,...,...,...
1437654,20210703,360dd9329137e4d7c528089e045f9a42,CUSCO,CANCHIS,SICUANI,AG,64.0,MASCULINO,2021-07-03,106476.0,10.6476
1437653,20210703,6c157776bed8b26e5d2297e6036a7b90,JUNIN,HUANCAYO,HUANCAYO,AG,31.0,MASCULINO,2021-07-03,595183.0,59.5183
1437650,20210703,ea7ae385e7c9c92495c8b990aeb99bcc,AREQUIPA,AREQUIPA,UCHUMAYO,AG,35.0,MASCULINO,2021-07-03,1175765.0,117.5765
978152,20210703,c4a01e5da8149e80f2341632be6722f9,LIMA,LIMA,LIMA,PR,29.0,MASCULINO,2021-07-03,9674755.0,967.4755


In [38]:
# Weekly record counts per sex and province ('W-MON' weekly bins on FECHA_RESULTADO).
claves_grupo = [
    "SEXO",
    "DEPARTAMENTO",
    "PROVINCIA",
    "POBLACION",
    "POBLACION_DIEZMIL",
    pd.Grouper(key="FECHA_RESULTADO", freq="W-MON"),
]
conteos = filtered_dates.groupby(claves_grupo).size()

# Move the first group level (SEXO) into columns; combinations with no
# records are filled with 0.
weekly_sales = conteos.unstack(level=0).fillna(0)
weekly_sales = weekly_sales.sort_values(by=['FECHA_RESULTADO'], ascending=True)

# Row-wise sum of the per-sex counts, then flatten the group keys into columns.
weekly_sales['TOTAL'] = weekly_sales.sum(axis=1, numeric_only=True)
weekly_sales = weekly_sales.reset_index()
weekly_sales

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_DIEZMIL,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
0,AMAZONAS,BAGUA,84672.0,8.4672,2021-06-21,7.0,7.0,14.0
1,LORETO,REQUENA,64459.0,6.4459,2021-06-21,6.0,0.0,6.0
2,AYACUCHO,SUCRE,9292.0,0.9292,2021-06-21,2.0,4.0,6.0
3,LORETO,MAYNAS,550551.0,55.0551,2021-06-21,38.0,25.0,63.0
4,AYACUCHO,VICTOR FAJARDO,19866.0,1.9866,2021-06-21,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...
508,CAJAMARCA,CAJABAMBA,83167.0,8.3167,2021-07-05,4.0,8.0,12.0
509,LORETO,ALTO AMAZONAS,149892.0,14.9892,2021-07-05,4.0,2.0,6.0
510,AYACUCHO,VILCAS HUAMAN,17063.0,1.7063,2021-07-05,2.0,1.0,3.0
511,AREQUIPA,CARAVELI,43690.0,4.3690,2021-07-05,3.0,2.0,5.0


In [39]:
# Turn the raw counts into counts per 10,000 inhabitants by dividing each row
# by its POBLACION_DIEZMIL value.
# Note: the columns are overwritten, so running this cell twice divides twice.
columnas_tasa = ['FEMENINO', 'MASCULINO', 'TOTAL']
divisor = weekly_sales['POBLACION_DIEZMIL'].values
weekly_sales[columnas_tasa] = weekly_sales[columnas_tasa].div(divisor, axis=0)
weekly_sales

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_DIEZMIL,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
0,AMAZONAS,BAGUA,84672.0,8.4672,2021-06-21,0.826720,0.826720,1.653439
1,LORETO,REQUENA,64459.0,6.4459,2021-06-21,0.930824,0.000000,0.930824
2,AYACUCHO,SUCRE,9292.0,0.9292,2021-06-21,2.152389,4.304778,6.457167
3,LORETO,MAYNAS,550551.0,55.0551,2021-06-21,0.690218,0.454091,1.144308
4,AYACUCHO,VICTOR FAJARDO,19866.0,1.9866,2021-06-21,0.503373,0.000000,0.503373
...,...,...,...,...,...,...,...,...
508,CAJAMARCA,CAJABAMBA,83167.0,8.3167,2021-07-05,0.480960,0.961920,1.442880
509,LORETO,ALTO AMAZONAS,149892.0,14.9892,2021-07-05,0.266859,0.133429,0.400288
510,AYACUCHO,VILCAS HUAMAN,17063.0,1.7063,2021-07-05,1.172127,0.586063,1.758190
511,AREQUIPA,CARAVELI,43690.0,4.3690,2021-07-05,0.686656,0.457771,1.144427


In [40]:
# Round the per-sex rates to whole numbers.
columnas_sexo = ['FEMENINO', 'MASCULINO']
weekly_sales[columnas_sexo] = weekly_sales[columnas_sexo].round()

# TOTAL is rebuilt from the rounded components so that it equals their sum.
# NOTE(review): this can differ from rounding the unrounded total
# (e.g. 0.4 + 0.4 gives 0 here, not 1) -- confirm this is intended.
weekly_sales['TOTAL'] = weekly_sales['FEMENINO'] + weekly_sales['MASCULINO']

# Store the week label as 'YYYY-MM-DD' text for export.
weekly_sales['FECHA_RESULTADO'] = weekly_sales['FECHA_RESULTADO'].dt.strftime('%Y-%m-%d')
weekly_sales

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_DIEZMIL,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
0,AMAZONAS,BAGUA,84672.0,8.4672,2021-06-21,1.0,1.0,2.0
1,LORETO,REQUENA,64459.0,6.4459,2021-06-21,1.0,0.0,1.0
2,AYACUCHO,SUCRE,9292.0,0.9292,2021-06-21,2.0,4.0,6.0
3,LORETO,MAYNAS,550551.0,55.0551,2021-06-21,1.0,0.0,1.0
4,AYACUCHO,VICTOR FAJARDO,19866.0,1.9866,2021-06-21,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...
508,CAJAMARCA,CAJABAMBA,83167.0,8.3167,2021-07-05,0.0,1.0,1.0
509,LORETO,ALTO AMAZONAS,149892.0,14.9892,2021-07-05,0.0,0.0,0.0
510,AYACUCHO,VILCAS HUAMAN,17063.0,1.7063,2021-07-05,1.0,1.0,2.0
511,AREQUIPA,CARAVELI,43690.0,4.3690,2021-07-05,1.0,0.0,1.0


In [41]:
# Write the weekly per-province table to CSV, leaving out the row index.
weekly_sales.to_csv(path_or_buf='dataset_covid_total.csv', index=False)

In [42]:
# Write the same table as JSON using the 'table' orientation.
weekly_sales.to_json(path_or_buf='dataset_covid_total.json', orient="table")