In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import seaborn as sns
import urllib.request
from io import StringIO
import requests
#import missingno as msno

# matplotlib
from matplotlib import pyplot as plt
from matplotlib.dates import date2num, num2date
from matplotlib import dates as mdates
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.patches import Patch

# scipy specifics
from scipy import stats as sps
from scipy.interpolate import interp1d

In [2]:
try:
    from urllib.request import Request, urlopen  # Python 3
except ImportError:
    from urllib2 import Request, urlopen  # Python 2

req = Request('https://cloud.minsa.gob.pe/s/AC2adyLkHCKjmfm/download')
req.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0')
content = urlopen(req)

test = pd.read_csv(content, sep=";", na_values='EN INVESTIGACIÓN', encoding='utf-8')
test

HTTPError: HTTP Error 503: Service Temporarily Unavailable

In [3]:
content= "positivos_covid.csv"

test = pd.read_csv(content, sep=";", na_values='EN INVESTIGACIÓN', encoding='utf-8')
test

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona
0,20220313,JUNIN,HUANCAYO,EL TAMBO,AG,68.0,FEMENINO,20210414.0,120114.0,24615058.0
1,20220313,LIMA,LIMA,SAN JUAN DE MIRAFLORES,AG,21.0,MASCULINO,20210413.0,150133.0,24731649.0
2,20220313,AREQUIPA,AREQUIPA,PAUCARPATA,AG,27.0,MASCULINO,20220222.0,40112.0,24731664.0
3,20220313,PIURA,PIURA,CASTILLA,AG,20.0,MASCULINO,20220114.0,200104.0,24731665.0
4,20220313,ANCASH,SANTA,CHIMBOTE,PR,23.0,MASCULINO,20200704.0,21801.0,24856166.0
...,...,...,...,...,...,...,...,...,...,...
3536836,20220313,LA LIBERTAD,TRUJILLO,TRUJILLO,AG,54.0,FEMENINO,20210325.0,130101.0,
3536837,20220313,LIMA,LIMA,JESUS MARIA,PCR,18.0,FEMENINO,20210506.0,150113.0,
3536838,20220313,UCAYALI,CORONEL PORTILLO,YARINACOCHA,PCR,36.0,FEMENINO,20210122.0,250105.0,
3536839,20220313,CUSCO,ESPINAR,ESPINAR,AG,18.0,MASCULINO,20220112.0,80801.0,


In [4]:
test['FECHA_RESULTADO'] = pd.to_datetime(test['FECHA_RESULTADO'].astype(str), format='%Y%m%d')
test.tail()

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona
3536836,20220313,LA LIBERTAD,TRUJILLO,TRUJILLO,AG,54.0,FEMENINO,2021-03-25,130101.0,
3536837,20220313,LIMA,LIMA,JESUS MARIA,PCR,18.0,FEMENINO,2021-05-06,150113.0,
3536838,20220313,UCAYALI,CORONEL PORTILLO,YARINACOCHA,PCR,36.0,FEMENINO,2021-01-22,250105.0,
3536839,20220313,CUSCO,ESPINAR,ESPINAR,AG,18.0,MASCULINO,2022-01-12,80801.0,
3536840,20220313,CAJAMARCA,JAEN,JAEN,AG,0.0,FEMENINO,2022-01-15,60801.0,


In [5]:
test.isnull().sum()

FECHA_CORTE             0
DEPARTAMENTO          321
PROVINCIA          168322
DISTRITO           168322
METODODX                0
EDAD                  347
SEXO                    1
FECHA_RESULTADO      2023
UBIGEO             168322
id_persona          67322
dtype: int64

In [6]:
indice_departamento = pd.read_csv('https://raw.githubusercontent.com/annaabsi/git-scraper-covid19/main/resultados/positivos_por_departamentos.csv')
indice_departamento

Unnamed: 0,DEPARTAMENTO,METODODX,POBLACION,INDICE
0,AMAZONAS,43809,426806,10264
1,ANCASH,125111,1180638,10597
2,APURIMAC,38705,430736,8986
3,AREQUIPA,211916,1497438,14152
4,AYACUCHO,47964,668213,7178
5,CAJAMARCA,95504,1453711,6570
6,CALLAO,146220,1129854,12941
7,CUSCO,116378,1357075,8576
8,HUANCAVELICA,25562,365317,6997
9,HUANUCO,51921,760267,6829


In [7]:
test = pd.merge(test, indice_departamento,
                        how="left", on=["DEPARTAMENTO"])
test

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX_x,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE
0,20220313,JUNIN,HUANCAYO,EL TAMBO,AG,68.0,FEMENINO,2021-04-14,120114.0,24615058.0,125344.0,1361467.0,9207.0
1,20220313,LIMA,LIMA,SAN JUAN DE MIRAFLORES,AG,21.0,MASCULINO,2021-04-13,150133.0,24731649.0,1601151.0,10628470.0,15065.0
2,20220313,AREQUIPA,AREQUIPA,PAUCARPATA,AG,27.0,MASCULINO,2022-02-22,40112.0,24731664.0,211916.0,1497438.0,14152.0
3,20220313,PIURA,PIURA,CASTILLA,AG,20.0,MASCULINO,2022-01-14,200104.0,24731665.0,157327.0,2047954.0,7682.0
4,20220313,ANCASH,SANTA,CHIMBOTE,PR,23.0,MASCULINO,2020-07-04,21801.0,24856166.0,125111.0,1180638.0,10597.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3536836,20220313,LA LIBERTAD,TRUJILLO,TRUJILLO,AG,54.0,FEMENINO,2021-03-25,130101.0,,146110.0,2016771.0,7245.0
3536837,20220313,LIMA,LIMA,JESUS MARIA,PCR,18.0,FEMENINO,2021-05-06,150113.0,,1601151.0,10628470.0,15065.0
3536838,20220313,UCAYALI,CORONEL PORTILLO,YARINACOCHA,PCR,36.0,FEMENINO,2021-01-22,250105.0,,39361.0,589110.0,6681.0
3536839,20220313,CUSCO,ESPINAR,ESPINAR,AG,18.0,MASCULINO,2022-01-12,80801.0,,116378.0,1357075.0,8576.0


In [8]:
poblacion_csv = pd.read_csv('poblacion_provincia.csv')
poblacion_csv

Unnamed: 0,UBIGEO,PROVINCIA,POBLACION
0,10100,CHACHAPOYAS,63188
1,10200,BAGUA,84672
2,10300,BONGARA,26830
3,10400,CONDORCANQUI,51344
4,10500,LUYA,47827
...,...,...,...
191,240300,ZARUMILLA,56038
192,250100,CORONEL PORTILLO,447733
193,250200,ATALAYA,61049
194,250300,PADRE ABAD,77044


In [9]:
poblacion_dict = poblacion_csv.to_dict('split')
poblacion_dict['data']

[[10100, 'CHACHAPOYAS', 63188],
 [10200, 'BAGUA', 84672],
 [10300, 'BONGARA', 26830],
 [10400, 'CONDORCANQUI', 51344],
 [10500, 'LUYA', 47827],
 [10600, 'RODRIGUEZ DE MENDOZA', 33651],
 [10700, 'UTCUBAMBA', 119294],
 [20100, 'HUARAZ', 185276],
 [20200, 'AIJA', 6433],
 [20300, 'ANTONIO RAYMONDI', 13950],
 [20400, 'ASUNCION', 7710],
 [20500, 'BOLOGNESI', 24012],
 [20600, 'CARHUAZ', 50007],
 [20700, 'CARLOS FERMIN FITZCARRALD', 18496],
 [20800, 'CASMA', 57256],
 [20900, 'CORONGO', 8017],
 [21000, 'HUARI', 63264],
 [21100, 'HUARMEY', 33066],
 [21200, 'HUAYLAS', 56557],
 [21300, 'MARISCAL LUZURIAGA', 21787],
 [21400, 'OCROS', 7224],
 [21500, 'PALLASCA', 24371],
 [21600, 'POMABAMBA', 26675],
 [21700, 'RECUAY', 18085],
 [21800, 'SANTA', 474053],
 [21900, 'SIHUAS', 28630],
 [22000, 'YUNGAY', 55769],
 [30100, 'ABANCAY', 120116],
 [30200, 'ANDAHUAYLAS', 150758],
 [30300, 'ANTABAMBA', 11781],
 [30400, 'AYMARAES', 24570],
 [30500, 'COTABAMBAS', 55208],
 [30600, 'CHINCHEROS', 46544],
 [30700, 'GRAU

In [10]:
poblacion =[]
for x in range(len(poblacion_dict['data'])):
    array = poblacion_dict['data'][x][2]
    poblacion.append(array)
    
poblacion

[63188,
 84672,
 26830,
 51344,
 47827,
 33651,
 119294,
 185276,
 6433,
 13950,
 7710,
 24012,
 50007,
 18496,
 57256,
 8017,
 63264,
 33066,
 56557,
 21787,
 7224,
 24371,
 26675,
 18085,
 474053,
 28630,
 55769,
 120116,
 150758,
 11781,
 24570,
 55208,
 46544,
 21759,
 1175765,
 61708,
 43690,
 34743,
 97458,
 16426,
 54851,
 12797,
 317801,
 32482,
 8341,
 97205,
 75277,
 51838,
 29139,
 9909,
 9292,
 19866,
 17063,
 388170,
 83167,
 83916,
 151714,
 29357,
 123948,
 83913,
 203724,
 145770,
 51678,
 47114,
 22638,
 38602,
 1129854,
 511019,
 24000,
 63131,
 71582,
 34754,
 106476,
 70143,
 62059,
 167910,
 26644,
 47579,
 101735,
 70043,
 121265,
 37503,
 53901,
 14588,
 33883,
 18182,
 85995,
 315799,
 53247,
 32427,
 16372,
 52095,
 138275,
 29160,
 50086,
 36987,
 17114,
 18705,
 445752,
 262110,
 78472,
 14832,
 174016,
 595183,
 59138,
 167385,
 88405,
 22757,
 239105,
 91849,
 40041,
 57604,
 1118724,
 123480,
 15982,
 86411,
 30987,
 85091,
 112970,
 85092,
 168670,
 55868

In [11]:
provincia =[]
for x in range(len(poblacion_dict['data'])):
    array = poblacion_dict['data'][x][1]
    provincia.append(array)
    
provincia

['CHACHAPOYAS',
 'BAGUA',
 'BONGARA',
 'CONDORCANQUI',
 'LUYA',
 'RODRIGUEZ DE MENDOZA',
 'UTCUBAMBA',
 'HUARAZ',
 'AIJA',
 'ANTONIO RAYMONDI',
 'ASUNCION',
 'BOLOGNESI',
 'CARHUAZ',
 'CARLOS FERMIN FITZCARRALD',
 'CASMA',
 'CORONGO',
 'HUARI',
 'HUARMEY',
 'HUAYLAS',
 'MARISCAL LUZURIAGA',
 'OCROS',
 'PALLASCA',
 'POMABAMBA',
 'RECUAY',
 'SANTA',
 'SIHUAS',
 'YUNGAY',
 'ABANCAY',
 'ANDAHUAYLAS',
 'ANTABAMBA',
 'AYMARAES',
 'COTABAMBAS',
 'CHINCHEROS',
 'GRAU',
 'AREQUIPA',
 'CAMANA',
 'CARAVELI',
 'CASTILLA',
 'CAYLLOMA',
 'CONDESUYOS',
 'ISLAY',
 'LA UNION',
 'HUAMANGA',
 'CANGALLO',
 'HUANCA SANCOS',
 'HUANTA',
 'LA MAR',
 'LUCANAS',
 'PARINACOCHAS',
 'PAUCAR DEL SARA SARA',
 'SUCRE',
 'VICTOR FAJARDO',
 'VILCAS HUAMAN',
 'CAJAMARCA',
 'CAJABAMBA',
 'CELENDIN',
 'CHOTA',
 'CONTUMAZA',
 'CUTERVO',
 'HUALGAYOC',
 'JAEN',
 'SAN IGNACIO',
 'SAN MARCOS',
 'SAN MIGUEL',
 'SAN PABLO',
 'SANTA CRUZ',
 'CALLAO',
 'CUSCO',
 'ACOMAYO',
 'ANTA',
 'CALCA',
 'CANAS',
 'CANCHIS',
 'CHUMBIVILCAS',


In [12]:
res = {provincia[i]: poblacion[i] for i in range(len(provincia))}
res

{'CHACHAPOYAS': 63188,
 'BAGUA': 84672,
 'BONGARA': 26830,
 'CONDORCANQUI': 51344,
 'LUYA': 47827,
 'RODRIGUEZ DE MENDOZA': 33651,
 'UTCUBAMBA': 119294,
 'HUARAZ': 185276,
 'AIJA': 6433,
 'ANTONIO RAYMONDI': 13950,
 'ASUNCION': 7710,
 'BOLOGNESI': 24012,
 'CARHUAZ': 50007,
 'CARLOS FERMIN FITZCARRALD': 18496,
 'CASMA': 57256,
 'CORONGO': 8017,
 'HUARI': 63264,
 'HUARMEY': 33066,
 'HUAYLAS': 56557,
 'MARISCAL LUZURIAGA': 21787,
 'OCROS': 7224,
 'PALLASCA': 24371,
 'POMABAMBA': 26675,
 'RECUAY': 18085,
 'SANTA': 474053,
 'SIHUAS': 28630,
 'YUNGAY': 55769,
 'ABANCAY': 120116,
 'ANDAHUAYLAS': 150758,
 'ANTABAMBA': 11781,
 'AYMARAES': 24570,
 'COTABAMBAS': 55208,
 'CHINCHEROS': 46544,
 'GRAU': 21759,
 'AREQUIPA': 1175765,
 'CAMANA': 61708,
 'CARAVELI': 43690,
 'CASTILLA': 34743,
 'CAYLLOMA': 97458,
 'CONDESUYOS': 16426,
 'ISLAY': 54851,
 'LA UNION': 12797,
 'HUAMANGA': 317801,
 'CANGALLO': 32482,
 'HUANCA SANCOS': 8341,
 'HUANTA': 97205,
 'LA MAR': 75277,
 'LUCANAS': 51838,
 'PARINACOCHAS':

In [13]:
test['POBLACION'] = test['PROVINCIA'].map(res)

In [14]:
null_columns = test.columns[test.isnull().any()]
test[test["SEXO"].isnull()][null_columns]

Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE
2620339,LIMA,LIMA,LIMA,0.0,,2021-03-13,150101.0,10881464.0,1601151.0,9674755.0,15065.0


In [15]:
#find_provincia = test['PROVINCIA'] == 'UCAYALI'
find_distrito = test['DEPARTAMENTO'] == 'LORETO'
find_fecha = test['FECHA_RESULTADO'] == '2022-01-02'

data_exploratoria = test[find_distrito & find_fecha]
data_exploratoria

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX_x,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE
120448,20220313,LORETO,MAYNAS,PUNCHANA,AG,27.0,MASCULINO,2022-01-02,160108.0,19990922.0,57775.0,550551.0,5623.0
184588,20220313,LORETO,MAYNAS,SAN JUAN BAUTISTA,PR,32.0,FEMENINO,2022-01-02,160113.0,20925980.0,57775.0,550551.0,5623.0
308714,20220313,LORETO,MAYNAS,PUNCHANA,PR,31.0,MASCULINO,2022-01-02,160108.0,22425039.0,57775.0,550551.0,5623.0
1124010,20220313,LORETO,MAYNAS,IQUITOS,AG,24.0,MASCULINO,2022-01-02,160101.0,34702609.0,57775.0,550551.0,5623.0
1751357,20220313,LORETO,MAYNAS,IQUITOS,AG,69.0,FEMENINO,2022-01-02,160101.0,3428514.0,57775.0,550551.0,5623.0


In [16]:
data_exploratoria.groupby("SEXO").count()

Unnamed: 0_level_0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX_x,EDAD,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE
SEXO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
FEMENINO,2,2,2,2,2,2,2,2,2,2,2,2
MASCULINO,3,3,3,3,3,3,3,3,3,3,3,3


In [17]:
test['SEXO'] = test['SEXO'].fillna('FEMENINO')

In [18]:
null_columns = test.columns[test.isnull().any()]
test[test["PROVINCIA"].isnull()][null_columns]

Unnamed: 0,DEPARTAMENTO,PROVINCIA,DISTRITO,EDAD,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE
21,LIMA,,,54.0,2020-11-21,,24615020.0,1601151.0,,15065.0
29,LIMA,,,39.0,2020-07-24,,24833739.0,1601151.0,,15065.0
36,LIMA,,,28.0,2022-01-21,,24833787.0,1601151.0,,15065.0
49,ICA,,,22.0,2022-01-15,,24662193.0,103263.0,,10589.0
76,LIMA,,,27.0,2021-02-26,,24769312.0,1601151.0,,15065.0
...,...,...,...,...,...,...,...,...,...,...
3536703,LIMA,,,28.0,2022-01-27,,,1601151.0,,15065.0
3536704,LIMA,,,22.0,2022-01-23,,,1601151.0,,15065.0
3536705,LIMA,,,41.0,2022-01-25,,,1601151.0,,15065.0
3536706,LIMA,,,30.0,2022-01-24,,,1601151.0,,15065.0


In [19]:
test['FECHA_RESULTADO'] = pd.to_datetime(test['FECHA_RESULTADO'], errors='coerce', dayfirst=True)
test['FECHA_RESULTADO']

0         2021-04-14
1         2021-04-13
2         2022-02-22
3         2022-01-14
4         2020-07-04
             ...    
3536836   2021-03-25
3536837   2021-05-06
3536838   2021-01-22
3536839   2022-01-12
3536840   2022-01-15
Name: FECHA_RESULTADO, Length: 3536841, dtype: datetime64[ns]

In [20]:
test['POBLACION_CIENMIL'] = test['POBLACION']/100000

#test = test.drop(labels="POBLACION_CIENMIL", axis=1)
test

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX_x,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE,POBLACION_CIENMIL
0,20220313,JUNIN,HUANCAYO,EL TAMBO,AG,68.0,FEMENINO,2021-04-14,120114.0,24615058.0,125344.0,595183.0,9207.0,5.95183
1,20220313,LIMA,LIMA,SAN JUAN DE MIRAFLORES,AG,21.0,MASCULINO,2021-04-13,150133.0,24731649.0,1601151.0,9674755.0,15065.0,96.74755
2,20220313,AREQUIPA,AREQUIPA,PAUCARPATA,AG,27.0,MASCULINO,2022-02-22,40112.0,24731664.0,211916.0,1175765.0,14152.0,11.75765
3,20220313,PIURA,PIURA,CASTILLA,AG,20.0,MASCULINO,2022-01-14,200104.0,24731665.0,157327.0,894847.0,7682.0,8.94847
4,20220313,ANCASH,SANTA,CHIMBOTE,PR,23.0,MASCULINO,2020-07-04,21801.0,24856166.0,125111.0,474053.0,10597.0,4.74053
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3536836,20220313,LA LIBERTAD,TRUJILLO,TRUJILLO,AG,54.0,FEMENINO,2021-03-25,130101.0,,146110.0,1118724.0,7245.0,11.18724
3536837,20220313,LIMA,LIMA,JESUS MARIA,PCR,18.0,FEMENINO,2021-05-06,150113.0,,1601151.0,9674755.0,15065.0,96.74755
3536838,20220313,UCAYALI,CORONEL PORTILLO,YARINACOCHA,PCR,36.0,FEMENINO,2021-01-22,250105.0,,39361.0,447733.0,6681.0,4.47733
3536839,20220313,CUSCO,ESPINAR,ESPINAR,AG,18.0,MASCULINO,2022-01-12,80801.0,,116378.0,62059.0,8576.0,0.62059


In [21]:
salidasxsemanas = test.sort_values(by = 'FECHA_RESULTADO')
start_date = "2022-01-23"
end_date = "2022-03-13"

after_start_date = salidasxsemanas["FECHA_RESULTADO"] >= start_date
before_end_date = salidasxsemanas["FECHA_RESULTADO"] <= end_date
between_two_dates = after_start_date & before_end_date

filtered_dates = salidasxsemanas.loc[between_two_dates]

filtered_dates

Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX_x,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE,POBLACION_CIENMIL
2541748,20220313,AREQUIPA,AREQUIPA,PAUCARPATA,PCR,31.0,MASCULINO,2022-01-23,40112.0,10323944.0,211916.0,1175765.0,14152.0,11.75765
206956,20220313,LIMA,LIMA,MAGDALENA DEL MAR,PCR,26.0,MASCULINO,2022-01-23,150120.0,21010258.0,1601151.0,9674755.0,15065.0,96.74755
2807410,20220313,PUNO,HUANCANE,HUANCANE,AG,70.0,MASCULINO,2022-01-23,210601.0,12826161.0,65242.0,58957.0,5270.0,0.58957
3312428,20220313,LIMA,LIMA,VILLA EL SALVADOR,AG,20.0,MASCULINO,2022-01-23,150142.0,17963183.0,1601151.0,9674755.0,15065.0,96.74755
407947,20220313,LIMA,LIMA,LIMA,PCR,16.0,MASCULINO,2022-01-23,150101.0,23499201.0,1601151.0,9674755.0,15065.0,96.74755
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1102368,20220313,LA LIBERTAD,TRUJILLO,TRUJILLO,PCR,30.0,MASCULINO,2022-03-13,130101.0,34556969.0,146110.0,1118724.0,7245.0,11.18724
697856,20220313,ANCASH,SIHUAS,SIHUAS,AG,27.0,FEMENINO,2022-03-13,21901.0,27564150.0,125111.0,28630.0,10597.0,0.28630
2204237,20220313,LIMA,BARRANCA,SUPE,AG,11.0,FEMENINO,2022-03-13,150204.0,6577054.0,1601151.0,151095.0,15065.0,1.51095
214177,20220313,AREQUIPA,AREQUIPA,AREQUIPA,PCR,34.0,MASCULINO,2022-03-13,40101.0,21324390.0,211916.0,1175765.0,14152.0,11.75765


In [22]:
filtered_dates['PROVINCIA'] = filtered_dates['PROVINCIA'].replace('EN INVESTIGACIÃN', np.nan)
filtered_dates

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_dates['PROVINCIA'] = filtered_dates['PROVINCIA'].replace('EN INVESTIGACIÃN', np.nan)


Unnamed: 0,FECHA_CORTE,DEPARTAMENTO,PROVINCIA,DISTRITO,METODODX_x,EDAD,SEXO,FECHA_RESULTADO,UBIGEO,id_persona,METODODX_y,POBLACION,INDICE,POBLACION_CIENMIL
2541748,20220313,AREQUIPA,AREQUIPA,PAUCARPATA,PCR,31.0,MASCULINO,2022-01-23,40112.0,10323944.0,211916.0,1175765.0,14152.0,11.75765
206956,20220313,LIMA,LIMA,MAGDALENA DEL MAR,PCR,26.0,MASCULINO,2022-01-23,150120.0,21010258.0,1601151.0,9674755.0,15065.0,96.74755
2807410,20220313,PUNO,HUANCANE,HUANCANE,AG,70.0,MASCULINO,2022-01-23,210601.0,12826161.0,65242.0,58957.0,5270.0,0.58957
3312428,20220313,LIMA,LIMA,VILLA EL SALVADOR,AG,20.0,MASCULINO,2022-01-23,150142.0,17963183.0,1601151.0,9674755.0,15065.0,96.74755
407947,20220313,LIMA,LIMA,LIMA,PCR,16.0,MASCULINO,2022-01-23,150101.0,23499201.0,1601151.0,9674755.0,15065.0,96.74755
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1102368,20220313,LA LIBERTAD,TRUJILLO,TRUJILLO,PCR,30.0,MASCULINO,2022-03-13,130101.0,34556969.0,146110.0,1118724.0,7245.0,11.18724
697856,20220313,ANCASH,SIHUAS,SIHUAS,AG,27.0,FEMENINO,2022-03-13,21901.0,27564150.0,125111.0,28630.0,10597.0,0.28630
2204237,20220313,LIMA,BARRANCA,SUPE,AG,11.0,FEMENINO,2022-03-13,150204.0,6577054.0,1601151.0,151095.0,15065.0,1.51095
214177,20220313,AREQUIPA,AREQUIPA,AREQUIPA,PCR,34.0,MASCULINO,2022-03-13,40101.0,21324390.0,211916.0,1175765.0,14152.0,11.75765


In [23]:
filtered_dates.isnull().sum()

FECHA_CORTE              0
DEPARTAMENTO           320
PROVINCIA            21399
DISTRITO             21399
METODODX_x               0
EDAD                     0
SEXO                     0
FECHA_RESULTADO          0
UBIGEO               21399
id_persona           11285
METODODX_y             321
POBLACION            22258
INDICE                 321
POBLACION_CIENMIL    22258
dtype: int64

In [24]:
weekly_sales = filtered_dates.groupby(["SEXO","DEPARTAMENTO","PROVINCIA", "POBLACION", "POBLACION_CIENMIL", "INDICE", pd.Grouper(key="FECHA_RESULTADO",freq="W-SUN")]).size()
weekly_sales = weekly_sales.unstack(0).fillna(0)
weekly_sales.sort_values(by=['FECHA_RESULTADO'], inplace=True, ascending=True)
weekly_sales.loc[:,'TOTAL'] = weekly_sales.sum(numeric_only=True, axis=1)

weekly_sales = weekly_sales.reset_index()
weekly_sales

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_CIENMIL,INDICE,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
0,AMAZONAS,BAGUA,84672.0,0.84672,10264.0,2022-01-23,22.0,15.0,37.0
1,LA LIBERTAD,GRAN CHIMU,28290.0,0.28290,7245.0,2022-01-23,2.0,6.0,8.0
2,LA LIBERTAD,CHEPEN,86411.0,0.86411,7245.0,2022-01-23,13.0,11.0,24.0
3,ANCASH,SIHUAS,28630.0,0.28630,10597.0,2022-01-23,5.0,3.0,8.0
4,LA LIBERTAD,BOLIVAR,15982.0,0.15982,7245.0,2022-01-23,2.0,0.0,2.0
...,...,...,...,...,...,...,...,...,...
1466,AMAZONAS,RODRIGUEZ DE MENDOZA,33651.0,0.33651,10264.0,2022-03-13,6.0,9.0,15.0
1467,APURIMAC,AYMARAES,24570.0,0.24570,8986.0,2022-03-13,1.0,0.0,1.0
1468,CAJAMARCA,JAEN,203724.0,2.03724,6570.0,2022-03-13,2.0,3.0,5.0
1469,ANCASH,HUARI,63264.0,0.63264,10597.0,2022-03-13,1.0,5.0,6.0


In [25]:
weekly_sales.groupby("FECHA_RESULTADO").count()

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_CIENMIL,INDICE,FEMENINO,MASCULINO,TOTAL
FECHA_RESULTADO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-23,185,185,185,185,185,185,185,185
2022-01-30,193,193,193,193,193,193,193,193
2022-02-06,193,193,193,193,193,193,193,193
2022-02-13,192,192,192,192,192,192,192,192
2022-02-20,190,190,190,190,190,190,190,190
2022-02-27,180,180,180,180,180,180,180,180
2022-03-06,174,174,174,174,174,174,174,174
2022-03-13,164,164,164,164,164,164,164,164


In [26]:
cero_cases = weekly_sales['PROVINCIA']== "ATALAYA"
data_cero = weekly_sales[cero_cases]
data_cero

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_CIENMIL,INDICE,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
179,UCAYALI,ATALAYA,61049.0,0.61049,6681.0,2022-01-23,1.0,0.0,1.0
187,UCAYALI,ATALAYA,61049.0,0.61049,6681.0,2022-01-30,4.0,2.0,6.0
500,UCAYALI,ATALAYA,61049.0,0.61049,6681.0,2022-02-06,2.0,2.0,4.0
695,UCAYALI,ATALAYA,61049.0,0.61049,6681.0,2022-02-13,0.0,1.0,1.0
787,UCAYALI,ATALAYA,61049.0,0.61049,6681.0,2022-02-20,1.0,1.0,2.0


In [27]:
weekly_sales[['FEMENINO','MASCULINO', 'TOTAL']] = weekly_sales[['FEMENINO','MASCULINO', 'TOTAL']].div(weekly_sales['POBLACION_CIENMIL'].values,axis=0)
weekly_sales

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_CIENMIL,INDICE,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
0,AMAZONAS,BAGUA,84672.0,0.84672,10264.0,2022-01-23,25.982615,17.715420,43.698035
1,LA LIBERTAD,GRAN CHIMU,28290.0,0.28290,7245.0,2022-01-23,7.069636,21.208908,28.278544
2,LA LIBERTAD,CHEPEN,86411.0,0.86411,7245.0,2022-01-23,15.044381,12.729861,27.774242
3,ANCASH,SIHUAS,28630.0,0.28630,10597.0,2022-01-23,17.464198,10.478519,27.942717
4,LA LIBERTAD,BOLIVAR,15982.0,0.15982,7245.0,2022-01-23,12.514078,0.000000,12.514078
...,...,...,...,...,...,...,...,...,...
1466,AMAZONAS,RODRIGUEZ DE MENDOZA,33651.0,0.33651,10264.0,2022-03-13,17.830079,26.745119,44.575198
1467,APURIMAC,AYMARAES,24570.0,0.24570,8986.0,2022-03-13,4.070004,0.000000,4.070004
1468,CAJAMARCA,JAEN,203724.0,2.03724,6570.0,2022-03-13,0.981720,1.472581,2.454301
1469,ANCASH,HUARI,63264.0,0.63264,10597.0,2022-03-13,1.580678,7.903389,9.484067


In [28]:
weekly_sales.FEMENINO = weekly_sales.FEMENINO.round()
weekly_sales.MASCULINO = weekly_sales.MASCULINO.round()
weekly_sales.TOTAL = weekly_sales.FEMENINO + weekly_sales.MASCULINO
weekly_sales.FECHA_RESULTADO = weekly_sales.FECHA_RESULTADO.dt.strftime('%Y-%m-%d')
weekly_sales

SEXO,DEPARTAMENTO,PROVINCIA,POBLACION,POBLACION_CIENMIL,INDICE,FECHA_RESULTADO,FEMENINO,MASCULINO,TOTAL
0,AMAZONAS,BAGUA,84672.0,0.84672,10264.0,2022-01-23,26.0,18.0,44.0
1,LA LIBERTAD,GRAN CHIMU,28290.0,0.28290,7245.0,2022-01-23,7.0,21.0,28.0
2,LA LIBERTAD,CHEPEN,86411.0,0.86411,7245.0,2022-01-23,15.0,13.0,28.0
3,ANCASH,SIHUAS,28630.0,0.28630,10597.0,2022-01-23,17.0,10.0,27.0
4,LA LIBERTAD,BOLIVAR,15982.0,0.15982,7245.0,2022-01-23,13.0,0.0,13.0
...,...,...,...,...,...,...,...,...,...
1466,AMAZONAS,RODRIGUEZ DE MENDOZA,33651.0,0.33651,10264.0,2022-03-13,18.0,27.0,45.0
1467,APURIMAC,AYMARAES,24570.0,0.24570,8986.0,2022-03-13,4.0,0.0,4.0
1468,CAJAMARCA,JAEN,203724.0,2.03724,6570.0,2022-03-13,1.0,1.0,2.0
1469,ANCASH,HUARI,63264.0,0.63264,10597.0,2022-03-13,2.0,8.0,10.0


In [29]:
weekly_sales.to_csv('dataset_covid_total.csv' , index=False)

In [30]:
weekly_sales.to_json('dataset_covid_total.json', orient="table")