In [2]:
import numpy as np
import pandas as pd
import re

# GET RAW DATA

In [3]:
df = pd.read_csv('./get_data/raw_data.csv')
df

Unnamed: 0,main,n_df
0,CENTRO DE EST...,0
1,DIREC.DE SIST.y COMUNICACIONES ...,0
2,* UNIDAD P.A.D.* ...,0
3,RESULTADOS GENE...,0
4,=============================================...,0
...,...,...
135174,0013 060469 POLO REYES JULIO MARTIN ...,205
135175,0014 108769 ALFARO MEDRANO MARIA JOSE ...,205
135176,0015 068169 JESUS VEGA CRISTIAN JOEL ...,205
135177,********************************************...,205


## DELETE AND REPLACE WHITE SPACES

In [4]:
many_white_spaces = re.compile(r'\s+')
one_white_space = ' '

df['main'] = (
    df['main']
    .str.replace(many_white_spaces, one_white_space, regex=True)
    .str.strip()
)
df

Unnamed: 0,main,n_df
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0
4,==============================================...,0
...,...,...
135174,0013 060469 POLO REYES JULIO MARTIN 14.262 24....,205
135175,0014 108769 ALFARO MEDRANO MARIA JOSE 18.346 1...,205
135176,0015 068169 JESUS VEGA CRISTIAN JOEL -0.422 25...,205
135177,**********************************************...,205


# ONLY RESULTS

In [4]:
# results_first_character = '0'
fourt_digits = re.compile(r'^\d{4}')

filter_by_character = (
    df['main']
    # .str.startswith(results_first_character)
    .str.contains(fourt_digits, regex=True)
)

raw_results_df = df[filter_by_character]
raw_results_df

Unnamed: 0,main,n_df
8,0001 171558 RODRIGUEZ SANCHEZ ROSICELA ELIZABE...,0
9,0002 061758 LLANOS SOLIS KIMBERLYN YEI 37.504 ...,0
10,0003 178758 ALVA PEREZ TAMARA ANTONELLA 48.918...,0
11,0004 028758 URIOL FLORES LIZ ROCIO 39.342 57.7...,0
12,0005 170858 VASQUEZ GUERRA CESAR IVAN 35.462 5...,0
...,...,...
135172,0011 065269 LAZARO LLANOS RICHARD JOEL 31.799 ...,205
135173,0012 147969 ALBUJAR VIERA LUIS ALBERTO 28.948 ...,205
135174,0013 060469 POLO REYES JULIO MARTIN 14.262 24....,205
135175,0014 108769 ALFARO MEDRANO MARIA JOSE 18.346 1...,205


# CABECERAS

In [5]:
titles_df = df[~filter_by_character]
titles_df

Unnamed: 0,main,n_df
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0
4,==============================================...,0
...,...,...
135154,OR- I II PUNTAJE,205
135155,DEN CARNET APELIIDOS y NOMBRES SUMATIVO SUMATI...,205
135156,==============================================...,205
135177,**********************************************...,205


In [6]:
pag_number = re.compile(r'Pag\.\s\d+')
empty_string = ''

titles_df.loc[:, 'main'] = (
    titles_df['main']
    .str.replace(pag_number, empty_string, regex=True)
    .str.rstrip()
    )

titles_df

Unnamed: 0,main,n_df
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0
4,==============================================...,0
...,...,...
135154,OR- I II PUNTAJE,205
135155,DEN CARNET APELIIDOS y NOMBRES SUMATIVO SUMATI...,205
135156,==============================================...,205
135177,**********************************************...,205


In [7]:
titles_sep_sign = "="

def filter_group(dataframe_group):

    filter_title = (
        dataframe_group
        ['main']
        .str.startswith(titles_sep_sign)
    )

    index_value = (
        dataframe_group
        [filter_title]
        .index
        [0]
    )

    return dataframe_group.loc[:index_value]

titles_group_by_n_df = (
    titles_df
    .drop_duplicates(['main', 'n_df'])
    .groupby('n_df')
)

In [8]:
filtered_titles = map(lambda values: filter_group(values[1]), titles_group_by_n_df)
raw_titles_df = pd.concat(filtered_titles)
raw_titles_df # .head(40)

raw_titles_df

Unnamed: 0,main,n_df
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0
4,==============================================...,0
...,...,...
131038,==============================================...,204
131326,OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA...,205
131327,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,205
131328,RESULTADOS GENERALES POR ESCUELA PROFESIONAL *...,205


In [9]:
filter_sign_equal = (
    raw_titles_df
    ['main']
    .str.startswith(titles_sep_sign)
)

raw_titles_df = (
    raw_titles_df
    [~filter_sign_equal]
)

raw_titles_df

Unnamed: 0,main,n_df
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0
3370,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,1
...,...,...
131036,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,204
131037,RESULTADOS GENERALES POR ESCUELA PROFESIONAL *...,204
131326,OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA...,205
131327,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,205


In [10]:
n_rows_titles = raw_titles_df.groupby("n_df").count()
n_rows_titles

Unnamed: 0_level_0,main
n_df,Unnamed: 1_level_1
0,4
1,4
2,4
3,4
4,4
...,...
201,4
202,4
203,3
204,3


In [11]:
raw_titles_df = pd.merge(raw_titles_df, n_rows_titles, left_on='n_df', right_on='n_df')
raw_titles_df

Unnamed: 0,main_x,n_df,main_y
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0,4
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0,4
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0,4
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0,4
4,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,1,4
...,...,...,...
716,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,204,3
717,RESULTADOS GENERALES POR ESCUELA PROFESIONAL *...,204,3
718,OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA...,205,3
719,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,205,3


In [12]:
def add_serie(df):
    max_value = df['main_y'].max()
    df['aux'] = range(max_value)
    return df

In [13]:
aux_df = map(lambda values: add_serie(values[1]), raw_titles_df.groupby("n_df"))
raw_titles_df = pd.concat(aux_df)
raw_titles_df

Unnamed: 0,main_x,n_df,main_y,aux
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,0,4,0
1,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,0,4,1
2,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,0,4,2
3,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...,0,4,3
4,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,1,4,0
...,...,...,...,...
716,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,204,3,1
717,RESULTADOS GENERALES POR ESCUELA PROFESIONAL *...,204,3,2
718,OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA...,205,3,0
719,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,205,3,1


In [14]:
# number_of_first_row = 0
# filter_rows_by_aux = raw_titles_df['aux'] != number_of_first_row

# raw_titles_df = raw_titles_df[filter_rows_by_aux]
# raw_titles_df

In [15]:
# raw_titles_df.loc[:, ['n_df', 'aux', 'main_x']].set_index(['n_df', 'aux']).unstack()
unstacked_titles = (
    raw_titles_df
    .loc[:, ['n_df', 'aux', 'main_x']]
    .set_index(['n_df', 'aux'])
    .unstack()
    .droplevel(['aux'], axis=1)
)
unstacked_titles.columns = ['first', 'second', 'third', 'fourth']
unstacked_titles

Unnamed: 0_level_0,first,second,third,fourth
n_df,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...
1,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,* UNIDAD P.A.D.* << JEQUETEPEQUE >> 12/08/2018,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...
2,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATI...,* UNIDAD P.A.D.* << HUAMACHUCO >> 12/08/2018,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...
3,UNIVERSIDAD NACIONAL DE TRUJILLO - UNT,DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION OR...,* UNIDAD P.A.D.* GRUPO : A 22/09/2018,RESULTADOS GENERALES POR ESCUELA PROFESIONAL
4,UNIVERSIDAD NACIONAL DE TRUJILLO - UNT,DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION OR...,* UNIDAD P.A.D.* GRUPO : B 23/09/2018,RESULTADOS GENERALES POR ESCUELA PROFESIONAL
...,...,...,...,...
201,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,OFI.DE TECNOLOG.DE LA INFORMAC. EXAMENES SUMAT...,* AREA P.A.D.* << V A L L E >> 20/08/2023,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...
202,CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT...,OFI.DE TECNOLOG.DE LA INFORMAC. EXAMENES SUMAT...,* AREA P.A.D.* << HUAMACHUCO >> 20/08/2023,RESULTADOS GENERALES - ORDEN DE MERITO POR ESC...
203,OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA...,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,RESULTADOS GENERALES POR ESCUELA PROFESIONAL *...,
204,OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA...,18/02/2024 EXAMENES SUMATIVOS 2024-II - CEPUNT II,RESULTADOS GENERALES POR ESCUELA PROFESIONAL *...,


# FIRST COLUMN

In [16]:
unstacked_titles['first'].value_counts()

first
UNIVERSIDAD NACIONAL DE TRUJILLO - UNT                                                   174
OFICINA DE TECNOLOGIAS DE LA INFORMACION DE LA UNIVERSIDAD NACIONAL DE TRUJILLO - UNT     17
CENTRO DE ESTUDIOS PREUNIVERSITARIOS DE LA UNT - CEPUNT                                   15
Name: count, dtype: int64

## FIRST COLUMN TESTS

In [17]:
ends_with = re.compile(r'(CEPUNT|UNT)$')

first_test = (
    unstacked_titles
    ['first']
    .str.extract(ends_with)
    [0]
    # .value_counts()
    # .sum()
)

first_test


n_df
0      CEPUNT
1      CEPUNT
2      CEPUNT
3         UNT
4         UNT
        ...  
201    CEPUNT
202    CEPUNT
203       UNT
204       UNT
205       UNT
Name: 0, Length: 206, dtype: object

# SECOND COLUMN

In [18]:
unstacked_titles['second'].value_counts().sort_index(ascending=False).head(49)

second
OFI.DE TECNOLOG.DE LA INFORMAC. EXAMENES SUMATIVOS 2024- I - CEPUNT I                 3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2024-I                       3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2024- I - TRUJILLO           3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2023-II - V A L L E          3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2023-II - TRUJILLO           3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2023-II - STGO.DE CHUCO      1
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2023-II - HUAMACHUCO         3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2023-I                       3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2023- I - TRUJILLO           3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2022-II - V A L L E          3
OF.DE TECNOLOG.DE LA INFOR. EXAMEN DE ADMISION ORDINARIO 2022-II - TRUJILLO           3
OF.DE TECNOLOG.DE LA INFO

In [19]:
unstacked_titles['second'].value_counts().sort_index(ascending=False).tail(50)

second
DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATIVOS 2019-II - CEPUNT II                 3
DIREC.DE SIST.y COMUNICACIONES EXAMENES SUMATIVOS 2019- I - CEPUNT I                  3
DIREC.DE SIST.y COMUNICAC. EXAMEN DE ADMISION ORDINARIO 2021-II * STGO.DE CHUCO       1
DIREC.DE SIST.y COMUNICAC. EXAMEN DE ADMISION ORDINARIO 2020-II * STGO.DE CHUCO       1
DIREC.DE SIST.y COMUNICAC. EXAMEN DE ADMISION ORDINARIO 2019-II * STGO.DE CHUCO       1
DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION ORDINARIO 2021-II - TRUJILLO              3
DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION ORDINARIO 2021-II * JEQUETEPEQUE          3
DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION ORDINARIO 2021- I - TRUJILLO              4
DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION ORDINARIO 2021- I * TRUJILLO              4
DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION ORDINARIO 2020-II - TRUJILLO              2
DIREC.DE SIST.y COMUNIC. EXAMEN DE ADMISION ORDINARIO 2020-II * JEQUETEPEQUE          2
DIREC.DE SIST.y COMUNIC. 

In [20]:
# places = {
#     'TRUJILLO': 'TRUJILLO',
#     'HUAMACHUCO': 'HUAMACHUCO',
#     'JEQUETEPEQUE': 'JEQUETEPEQUE',
#     'STGOCHUCO': 'STGO.DE CHUCO',
#     'VALLE': 'V A L L E'
# }

# date_format = '\d\d/\d\d/\d\d\d\d'
# places = 'STGO\.DE CHUCO|TRUJILLO|HUAMACHUCO|JEQUETEPEQUE|V A L L E'
# type_tests = 'ORDINARIO|CEPUNT|EXTRAORDINARIO'
# type_users = 'DISCAPACITADOS|DEPORTISTAS CALIFICADOS|VICT.+DE LA VIOLENCIA|QUINTO GRADO DE EDUCACION SECUNDARIA'

# unstacked_titles['second'].str.extractall(f"({date_format}|{type_tests}|{places}|{type_users})").unstack().tail(50)

## SECOND COLUMN DATES

In [21]:
valid_date = re.compile(r'(\d\d/\d\d/\d+)')

second_date = (
    unstacked_titles
    ['second']
    .str.extract(valid_date)
    [0]
    # .value_counts()
    # .sum()
)

second_date

n_df
0             NaN
1             NaN
2             NaN
3             NaN
4             NaN
          ...    
201           NaN
202           NaN
203    18/02/2024
204    18/02/2024
205    18/02/2024
Name: 0, Length: 206, dtype: object

## SECOND COLUMN PLACES

In [22]:
valid_places = re.compile(r'(STGO.+DE CHUCO|TRUJILLO|HUAMACHUCO|JEQUETEPEQUE|V A L L E)')

second_places = (
    unstacked_titles
    ['second']
    .str.extract(valid_places)
    [0]
    .str.replace('V A L L E', 'VALLE', regex=False)
    # .value_counts()
    # .sum()
)

second_places.value_counts()

0
TRUJILLO         88
HUAMACHUCO       36
VALLE            21
JEQUETEPEQUE     16
STGO.DE CHUCO     7
Name: count, dtype: int64

## SECOND COLUMN TESTS

In [23]:
valid_tests = re.compile(r'(ORDINARIO|CEPUNT|EXTRAORDINARIO)')

second_tests = (
    unstacked_titles
    ['second']
    .str.extract(valid_tests)
    [0]
    # .value_counts()
    # .sum()
)

second_tests

n_df
0         CEPUNT
1         CEPUNT
2         CEPUNT
3      ORDINARIO
4      ORDINARIO
         ...    
201       CEPUNT
202       CEPUNT
203       CEPUNT
204       CEPUNT
205       CEPUNT
Name: 0, Length: 206, dtype: object

## SECOND COLUMN USERS

In [24]:
valid_users = re.compile(r'(DISCAPACITADOS|DEPORTISTAS CALIFICADOS|VICT.+DE LA VIOLENCIA|QUINTO GRADO DE EDUCACION SECUNDARIA)')

second_users = (
    unstacked_titles
    ['second']
    .str.extract(valid_users)
    [0]
    # .value_counts()
    # .sum()
)

second_users.value_counts()

# no hay users

Series([], Name: count, dtype: int64)

# THIRD COLUMN

In [25]:
unstacked_titles['third'].value_counts().sort_index().head(60)

third
* AREA P.A.D.* << HUAMACHUCO >> 20/08/2023                                                           1
* AREA P.A.D.* << TRUJILLO >> 20/08/2023                                                             1
* AREA P.A.D.* << V A L L E >> 20/08/2023                                                            1
* AREA P.A.D.* AREA * A 30/09/2023                                                                   1
* AREA P.A.D.* AREA - D 23/09/2023                                                                   1
* AREA P.A.D.* AREA : A 07/04/2022                                                                   3
* AREA P.A.D.* AREA : A 21/10/2022                                                                   1
* AREA P.A.D.* AREA : A 24/03/2023                                                                   3
* AREA P.A.D.* AREA : A 28/10/2021                                                                   1
* AREA P.A.D.* AREA : B 08/04/2022                                 

In [26]:
unstacked_titles['third'].value_counts().sort_index().tail(60)

third
* UNIDAD P.A.D.* << JEQUETEPEQUE >> 12/08/2018                                             1
* UNIDAD P.A.D.* << JEQUETEPEQUE >> 18/08/2019                                             1
* UNIDAD P.A.D.* << TRUJILLO >> 09/02/2020                                                 1
* UNIDAD P.A.D.* << TRUJILLO >> 10/02/2019                                                 1
* UNIDAD P.A.D.* << TRUJILLO >> 12/08/2018                                                 1
* UNIDAD P.A.D.* << TRUJILLO >> 18/08/2019                                                 1
* UNIDAD P.A.D.* AREA : A 02/06/2021                                                       3
* UNIDAD P.A.D.* AREA : A 02/12/2020                                                       1
* UNIDAD P.A.D.* AREA : B 03/12/2020                                                       1
* UNIDAD P.A.D.* AREA : B 28/05/2021                                                       4
* UNIDAD P.A.D.* AREA : C 04/12/2020                            

## THIRD COLUMN DATES

In [27]:
# valid_date = re.compile(r'(\d\d/\d\d/\d+)')
nombre = 'third'
third_date = (
    unstacked_titles
    [nombre]
    .str.extract(valid_date)
    [0]
    # .value_counts()
    # .sum()
)

third_date

n_df
0      12/08/2018
1      12/08/2018
2      12/08/2018
3      22/09/2018
4      23/09/2018
          ...    
201    20/08/2023
202    20/08/2023
203           NaN
204           NaN
205           NaN
Name: 0, Length: 206, dtype: object

## THIRD COLUMN PLACES

In [28]:
# valid_places = re.compile(r'(STGO\.DE CHUCO|TRUJILLO|HUAMACHUCO|JEQUETEPEQUE|V A L L E)')

third_places = (
    unstacked_titles
    ['third']
    .str.extract(valid_places)
    [0]
    # .value_counts()
    # .sum()
)

third_places

n_df
0          TRUJILLO
1      JEQUETEPEQUE
2        HUAMACHUCO
3               NaN
4               NaN
           ...     
201       V A L L E
202      HUAMACHUCO
203      HUAMACHUCO
204       V A L L E
205        TRUJILLO
Name: 0, Length: 206, dtype: object

## THIRD COLUMN TESTS

In [29]:
# valid_tests = re.compile(r'(ORDINARIO|CEPUNT|EXTRAORDINARIO)')

third_tests = (
    unstacked_titles
    ['third']
    .str.extract(valid_tests)
    [0]
    # .value_counts()
    # .sum()
)

third_tests.value_counts() # no tiene nada

Series([], Name: count, dtype: int64)

## THIRD COLUMN USERS

In [30]:
# valid_users = re.compile(r'(DISCAPACITADOS|DEPORTISTAS CALIFICADOS|VICT\.DE LA VIOLENCIA|QUINTO GRADO DE EDUCACION SECUNDARIA)')

third_users = (
    unstacked_titles
    ['third']
    .str.extract(valid_users)
    [0]
    # .value_counts()
    # .sum()
)

third_users.value_counts()

0
QUINTO GRADO DE EDUCACION SECUNDARIA    17
DISCAPACITADOS                          11
VICTIMAS DE LA VIOLENCIA                 6
DEPORTISTAS CALIFICADOS                  4
VICT.DE LA VIOLENCIA                     1
Name: count, dtype: int64

## THIRD COLUMN AREAS

In [31]:
get_areas = re.compile(r'(AREAS?|GRUPOS?)(.+)\d\d/\d\d/\d+')
duplicated_value = 'AREA P.A.D.'
not_areas = re.compile(r'[^ABCDy-]')
y = 'y'
min_sign = '-'
start_min = re.compile(r'^-')

third_areas = (
    unstacked_titles
    ['third']
    .str.replace(duplicated_value, empty_string, regex=False)
    .str.extract(get_areas)
    [1]
    .replace(not_areas, empty_string, regex=True)
    .str.replace(y, min_sign, regex=False)
    .str.replace(start_min, empty_string, regex=True)
    # .value_counts()
)
third_areas

n_df
0      NaN
1      NaN
2      NaN
3        A
4        B
      ... 
201    NaN
202    NaN
203    NaN
204    NaN
205    NaN
Name: 1, Length: 206, dtype: object

# FOURTH COLUMN

In [32]:
(
    unstacked_titles
    ['fourth']
    .value_counts()
)

fourth
RESULTADOS GENERALES POR ESCUELA PROFESIONAL                                67
RESULTADOS GENERALES - ORDEN DE MERITO POR ESCUELA ACADEMICO PROFESIONAL    15
RESULTADOS GENERALES POR ESCUELA PROFESIONAL - GRUPOS A y B                  4
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREA - A                      3
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREA - B                      3
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - GRUPO A                       2
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - GRUPO B                       2
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREA - D                      2
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREA - C-D                    2
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREA - C                      1
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREA * A                      1
RESULTADOS POR ESCUELA ACADEMICO PROFESIONAL - AREAS * B - C                 1
Name: count, dtype: int64

## FOURTH COLUMN AREAS

In [33]:
get_areas_2 = re.compile(r'(AREAS?|GRUPOS?)(.+)$')

fourth_areas = (
    unstacked_titles
    ['fourth']
    .str.extract(get_areas_2)
    [1]
    .replace(not_areas, empty_string, regex=True)
    .str.replace(y, min_sign, regex=False)
    .str.replace(start_min, empty_string, regex=True)
    # .value_counts()
    # .sum()
)

fourth_areas

n_df
0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
201    NaN
202    NaN
203    NaN
204    NaN
205    NaN
Name: 1, Length: 206, dtype: object

# UNIENDO TITLES

In [34]:
first_test

second_tests
second_date
second_places
second_users

third_date
third_places
third_users
third_areas
# third_tests no tiene nada

fourth_areas

n_df
0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
201    NaN
202    NaN
203    NaN
204    NaN
205    NaN
Name: 1, Length: 206, dtype: object

In [35]:
# first_test.value_counts()

(
    second_tests
)

n_df
0         CEPUNT
1         CEPUNT
2         CEPUNT
3      ORDINARIO
4      ORDINARIO
         ...    
201       CEPUNT
202       CEPUNT
203       CEPUNT
204       CEPUNT
205       CEPUNT
Name: 0, Length: 206, dtype: object

In [36]:
dates_df = (
    second_date
    .str.cat(third_date, na_rep=empty_string)
    .replace(empty_string, np.nan, regex=False)
 )

dates_df

n_df
0      12/08/2018
1      12/08/2018
2      12/08/2018
3      22/09/2018
4      23/09/2018
          ...    
201    20/08/2023
202    20/08/2023
203    18/02/2024
204    18/02/2024
205    18/02/2024
Name: 0, Length: 206, dtype: object

In [37]:
# pd.concat([second_places, third_places], axis=1)
valle_with_spaces = 'V A L L E'
valle_without_spaces = 'VALLE'

places_df = (
    second_places
    .str.cat(third_places, na_rep=empty_string)
    .replace(empty_string, np.nan, regex=False)
    .replace(valle_with_spaces, valle_without_spaces, regex=False)
)

places_df

n_df
0          TRUJILLO
1      JEQUETEPEQUE
2        HUAMACHUCO
3          TRUJILLO
4          TRUJILLO
           ...     
201           VALLE
202      HUAMACHUCO
203      HUAMACHUCO
204           VALLE
205        TRUJILLO
Name: 0, Length: 206, dtype: object

In [38]:
# pd.concat([second_users, third_users], axis=1)
users_df = (
    second_users
    .str.cat(third_users, na_rep=empty_string)
    .replace(empty_string, np.nan, regex=False)
)
users_df

n_df
0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
201    NaN
202    NaN
203    NaN
204    NaN
205    NaN
Name: 0, Length: 206, dtype: object

In [39]:
# pd.concat([third_areas, fourth_areas], axis=1)
areas_df = (
    third_areas
    .str.cat(fourth_areas, na_rep=empty_string)
    .replace(empty_string, np.nan, regex=False)
)
areas_df

n_df
0      NaN
1      NaN
2      NaN
3        A
4        B
      ... 
201    NaN
202    NaN
203    NaN
204    NaN
205    NaN
Name: 1, Length: 206, dtype: object

# RESULTADO FINAL TITLES

In [40]:
final_columns = ['tipo', 'fecha', 'lugar', 'alumno', 'area']
final_titles_df = (
    pd
    .concat(
        [second_tests,
         dates_df,
         places_df,
         users_df,
         areas_df], axis=1)
)

final_titles_df.columns = final_columns
final_titles_df

Unnamed: 0_level_0,tipo,fecha,lugar,alumno,area
n_df,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,CEPUNT,12/08/2018,TRUJILLO,,
1,CEPUNT,12/08/2018,JEQUETEPEQUE,,
2,CEPUNT,12/08/2018,HUAMACHUCO,,
3,ORDINARIO,22/09/2018,TRUJILLO,,A
4,ORDINARIO,23/09/2018,TRUJILLO,,B
...,...,...,...,...,...
201,CEPUNT,20/08/2023,VALLE,,
202,CEPUNT,20/08/2023,HUAMACHUCO,,
203,CEPUNT,18/02/2024,HUAMACHUCO,,
204,CEPUNT,18/02/2024,VALLE,,


In [41]:
final_titles_df['fecha']

n_df
0      12/08/2018
1      12/08/2018
2      12/08/2018
3      22/09/2018
4      23/09/2018
          ...    
201    20/08/2023
202    20/08/2023
203    18/02/2024
204    18/02/2024
205    18/02/2024
Name: fecha, Length: 206, dtype: object

# TRANSFORMANDO RESULTADOS

## NAMES

In [42]:
valid_name = re.compile(r'\s(\D+[0]?\D+[0]?)\s-?\d')
# r'(\s\D+[0]?\D+[0]?\s-?\d)'
# r'^\d{4}\s\d{6}([\D]+)\d+\.'

names = (
    raw_results_df
    # .set_index('n_df')
    ['main']
    .str.extract(valid_name)
    [0]
    .str.strip()
)
names

8         RODRIGUEZ SANCHEZ ROSICELA ELIZABETH
9                   LLANOS SOLIS KIMBERLYN YEI
10                 ALVA PEREZ TAMARA ANTONELLA
11                      URIOL FLORES LIZ ROCIO
12                   VASQUEZ GUERRA CESAR IVAN
                          ...                 
135172              LAZARO LLANOS RICHARD JOEL
135173              ALBUJAR VIERA LUIS ALBERTO
135174                 POLO REYES JULIO MARTIN
135175               ALFARO MEDRANO MARIA JOSE
135176                JESUS VEGA CRISTIAN JOEL
Name: 0, Length: 115385, dtype: object

In [43]:
# names[0].notnull().sum()

## GRADES

In [44]:
valid_grades = re.compile(r'(-?\d+\.\d+)')

grades = (
    raw_results_df
    # .set_index('n_df')
    ['main']
    .str.extractall(valid_grades)
    .unstack()
    .droplevel(level=0, axis=1)
)
grades

match,0,1,2,3,4
8,41.577,78.261,126.368,246.206,94.842
9,37.504,67.242,109.044,213.790,94.842
10,48.918,53.192,109.036,211.146,94.842
11,39.342,57.783,102.429,199.554,94.842
12,35.462,50.730,109.548,195.740,94.842
...,...,...,...,...,...
135172,31.799,42.197,73.996,,
135173,28.948,27.507,56.455,,
135174,14.262,24.459,38.721,,
135175,18.346,18.952,37.298,,


In [45]:

grades

match,0,1,2,3,4
8,41.577,78.261,126.368,246.206,94.842
9,37.504,67.242,109.044,213.790,94.842
10,48.918,53.192,109.036,211.146,94.842
11,39.342,57.783,102.429,199.554,94.842
12,35.462,50.730,109.548,195.740,94.842
...,...,...,...,...,...
135172,31.799,42.197,73.996,,
135173,28.948,27.507,56.455,,
135174,14.262,24.459,38.721,,
135175,18.346,18.952,37.298,,


## SCHOOL AND DETAILS

In [46]:
after_grades = re.compile(r'(\.\d+\s[A-Z].*)')
digits = re.compile(r'\.\d+\s')
empty_string = ''

school_and_details = (
        raw_results_df
        ['main']
        .str.extract(after_grades)
        [0]
        .str.replace(digits, empty_string, regex=True)
)

school_and_details

8           SI ADMINISTRACION
9           SI ADMINISTRACION
10          SI ADMINISTRACION
11          SI ADMINISTRACION
12          SI ADMINISTRACION
                 ...         
135172    ED.SEC: HISTORIA NO
135173    ED.SEC: HISTORIA NO
135174    ED.SEC: HISTORIA NO
135175    ED.SEC: HISTORIA NO
135176    ED.SEC: HISTORIA NO
Name: 0, Length: 115385, dtype: object

In [47]:
school_and_details

8           SI ADMINISTRACION
9           SI ADMINISTRACION
10          SI ADMINISTRACION
11          SI ADMINISTRACION
12          SI ADMINISTRACION
                 ...         
135172    ED.SEC: HISTORIA NO
135173    ED.SEC: HISTORIA NO
135174    ED.SEC: HISTORIA NO
135175    ED.SEC: HISTORIA NO
135176    ED.SEC: HISTORIA NO
Name: 0, Length: 115385, dtype: object

### DETAILS

In [48]:
valid_result = re.compile(r'(INGRESA.*|NO\sINGRESA.*|ING\.\s?2.*|AUSENTE.*|ANULADO.*|^SI\s|^NO\s|\sSI$|\sNO$)')
possitive_pattern = re.compile(r'INGRESA([\w\s-]+)?')
negative_pattern = re.compile(r'NO\D+')
positive_result = 'SI'
negative_result = 'NO'

# r'(INGRESA|NO\sINGRESA|ING\.\s?2.*|AUSENTE.*|ANULADO.*|^SI\s|^NO\s|\sSI$|\sNO$)'
details = (
        school_and_details
        .str.extract(valid_result)
        [0]
        .str.strip()
        .str.replace(negative_pattern, negative_result, regex=True)
        .str.replace(possitive_pattern, positive_result, regex=True)
)
details

8         SI
9         SI
10        SI
11        SI
12        SI
          ..
135172    NO
135173    NO
135174    NO
135175    NO
135176    NO
Name: 0, Length: 115385, dtype: object

In [49]:
# details[details.isnull()]
details.value_counts()

0
NO            95052
SI            15745
AUSENTE        3003
ING. 2-OPC     1564
ANULADO          20
Name: count, dtype: int64

### SCHOOL

In [50]:
valid_result2 = re.compile(r'INGRESA.*|NO\sINGRESA.*|ING\.\s?2.*|AUSENTE.*|ANULADO.*|^SI\s|^NO\s|\sSI$|\sNO$')

school = (
    school_and_details
    .str.replace(valid_result2, empty_string, regex=True)
    .str.upper()
    .str.strip()
    
)
school

8           ADMINISTRACION
9           ADMINISTRACION
10          ADMINISTRACION
11          ADMINISTRACION
12          ADMINISTRACION
                ...       
135172    ED.SEC: HISTORIA
135173    ED.SEC: HISTORIA
135174    ED.SEC: HISTORIA
135175    ED.SEC: HISTORIA
135176    ED.SEC: HISTORIA
Name: 0, Length: 115385, dtype: object

In [51]:
school.notnull().sum()

115385

In [52]:
school_and_details[88692]

'MATEMATICAS'

In [53]:
raw_results_df.loc[88692]['main']

'0013 026335 PAREDES QUEZADA DEIVER SANTOS 25.409 -8.218 17.191 53.635 MATEMATICAS'

## JOIN RESULTS

In [54]:
combine_result = pd.concat(
    [
        names,
        grades,
        school,
        details,
        raw_results_df['n_df']
    ], axis=1    
)

combine_result.columns = ['names', 'r1', 'r2', 'r3', 'r4', 'r5', 'escuela', 'resultado', 'n_df']
combine_result

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0
...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205


In [55]:
combine_result.loc[88692]['names']

'PAREDES QUEZADA DEIVER SANTOS'

In [56]:
combine_result

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0
...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205


# UNIENDO TITLES CON RESULTS

In [57]:
results_with_titles = pd.merge(
    combine_result,
    final_titles_df,
    left_on='n_df',
    right_index=True
)

results_with_titles

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,12/08/2018,TRUJILLO,,
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,12/08/2018,TRUJILLO,,
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,12/08/2018,TRUJILLO,,
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,12/08/2018,TRUJILLO,,
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,12/08/2018,TRUJILLO,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205,CEPUNT,18/02/2024,TRUJILLO,,
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205,CEPUNT,18/02/2024,TRUJILLO,,
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205,CEPUNT,18/02/2024,TRUJILLO,,
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205,CEPUNT,18/02/2024,TRUJILLO,,


In [58]:
results_with_titles.loc[88692]
# results_with_titles['fecha'].notnull().sum()

names        PAREDES QUEZADA DEIVER SANTOS
r1                                  25.409
r2                                  -8.218
r3                                  17.191
r4                                  53.635
r5                                     NaN
escuela                        MATEMATICAS
resultado                              NaN
n_df                                   149
tipo                             ORDINARIO
fecha                           22/10/2022
lugar                             TRUJILLO
alumno                                 NaN
area                                     B
Name: 88692, dtype: object

# AGREGANDO I Y PERIODO

In [59]:
# results_exampanded = (
#     pd.to_datetime(
#         results_with_titles['fecha'],
#         format='%d/%m/%Y'
#     )
# )
# results_exampanded
results_with_titles['fecha'] = (
    pd.to_datetime(
        results_with_titles['fecha'],
        format="%d/%m/%Y"
    )
)
results_with_titles

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,


In [60]:
results_with_titles['year'] = (
    results_with_titles
    ['fecha']
    .dt.year
)
results_with_titles

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024


In [61]:
results_with_titles['month'] = (
    results_with_titles
    ['fecha']
    .dt.month
)

In [62]:
# results_with_titles['fecha'].dt.month > 6
filter_by_month = (
    results_with_titles
    ['fecha']
    .dt.month > 6
)
filter_by_month

results_with_titles.loc[filter_by_month, "number"] = "I"
results_with_titles.loc[~filter_by_month, "number"] = "II"

In [63]:
results_with_titles['number'].value_counts()

number
II    61293
I     54092
Name: count, dtype: int64

In [64]:
# results_with_titles['month'].apply(lambda month: "I" if month > 6 else "II" )
filter_by_number = (
    results_with_titles
    ['number'] == "I"
)

results_with_titles.loc[filter_by_number, "periodo"] = results_with_titles["fecha"].dt.year + 1
results_with_titles.loc[~filter_by_number, "periodo"] = results_with_titles["fecha"].dt.year

In [65]:
results_with_titles[['number', 'periodo']].value_counts()

number  periodo
I       2024.0     12058
II      2024.0     11660
        2023.0     11547
I       2023.0     10869
II      2020.0     10308
        2022.0     10272
        2019.0      9674
I       2020.0      9557
        2019.0      9357
        2022.0      8109
II      2021.0      7832
I       2021.0      4142
Name: count, dtype: int64

In [66]:
results_with_titles[['number', 'periodo', 'year', 'month']].value_counts()

number  periodo  year  month
II      2023.0   2023  3        10906
        2024.0   2024  3         7962
I       2024.0   2023  9         7936
        2022.0   2021  10        7875
        2023.0   2022  10        7605
II      2020.0   2020  3         6610
        2022.0   2022  4         6430
        2021.0   2021  5         6366
I       2020.0   2019  9         6357
II      2019.0   2019  3         6174
I       2019.0   2018  9         5718
        2024.0   2023  8         4122
II      2020.0   2020  2         3698
        2024.0   2024  2         3698
I       2019.0   2018  8         3639
II      2019.0   2019  2         3500
        2022.0   2022  3         3302
I       2023.0   2022  9         3264
        2020.0   2019  8         3200
        2021.0   2020  12        2514
                       11        1628
II      2021.0   2021  6         1466
        2023.0   2023  2          641
        2022.0   2022  2          540
I       2022.0   2021  9          234
Name: count, dtype: i

In [67]:
results_with_titles

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019.0
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019.0
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019.0
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019.0
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024.0
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024.0
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024.0
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024.0


In [68]:
results_with_titles['periodo'] = results_with_titles['periodo'].astype(int)
results_with_titles

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
8,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
9,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
10,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
11,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
12,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135172,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
135173,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
135174,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
135175,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024


# RESETEANDO INDEX

In [69]:
results_with_titles.reset_index(inplace=True)
del results_with_titles['index']
results_with_titles

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
0,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
1,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
2,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
3,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
4,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115380,LAZARO LLANOS RICHARD JOEL,31.799,42.197,73.996,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
115381,ALBUJAR VIERA LUIS ALBERTO,28.948,27.507,56.455,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
115382,POLO REYES JULIO MARTIN,14.262,24.459,38.721,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
115383,ALFARO MEDRANO MARIA JOSE,18.346,18.952,37.298,,,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024


# ELIMINADO PUNTAJES

In [79]:
filter_by_cepunt = results_with_titles['tipo'] == 'CEPUNT'

In [80]:
filter_by_periodo = results_with_titles['periodo'].isin([2019, 2020])

In [85]:
puntaje_4 = results_with_titles[(filter_by_cepunt) & (filter_by_periodo)]
puntaje_4

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
0,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,41.577,78.261,126.368,246.206,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
1,LLANOS SOLIS KIMBERLYN YEI,37.504,67.242,109.044,213.790,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
2,ALVA PEREZ TAMARA ANTONELLA,48.918,53.192,109.036,211.146,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
3,URIOL FLORES LIZ ROCIO,39.342,57.783,102.429,199.554,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
4,VASQUEZ GUERRA CESAR IVAN,35.462,50.730,109.548,195.740,94.842,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31547,LAYZA SANCHEZ ELVIS ALEX,5.900,7.619,7.111,20.630,44.190,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31548,INFANTES REYES JONATAN,2.238,7.939,7.085,17.262,44.190,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31549,SANCHEZ FERNANDEZ SAMUEL JHONATAN,1.827,12.527,0.000,14.354,44.190,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31550,CAIPO POLO HILARIO,5.900,0.000,0.000,5.900,44.190,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020


In [112]:
# puntaje_4.drop(['r1', 'r2', 'r3', 'r5'], axis=1)
puntaje_4.rename(columns={'r4': 'puntaje'}, inplace=True)
puntaje_4

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  puntaje_4.rename(columns={'r4': 'puntaje'}, inplace=True)


Unnamed: 0,names,puntaje,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
0,RODRIGUEZ SANCHEZ ROSICELA ELIZABETH,246.206,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
1,LLANOS SOLIS KIMBERLYN YEI,213.790,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
2,ALVA PEREZ TAMARA ANTONELLA,211.146,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
3,URIOL FLORES LIZ ROCIO,199.554,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
4,VASQUEZ GUERRA CESAR IVAN,195.740,ADMINISTRACION,SI,0,CEPUNT,2018-08-12,TRUJILLO,,,2018,8,I,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31547,LAYZA SANCHEZ ELVIS ALEX,20.630,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31548,INFANTES REYES JONATAN,17.262,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31549,SANCHEZ FERNANDEZ SAMUEL JHONATAN,14.354,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31550,CAIPO POLO HILARIO,5.900,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020


In [99]:
filter_by_tipo = results_with_titles['tipo'].isin(["EXTRAORDINARIO", "ORDINARIO"])
filter_by_periodo2 = results_with_titles['periodo'] == 2024
filter_by_number2 = results_with_titles['number'] == "II"


In [101]:
puntaje1 = results_with_titles[(filter_by_tipo) & (filter_by_periodo2) & (filter_by_number2)]
puntaje1

Unnamed: 0,names,r1,r2,r3,r4,r5,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
99603,CASTRO GARCIA PAULO GAMALIEL,147.835,,,,,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99604,PONCE SALVADOR NAOMI BELEN,126.434,,,,,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99605,PINTADO PEÑA LUCIANA NICOLLE,124.382,,,,,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99606,LEZAMA DIAZ JENNIFER ANGHELINA,124.377,,,,,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99607,ABILA VILLANUEVA ADRIANA AMPARITO,123.361,,,,,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107560,CASTILLO ROJAS HUGO GABRIEL,4.044,,,,,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024
107561,RUIZ JACOBO MERLY MILAGROS,0.986,,,,,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024
107562,ULLOA FLORES MANUEL JHAN FRAN,-0.100,,,,,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024
107563,CONTRERAS GUTIERREZ BRAYAN MERCEDES,-4.159,,,,,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024


In [111]:
# puntaje1 = puntaje1.drop(['r2', 'r3', 'r4', 'r5'], axis=1)
puntaje1.rename(columns={'r1': 'puntaje'}, inplace=True)
puntaje1

Unnamed: 0,names,puntaje,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
99603,CASTRO GARCIA PAULO GAMALIEL,147.835,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99604,PONCE SALVADOR NAOMI BELEN,126.434,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99605,PINTADO PEÑA LUCIANA NICOLLE,124.382,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99606,LEZAMA DIAZ JENNIFER ANGHELINA,124.377,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99607,ABILA VILLANUEVA ADRIANA AMPARITO,123.361,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107560,CASTILLO ROJAS HUGO GABRIEL,4.044,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024
107561,RUIZ JACOBO MERLY MILAGROS,0.986,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024
107562,ULLOA FLORES MANUEL JHAN FRAN,-0.100,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024
107563,CONTRERAS GUTIERREZ BRAYAN MERCEDES,-4.159,ING.AMBIENTAL,NO,199,ORDINARIO,2024-03-17,STGO.DE CHUCO,,,2024,3,II,2024


In [114]:
filter_general = (((filter_by_cepunt) & (filter_by_periodo)) | ((filter_by_tipo) & (filter_by_periodo2) & (filter_by_number2))) 

puntaje3 = results_with_titles[~filter_general]

In [115]:
puntaje3 = puntaje3.drop(['r1', 'r2', 'r4', 'r5'], axis=1)
puntaje3 = puntaje3.rename(columns={'r3': 'puntaje'})
puntaje3

Unnamed: 0,names,puntaje,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
3266,MARCELO SOTO DILMER OLIVER,190.591,CCAS.BIOLOGICAS,SI,3,ORDINARIO,2018-09-22,TRUJILLO,,A,2018,9,I,2019
3267,AREVALO RAMIREZ DULCEMARIA JANINA,136.534,CCAS.BIOLOGICAS,SI,3,ORDINARIO,2018-09-22,TRUJILLO,,A,2018,9,I,2019
3268,RODRIGUEZ SEMINARIO CARMEN EMILIA,115.111,CCAS.BIOLOGICAS,SI,3,ORDINARIO,2018-09-22,TRUJILLO,,A,2018,9,I,2019
3269,RODRIGUEZ PESANTES DAIANA LISETT,115.080,CCAS.BIOLOGICAS,SI,3,ORDINARIO,2018-09-22,TRUJILLO,,A,2018,9,I,2019
3270,DIOSES IBAÑEZ GUILLERMO FRANCO,107.995,CCAS.BIOLOGICAS,SI,3,ORDINARIO,2018-09-22,TRUJILLO,,A,2018,9,I,2019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115380,LAZARO LLANOS RICHARD JOEL,73.996,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
115381,ALBUJAR VIERA LUIS ALBERTO,56.455,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
115382,POLO REYES JULIO MARTIN,38.721,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024
115383,ALFARO MEDRANO MARIA JOSE,37.298,ED.SEC: HISTORIA,NO,205,CEPUNT,2024-02-18,TRUJILLO,,,2024,2,II,2024


# UNIENDO PUNTAJES

In [122]:
final_dirty = pd.concat(
    [
        puntaje1,
        puntaje3,
        puntaje_4
    ]
)
final_dirty

Unnamed: 0,names,puntaje,escuela,resultado,n_df,tipo,fecha,lugar,alumno,area,year,month,number,periodo
99603,CASTRO GARCIA PAULO GAMALIEL,147.835,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99604,PONCE SALVADOR NAOMI BELEN,126.434,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99605,PINTADO PEÑA LUCIANA NICOLLE,124.382,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99606,LEZAMA DIAZ JENNIFER ANGHELINA,124.377,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
99607,ABILA VILLANUEVA ADRIANA AMPARITO,123.361,ADMINISTRACION,SI,186,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,2024,3,II,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31547,LAYZA SANCHEZ ELVIS ALEX,20.630,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31548,INFANTES REYES JONATAN,17.262,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31549,SANCHEZ FERNANDEZ SAMUEL JHONATAN,14.354,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020
31550,CAIPO POLO HILARIO,5.900,ING.DE MINAS,NO,37,CEPUNT,2020-02-09,HUAMACHUCO,,,2020,2,II,2020


In [123]:
final_dirty = final_dirty.reset_index()
final_dirty = final_dirty.drop(['index', 'n_df', 'year', 'month'], axis=1)
final_dirty

Unnamed: 0,names,puntaje,escuela,resultado,tipo,fecha,lugar,alumno,area,number,periodo
0,CASTRO GARCIA PAULO GAMALIEL,147.835,ADMINISTRACION,SI,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,II,2024
1,PONCE SALVADOR NAOMI BELEN,126.434,ADMINISTRACION,SI,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,II,2024
2,PINTADO PEÑA LUCIANA NICOLLE,124.382,ADMINISTRACION,SI,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,II,2024
3,LEZAMA DIAZ JENNIFER ANGHELINA,124.377,ADMINISTRACION,SI,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,II,2024
4,ABILA VILLANUEVA ADRIANA AMPARITO,123.361,ADMINISTRACION,SI,EXTRAORDINARIO,2024-03-03,TRUJILLO,,,II,2024
...,...,...,...,...,...,...,...,...,...,...,...
115380,LAYZA SANCHEZ ELVIS ALEX,20.630,ING.DE MINAS,NO,CEPUNT,2020-02-09,HUAMACHUCO,,,II,2020
115381,INFANTES REYES JONATAN,17.262,ING.DE MINAS,NO,CEPUNT,2020-02-09,HUAMACHUCO,,,II,2020
115382,SANCHEZ FERNANDEZ SAMUEL JHONATAN,14.354,ING.DE MINAS,NO,CEPUNT,2020-02-09,HUAMACHUCO,,,II,2020
115383,CAIPO POLO HILARIO,5.900,ING.DE MINAS,NO,CEPUNT,2020-02-09,HUAMACHUCO,,,II,2020
