Define the ```displayFileLink``` function, which displays a link to the file named by the ```filename``` string, prefixed by the text given in the ```message``` string.

In [None]:
def displayFileLink(message, filename):
    """Display a link to ``filename`` preceded by ``message``.

    Parameters
    ----------
    message : str
        Text placed in front of the link. It is inserted into the HTML
        prefix as-is (no escaping).
    filename : str
        Path of the file the link points to.

    Notes
    -----
    ``FileLink`` and ``display`` must already be in scope
    (presumably imported from ``IPython.display`` -- confirm in the
    notebook's import cell).
    """
    # inline-block keeps the message on the same line as the link; the
    # entity is written as '&nbsp;' (with the terminating semicolon) so the
    # separator between message and link is a well-formed character reference.
    divText = '<div style="display:inline-block">' + message + '&nbsp;</div>'
    file = FileLink(filename, result_html_prefix=divText)
    display(file)

Define the ```downloadPDF``` function, which downloads a PDF file from the link given by the ```url``` string and saves it under the name given by the ```pdf_filename``` string.

In [None]:
def downloadPDF(url, pdf_filename):
    !wget $url -O $pdf_filename
    displayFileLink('PDF file saved as:', pdf_filename)

Define the ```convertPDFtoCSV``` function, which converts the PDF file named by the ```pdf_filename``` string into a CSV file named by the ```csv_filename``` string (*credits to [tabula-java team](https://github.com/tabulapdf/tabula-java)*).

In [None]:
def convertPDFtoCSV(pdf_filename, csv_filename):
    !java -Dfile.encoding=utf-8 -jar tabula.jar -l --pages 3 $pdf_filename -o $csv_filename
    displayFileLink('CSV file saved as:', csv_filename)

Define the ```convertCSVtoJSON``` function, which converts the CSV file named by the ```csv_filename``` string into a properly processed JSON file named by the ```json_filename``` string, and returns the processed data (```json_data```).

In [None]:
def convertCSVtoJSON(csv_filename, json_filename):
    """Convert the timetable CSV into a list of records and save it as JSON.

    The first CSV row is treated as a header and skipped. For every other
    non-empty row the first six columns are read, in order, as: code,
    "discipline class-period (campus)" text, theory schedule, practice
    schedule, theory teacher and practice teacher.

    Parameters
    ----------
    csv_filename : str
        Path of the UTF-8 encoded CSV file to read.
    json_filename : str
        Path of the JSON file to write.

    Returns
    -------
    list of dict
        The records, exactly as they were serialised to ``json_filename``.
        Each record has the keys ``id`` (0-based data-row number),
        ``codigo``, ``subcodigo``, ``disciplina``, ``campus``, ``periodo``,
        ``turma``, ``teoria``, ``pratica``, ``docente_teoria`` and
        ``docente_pratica``.

    Notes
    -----
    Relies on ``title_pos_tag`` and ``displayFileLink`` being defined
    elsewhere in the notebook.

    Schedule entries in ``teoria`` and ``pratica`` are both numbered from 0.
    Previously ``teoria`` entries started at 1 while ``pratica`` entries and
    the record ``id`` started at 0.
    """
    import csv
    import json

    full_data = []
    # Opened in default text mode (no ``newline=''``), as before, so Python's
    # universal-newline translation is applied before the cell clean-up;
    # changing this would alter how line breaks inside cells are joined.
    with open(csv_filename, encoding="utf-8") as file:
        rows = csv.reader(file, delimiter=',', quotechar='"')
        for index, row in enumerate(rows):
            # Row 0 is the header; blank lines carry no record.
            if index == 0 or not row:
                continue

            cells = [_clean_cell(cell) for cell in row[:6]]
            # Pad short rows so a missing trailing column reads as empty
            # instead of silently reusing the previous row's value.
            cells.extend([''] * (6 - len(cells)))
            codigo, class_info, teoria, pratica, docente_t, docente_p = cells

            codigo = codigo.upper()
            disciplina, campus, periodo, turma, subcodigo = _parse_class_info(
                class_info, codigo)

            full_data.append({
                'id': index - 1,
                'codigo': codigo,
                'subcodigo': subcodigo,
                'disciplina': disciplina,
                'campus': campus,
                'periodo': periodo,
                'turma': turma,
                'teoria': _parse_schedule(teoria),
                'pratica': _parse_schedule(pratica),
                'docente_teoria': title_pos_tag(docente_t),
                'docente_pratica': title_pos_tag(docente_p),
            })

    # Serialise once and return the parsed-back payload: this is the same
    # value the file holds, without re-opening the file to read it.
    payload = json.dumps(full_data)
    with open(json_filename, 'w') as file:
        file.write(payload)
    displayFileLink('JSON file saved as:', json_filename)
    return json.loads(payload)


# Lower-case weekday words that mark the start of each schedule entry.
_WEEK_NAMES = ('segunda', 'terça', 'quarta', 'quinta', 'sexta', 'sábado', 'domingo')


def _clean_cell(cell):
    """Collapse a raw CSV cell to one trimmed line; '¬' and '0' mean empty."""
    data = cell.replace('\r', '').replace('\n', ' ').replace(' , ', ', ').strip()
    return '' if data in ('¬', '0') else data


def _parse_class_info(data, codigo):
    """Split 'discipline class-period (campus)' text into its fields.

    Returns ``(disciplina, campus, periodo, turma, subcodigo)``.
    """
    # Campus: everything after the last '(' minus the final character
    # (the closing parenthesis in the expected format).
    data, _, campus = data.rpartition('(')
    campus = title_pos_tag(campus[:-1])

    # Discipline name is everything before the last space; the last word
    # holds "class-period".
    disciplina, _, data = data.strip().rpartition(' ')
    disciplina = title_pos_tag(disciplina)

    turma, _, periodo = data.strip().rpartition('-')
    turma = turma.upper()
    periodo = periodo.capitalize()

    # Sub-code: the part of the code before '-' with its first
    # len(turma) + 1 characters removed (presumably a one-character prefix
    # followed by the class id -- confirm against real codes).
    subcodigo = codigo.partition('-')[0][len(turma) + 1:]

    return disciplina, campus, periodo, turma, subcodigo


def _parse_schedule(data):
    """Parse a theory/practice schedule cell into a list of entry dicts.

    Each entry is expected to look like
    ``<weekday> das <start> às <end>, sala <room>, <frequency>``; missing
    parts come out as empty strings. Entries get 0-based ``id`` values.
    """
    # Start a new line at every weekday word so each entry sits on its own line.
    for week in _WEEK_NAMES:
        data = data.replace(week, '\n' + week)

    entries = []
    for line in data.splitlines():
        # Drop the separator left at the end of an entry (', ' or ',')
        # regardless of the spacing used in the source cell.
        line = line.strip().rstrip(',').rstrip()
        if not line:
            continue
        dia_da_semana, _, rest = line.partition(' das ')
        entrada, _, rest = rest.partition(' às ')
        saida, _, rest = rest.partition(', sala ')
        sala, _, frequencia = rest.partition(', ')
        entries.append({
            'id': len(entries),
            'dia_da_semana': dia_da_semana.capitalize(),
            'horario_de_entrada': entrada,
            'horario_de_saida': saida,
            'sala': sala.upper(),
            'frequencia': frequencia.capitalize(),
        })
    return entries

Define the ```convertJSONtoSheet``` function, which turns the JSON data given by ```json_data``` into a spreadsheet.

In [None]:
def convertJSONtoSheet(json_data):
    """Build a qgrid spreadsheet widget from the discipline records.

    Parameters
    ----------
    json_data : iterable of dict
        Records with at least the keys ``id``, ``codigo``, ``subcodigo``,
        ``disciplina``, ``campus``, ``periodo``, ``turma``,
        ``docente_teoria`` and ``docente_pratica``. The records are only
        read, never modified.

    Returns
    -------
    qgrid.QgridWidget
        Widget showing one row per record, labelled by ``str(record['id'])``.

    Raises
    ------
    KeyError
        If a record lacks one of the keys listed above.
    """
    import pandas as pd
    import qgrid

    # (column title, record key) pairs in display order. The nested
    # 'teoria' / 'pratica' schedules are not shown in the sheet.
    columns = (
        ('ID', 'id'),
        ('Código', 'codigo'),
        ('Subcódigo', 'subcodigo'),
        ('Disciplina', 'disciplina'),
        ('Campus', 'campus'),
        ('Período', 'periodo'),
        ('Turma', 'turma'),
        ('Docente teoria', 'docente_teoria'),
        ('Docente prática', 'docente_pratica'),
    )

    # Materialise once so generators can be walked several times.
    records = list(json_data)

    # Each column is built from the records directly. The loop variable no
    # longer shares a name with a column container, which previously caused
    # the 'Disciplina' column to be overwritten by a record dict.
    df = pd.DataFrame(
        {title: [record[key] for record in records] for title, key in columns},
        index=[str(record['id']) for record in records],
        columns=[title for title, _ in columns],
    )

    sheet = qgrid.QgridWidget(df=df)
    return sheet

In [16]:
# a = pd.Series()

In [19]:
# a['0'] = 1

In [20]:
# a

0    1
dtype: int64