In [45]:
import pandas as pd
import pandera as pa

# Data Extraction

In [46]:
df = pd.read_csv("ocorrencia_2010_2020.csv", sep = ";", parse_dates=['ocorrencia_dia'], dayfirst = True)
df.head(10)

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03,12:00:00,0
1,40349,40349,INCIDENTE,BELÉM,PA,SBBE,2010-01-03,11:05:00,0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03,03:00:00,0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04,17:30:00,0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05,19:25:00,0
5,39807,39807,INCIDENTE,SALVADOR,BA,****,2010-01-06,17:53:00,0
6,40215,40215,INCIDENTE,COARI,AM,SBUY,2010-01-07,18:40:00,0
7,39707,39707,INCIDENTE GRAVE,CANUTAMA,AM,****,2010-01-09,12:30:00,3
8,39156,39156,INCIDENTE GRAVE,CASCAVEL,PR,SBCA,2010-01-10,23:15:00,2
9,39711,39711,INCIDENTE GRAVE,PARÁ DE MINAS,MG,****,2010-01-10,20:00:00,0


Validation

In [47]:
schema = pa.DataFrameSchema(
    columns = {
        "codigo": pa.Column(pa.Int, required=False),
        "codigo_ocorrencia": pa.Column(pa.Int),
        "codigo_ocorrencia2": pa.Column(pa.Int),
        "ocorrencia_classificacao": pa.Column(pa.String),
        "ocorrencia_cidade": pa.Column(pa.String),
        "ocorrencia_uf": pa.Column(pa.String, pa.Check.str_length(2,2)),
        "ocorrencia_aerodromo": pa.Column(pa.String),
        "ocorrencia_dia": pa.Column(pa.DateTime),
        "ocorrencia_hora": pa.Column(pa.String,
                                     pa.Check.str_matches(r'^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])(:[0-5][0-9])?$'),
                                     nullable = True),
        "total_recomendacoes": pa.Column(pa.Int)
    }
)

In [48]:
schema.validate(df)

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03,12:00:00,0
1,40349,40349,INCIDENTE,BELÉM,PA,SBBE,2010-01-03,11:05:00,0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03,03:00:00,0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04,17:30:00,0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05,19:25:00,0
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,INCIDENTE,CAMPINAS,SP,SBKP,2020-12-29,19:00:00,0
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,****,2020-12-30,18:30:00,0
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30,00:54:00,0
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,****,2020-12-31,09:00:00,0


In [5]:
df.loc[1, 'ocorrencia_cidade']

'BELÉM'

In [6]:
df.loc[1:3]

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
1,40349,40349,INCIDENTE,BELÉM,PA,SBBE,2010-01-03,11:05:00,0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03,03:00:00,0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04,17:30:00,0


In [7]:
df.loc[[10,40]]

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
10,39789,39789,INCIDENTE,SÃO PEDRO DO SUL,RS,****,2010-01-10,21:30:00,0
40,39158,39158,INCIDENTE,BELÉM,PA,****,2010-01-28,16:00:00,0


# Data Cleaning

In [8]:
df.loc[:, 'ocorrencia_cidade']

0           RIO DE JANEIRO
1                    BELÉM
2           RIO DE JANEIRO
3       LUCAS DO RIO VERDE
4                  PELOTAS
               ...        
5747              CAMPINAS
5748     LAGOA DA CONFUSÃO
5749        RIO DE JANEIRO
5750             VICENTINA
5751        RIO DE JANEIRO
Name: ocorrencia_cidade, Length: 5752, dtype: object

In [9]:
df.codigo_ocorrencia.is_unique

True

In [10]:
df.set_index('codigo_ocorrencia', inplace=True)
df.head()

Unnamed: 0_level_0,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
codigo_ocorrencia,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03,12:00:00,0
40349,40349,INCIDENTE,BELÉM,PA,SBBE,2010-01-03,11:05:00,0
40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03,03:00:00,0
39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04,17:30:00,0
40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05,19:25:00,0


In [11]:
df.loc[40324]

codigo_ocorrencia2                        40324
ocorrencia_classificacao              INCIDENTE
ocorrencia_cidade                       PELOTAS
ocorrencia_uf                                RS
ocorrencia_aerodromo                       SBPK
ocorrencia_dia              2010-01-05 00:00:00
ocorrencia_hora                        19:25:00
total_recomendacoes                           0
Name: 40324, dtype: object

In [12]:
df.reset_index(drop=True, inplace=True)

In [13]:
df.head()

Unnamed: 0,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03,12:00:00,0
1,40349,INCIDENTE,BELÉM,PA,SBBE,2010-01-03,11:05:00,0
2,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03,03:00:00,0
3,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04,17:30:00,0
4,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05,19:25:00,0


In [14]:
df.loc[0,'ocorrencia_aerodromo'] = ''

In [15]:
df.head(1)

Unnamed: 0,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03,12:00:00,0


In [49]:
df.head(1)

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03,12:00:00,0


In [50]:
df.loc[1] = 20

In [51]:
df.head(2)

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03 00:00:00,12:00:00,0
1,20,20,20,20,20,20,20,20,20


In [52]:
df.loc[:, 'total_recomendacoes'] = 10
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03 00:00:00,12:00:00,10
1,20,20,20,20,20,20,20,20,10
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04 00:00:00,17:30:00,10
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,INCIDENTE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,****,2020-12-30 00:00:00,18:30:00,10
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,****,2020-12-31 00:00:00,09:00:00,10


In [53]:
df.loc[df.ocorrencia_uf == 'SP', ['ocorrencia_classificacao']] = 'GRAVE'
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03 00:00:00,12:00:00,10
1,20,20,20,20,20,20,20,20,10
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04 00:00:00,17:30:00,10
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,****,2020-12-30 00:00:00,18:30:00,10
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,****,2020-12-31 00:00:00,09:00:00,10


In [54]:
df.loc[df.ocorrencia_uf == 'SP']
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,****,2010-01-03 00:00:00,12:00:00,10
1,20,20,20,20,20,20,20,20,10
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,****,2010-01-04 00:00:00,17:30:00,10
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,****,2020-12-30 00:00:00,18:30:00,10
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,****,2020-12-31 00:00:00,09:00:00,10


In [55]:
df.loc[df.ocorrencia_aerodromo == '****', ['ocorrencia_aerodromo']] = pd.NA

In [56]:
df.head()

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10
1,20,20,20,20,20,20,20,20,10
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10


In [58]:
df.replace(['**','###!','####', '*****','NULL'], pd.NA, inplace=True)

In [59]:
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10
1,20,20,20,20,20,20,20,20,10
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,,2020-12-30 00:00:00,18:30:00,10
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,,2020-12-31 00:00:00,09:00:00,10


In [60]:
df.isna().sum()

codigo_ocorrencia              0
codigo_ocorrencia2             0
ocorrencia_classificacao       0
ocorrencia_cidade              0
ocorrencia_uf                  0
ocorrencia_aerodromo        2181
ocorrencia_dia                 0
ocorrencia_hora                1
total_recomendacoes            0
dtype: int64

In [61]:
df.isnull().sum()

codigo_ocorrencia              0
codigo_ocorrencia2             0
ocorrencia_classificacao       0
ocorrencia_cidade              0
ocorrencia_uf                  0
ocorrencia_aerodromo        2181
ocorrencia_dia                 0
ocorrencia_hora                1
total_recomendacoes            0
dtype: int64

In [62]:
df.fillna(10, inplace=True)
df.isnull().sum()

codigo_ocorrencia           0
codigo_ocorrencia2          0
ocorrencia_classificacao    0
ocorrencia_cidade           0
ocorrencia_uf               0
ocorrencia_aerodromo        0
ocorrencia_dia              0
ocorrencia_hora             0
total_recomendacoes         0
dtype: int64

In [28]:
df.replace([10], pd.NA, inplace=True)

In [63]:
df.isnull().sum()

codigo_ocorrencia           0
codigo_ocorrencia2          0
ocorrencia_classificacao    0
ocorrencia_cidade           0
ocorrencia_uf               0
ocorrencia_aerodromo        0
ocorrencia_dia              0
ocorrencia_hora             0
total_recomendacoes         0
dtype: int64

In [64]:
df.head()

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,10,2010-01-03 00:00:00,12:00:00,10
1,20,20,20,20,20,20,20,20,10
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,10,2010-01-04 00:00:00,17:30:00,10
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10


In [65]:
df.replace([10], pd.NA, inplace=True)
df.isnull().sum()

codigo_ocorrencia              0
codigo_ocorrencia2             0
ocorrencia_classificacao       0
ocorrencia_cidade              0
ocorrencia_uf                  0
ocorrencia_aerodromo        2181
ocorrencia_dia                 0
ocorrencia_hora                1
total_recomendacoes         5752
dtype: int64

In [66]:
df.fillna(value={'total_recomendacoes':10}, inplace=True)
df.isnull().sum()

codigo_ocorrencia              0
codigo_ocorrencia2             0
ocorrencia_classificacao       0
ocorrencia_cidade              0
ocorrencia_uf                  0
ocorrencia_aerodromo        2181
ocorrencia_dia                 0
ocorrencia_hora                1
total_recomendacoes            0
dtype: int64

In [67]:
df['total_recomendacoes_bkp'] = df.total_recomendacoes
df.head()

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes,total_recomendacoes_bkp
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10.0,10.0
1,20,20,20,20,20,20,20,20,10.0,10.0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10.0,10.0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10.0,10.0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10.0,10.0


In [68]:
df.drop(['total_recomendacoes_bkp'], axis=1, inplace=True)

In [69]:
df.head()

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10.0
1,20,20,20,20,20,20,20,20,10.0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10.0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10.0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10.0


In [70]:
df.dropna()
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10.0
1,20,20,20,20,20,20,20,20,10.0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10.0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10.0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10.0
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10.0
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,,2020-12-30 00:00:00,18:30:00,10.0
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10.0
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,,2020-12-31 00:00:00,09:00:00,10.0


In [71]:
df.dropna(subset=['ocorrencia_uf'])

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10.0
1,20,20,20,20,20,20,20,20,10.0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10.0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10.0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10.0
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10.0
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,,2020-12-30 00:00:00,18:30:00,10.0
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10.0
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,,2020-12-31 00:00:00,09:00:00,10.0


In [72]:
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10.0
1,20,20,20,20,20,20,20,20,10.0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10.0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10.0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10.0
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10.0
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,,2020-12-30 00:00:00,18:30:00,10.0
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10.0
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,,2020-12-31 00:00:00,09:00:00,10.0


In [73]:
df.drop_duplicates(inplace=True)

In [74]:
df

Unnamed: 0,codigo_ocorrencia,codigo_ocorrencia2,ocorrencia_classificacao,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,ocorrencia_dia,ocorrencia_hora,total_recomendacoes
0,40211,40211,INCIDENTE,RIO DE JANEIRO,RJ,,2010-01-03 00:00:00,12:00:00,10.0
1,20,20,20,20,20,20,20,20,10.0
2,40351,40351,INCIDENTE,RIO DE JANEIRO,RJ,SBRJ,2010-01-03 00:00:00,03:00:00,10.0
3,39527,39527,ACIDENTE,LUCAS DO RIO VERDE,MT,,2010-01-04 00:00:00,17:30:00,10.0
4,40324,40324,INCIDENTE,PELOTAS,RS,SBPK,2010-01-05 00:00:00,19:25:00,10.0
...,...,...,...,...,...,...,...,...,...
5747,79804,79804,GRAVE,CAMPINAS,SP,SBKP,2020-12-29 00:00:00,19:00:00,10.0
5748,79757,79757,INCIDENTE GRAVE,LAGOA DA CONFUSÃO,TO,,2020-12-30 00:00:00,18:30:00,10.0
5749,79802,79802,INCIDENTE,RIO DE JANEIRO,RJ,SBGL,2020-12-30 00:00:00,00:54:00,10.0
5750,79756,79756,INCIDENTE GRAVE,VICENTINA,MS,,2020-12-31 00:00:00,09:00:00,10.0
