In [1]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
from functools import reduce

In [2]:
# Maps the base name of each CSV file to produce (key) to the base name of the
# raw .txt export it is generated from (value). Extensions and directories are
# added where the files are opened.
txt_file_names = {
    # Taió
    'chuva-taio': '83050000TAIÓ_2962005-1442014-chuva',
    'nivel-taio': '83050000TAIÓ_2962005-1442014-nível',
    # Ituporanga
    'chuva-ituporanga': '83250000ITUPORANGA_3172005-1442014-chuva',
    'nivel-ituporanga': '83250000ITUPORANGA_2962005-1032014-nível',
    # Rio do Sul
    'chuva-rio_do_sul': '83300200RIODOSUL-NOVO_2862005-1442014-Chuva',
    'nivel-rio_do_sul': '83300200RIODOSUL-NOVO_2982005-1422014 -nível',
}

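Podemos transformar os arquivos .txt em .csv: a primeira linha de cada arquivo é descartada, os campos separados por espaços viram colunas e um cabeçalho (data, hora e a medida) é gravado no início de cada .csv.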

In [3]:
# Convert every whitespace-delimited .txt export into a comma-separated file
# under csv/, writing a header row that matches the kind of measurement.
for csv_name, txt_name in txt_file_names.items():
    # NOTE(review): no explicit encoding is passed, so the platform default is
    # used to decode the input — confirm it matches the .txt exports.
    with open(f'txt/{txt_name}.txt', 'r') as in_file:
        # The first line of the .txt file is skipped and never reaches the CSV.
        next(in_file)
        stripped = (line.strip() for line in in_file)
        # Blank lines are dropped; the remaining lines are split on whitespace.
        lines = (line.split() for line in stripped if line)
        # newline='' is required for files handed to csv.writer; without it
        # an extra '\r' is emitted on platforms that translate '\n' to '\r\n'.
        with open(f'csv/{csv_name}.csv', 'w', newline='') as out_file:
            writer = csv.writer(out_file)
            # The header depends on whether the CSV name mentions rain
            # ('chuva') or level ('nivel'); otherwise no header is written.
            if 'chuva' in csv_name:
                writer.writerow(('data', 'hora', 'chuva (mm)'))
            elif 'nivel' in csv_name:
                writer.writerow(('data', 'hora', 'nivel (cm)'))
            writer.writerows(lines)

In [4]:
# Load the converted CSV files, one (rain, level) pair per station.
chuva_taio, nivel_taio = (
    pd.read_csv('csv/chuva-taio.csv'),
    pd.read_csv('csv/nivel-taio.csv'),
)
chuva_ituporanga, nivel_ituporanga = (
    pd.read_csv('csv/chuva-ituporanga.csv'),
    pd.read_csv('csv/nivel-ituporanga.csv'),
)
chuva_rio_do_sul, nivel_rio_do_sul = (
    pd.read_csv('csv/chuva-rio_do_sul.csv'),
    pd.read_csv('csv/nivel-rio_do_sul.csv'),
)

In [12]:
# Preview the first rows of the frame loaded from csv/chuva-taio.csv.
chuva_taio.head()

Unnamed: 0,data,hora,chuva (mm)
0,29/06/2005,00:00:00,
1,29/06/2005,01:00:00,
2,29/06/2005,02:00:00,
3,29/06/2005,03:00:00,
4,29/06/2005,04:00:00,


In [13]:
# Preview the first rows of the frame loaded from csv/chuva-ituporanga.csv.
chuva_ituporanga.head()

Unnamed: 0,data,hora,chuva (mm)
0,31/07/2005,01:00:00,
1,31/07/2005,02:00:00,
2,31/07/2005,03:00:00,
3,31/07/2005,04:00:00,
4,31/07/2005,05:00:00,


In [5]:
# Station frames, in the order their measurement columns appear in the merged table.
data_frames = [chuva_taio, nivel_taio, chuva_ituporanga, nivel_ituporanga, chuva_rio_do_sul, nivel_rio_do_sul]
# Final name of each frame's measurement column, aligned with data_frames.
column_names = ['chuva-taio', 'nivel-taio', 'chuva-ituporanga', 'nivel-ituporanga', 'chuva-rio_do_sul', 'nivel-rio_do_sul']

# Give every measurement column its station-specific name *before* merging.
# All rain frames share the header 'chuva (mm)' and all level frames share
# 'nivel (cm)', so merging them as-is makes pandas disambiguate with _x/_y
# suffixes, and relabelling the result by position depends on that behaviour.
# rename() ignores keys that are absent, so each frame only has its own
# measurement column renamed.
renamed_frames = [
    frame.rename(columns={'chuva (mm)': column_name, 'nivel (cm)': column_name})
    for frame, column_name in zip(data_frames, column_names)
]

# Outer-join all frames on the (data, hora) key pair.
df_merged = reduce(
    lambda left, right: pd.merge(left, right, on=['data', 'hora'], how='outer'),
    renamed_frames,
)
# Fail loudly if an input frame did not have the expected header.
assert list(df_merged.columns) == ['data', 'hora', *column_names], 'unexpected columns after merge'

# Drop rows that have a null value in any column.
df = df_merged.dropna()

df.head(10)

Unnamed: 0,data,hora,chuva-taio,nivel-taio,chuva-ituporanga,nivel-ituporanga,chuva-rio_do_sul,nivel-rio_do_sul
678,31/07/2005,21:00:00,0.0,98.0,3.0,30.0,0.0,171.0
679,31/07/2005,22:00:00,0.0,98.0,12.0,39.0,0.0,171.0
680,31/07/2005,23:00:00,0.0,97.0,19.0,59.0,0.0,171.0
681,01/08/2005,00:00:00,0.0,97.0,5.0,82.0,0.0,170.0
684,01/08/2005,03:00:00,0.0,97.0,1.0,27.0,0.0,170.0
686,01/08/2005,05:00:00,0.0,97.0,1.0,27.0,0.0,169.0
688,01/08/2005,07:00:00,0.0,96.0,1.0,27.0,0.0,169.0
689,01/08/2005,08:00:00,0.0,96.0,2.0,30.0,0.0,168.0
690,01/08/2005,09:00:00,0.0,96.0,12.0,39.0,0.0,169.0
691,01/08/2005,10:00:00,0.0,95.0,19.0,59.0,0.0,168.0


In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns of the merged frame.
df.describe()

Unnamed: 0,chuva-taio,nivel-taio,chuva-ituporanga,nivel-ituporanga,chuva-rio_do_sul,nivel-rio_do_sul
count,66957.0,66957.0,66957.0,66957.0,66957.0,66957.0
mean,0.041549,180.214735,0.053664,61.800499,0.055229,208.492316
std,0.475469,152.568151,0.758685,40.230845,0.543762,127.192194
min,0.0,44.0,0.0,8.0,0.0,91.0
25%,0.0,83.0,0.0,37.0,0.0,127.0
50%,0.0,121.0,0.0,49.0,0.0,165.0
75%,0.0,210.0,0.0,70.0,0.0,239.0
max,48.0,975.0,140.6,512.0,38.0,1183.0


In [8]:
# Print the index range, per-column non-null counts and dtypes of the merged frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 66957 entries, 678 to 130439
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   data              66957 non-null  object 
 1   hora              66957 non-null  object 
 2   chuva-taio        66957 non-null  float64
 3   nivel-taio        66957 non-null  float64
 4   chuva-ituporanga  66957 non-null  float64
 5   nivel-ituporanga  66957 non-null  float64
 6   chuva-rio_do_sul  66957 non-null  float64
 7   nivel-rio_do_sul  66957 non-null  float64
dtypes: float64(6), object(2)
memory usage: 4.6+ MB
