In [1]:
import os 
import re
import pandas as pd


from simple_utils import ( # my own simple utils for the project
                   clean_directories, # removes the existing directory tree
                   create_directories, # creates number of directories in a given root directory
                   download_imgw_info_files,
                   get_table_from_imgw,
                   get_info_files_from_table,
                   get_headers_from_info_file,
                   get_dates_from_table,# parse website using bs4 to get the dates range 
                   download_all_zip_files,
                   unzip_all, # unzip all zip files in a folder to a different folder
                   merge_imgw_csv_files ) 

In [2]:
# Root folder for all downloaded and processed IMGW data.
# Set the CLIMATE_DATA_DIR environment variable to relocate it on another machine
# (e.g. "C:/Users/wojci/Downloads/CLIMATE_DATA" on the Windows machine) instead of
# editing this cell; the Linux path below stays the default.
# NOTE: this folder is handed to clean_directories() later in the notebook, so it
# must only point at a location whose contents may be removed.
main_dir = os.environ.get("CLIMATE_DATA_DIR", "/home/wojciech/Pobrane/CLIMATE_DATA")

# Names of the sub-folders created inside every dataset directory.
download_dir = "DATA_DOWNLOAD"
unzipped_dir = "UNZIPPED"
prepared_dir = "PREPARED"
ext = ".zip" # extension of the downloaded files

In [3]:
# Start from a clean slate: clean_directories is the project helper described in the
# import cell as removing the existing directory tree, so anything already stored
# under main_dir is discarded before the fresh download begins.
clean_directories(main_dir)

path exists - removing contents of /home/wojciech/Pobrane/CLIMATE_DATA


### Climate data

In [4]:
# Dataset root for the climate files: <main_dir>/CLIMATE.
climate_directory = "CLIMATE"
climate_path = os.path.join(main_dir, climate_directory)
# makedirs (not mkdir): also creates main_dir when it does not exist yet and does
# not raise FileExistsError when this cell is executed a second time.
os.makedirs(climate_path, exist_ok=True)

# Create the download / unzipped / prepared sub-folders inside the dataset root.
create_directories(climate_path, [download_dir, unzipped_dir, prepared_dir])

Creating DATA_DOWNLOAD directory
Creating UNZIPPED directory
Creating PREPARED directory


In [5]:
# Remote IMGW location of the monthly climate data and the table obtained from it.
climate_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_meteorologiczne/miesieczne/klimat/"
climate_table = get_table_from_imgw(climate_url)

# Local folder that receives everything downloaded for this dataset.
climate_download_path = os.path.join(climate_path, download_dir)

# Info files named in the table (used later as the source of column headers)
# are downloaded first.
climate_info_files = get_info_files_from_table(climate_table)
download_imgw_info_files(
    climate_url,
    climate_info_files,
    climate_download_path,
)

In [6]:
# Date-range entries parsed from the table, followed by the download of all zip
# archives for them. "_m_k" is passed straight to the helper
# (presumably a file-name marker for this dataset - confirm in simple_utils).
climate_date_directories = get_dates_from_table(climate_table)
download_all_zip_files(
    climate_url,
    climate_date_directories,
    "_m_k",
    climate_download_path,
)

100%|██████████| 33/33 [00:02<00:00, 11.30it/s]


In [7]:
# Unpack every archive from the download folder into <climate_path>/UNZIPPED.
climate_unzip_dir = os.path.join(climate_path, unzipped_dir)
unzip_all(
    climate_download_path,  # folder holding the zip files
    climate_unzip_dir,      # folder receiving the extracted files
)

100%|██████████| 35/35 [00:00<00:00, 349.71it/s]


In [8]:
# First info file whose name starts with "k_m_t" (IndexError if there is none);
# the column headers for the merged table are read from it.
kmt_info_file = [name for name in climate_info_files if re.match(r"k_m_t", name) is not None][0]
kmt_info_path = os.path.join(climate_download_path, kmt_info_file)
kmt_headers = get_headers_from_info_file(kmt_info_path)

# Merge the unzipped files selected by the "k_m_t" pattern into a single frame
# and show it as the cell output.
df_kmt = merge_imgw_csv_files(
    climate_unzip_dir,
    headers=kmt_headers,
    f_name_pattern=r"k_m_t",
)
df_kmt

100%|██████████| 66/66 [00:00<00:00, 315.33it/s]


Unnamed: 0,Kod stacji,Nazwa stacji,Rok,Miesiąc,Średnia miesięczna temperatura [°C],Status pomiaru TEMP,Średnia miesięczna wilgotność względna [%],Status pomiaru WLGS,Średnia miesięczna prędkość wiatru [m/s],Status pomiaru FWS,Średnie miesięczne zachmurzenie ogólne [oktanty],Status pomiaru NOS
0,249180010,PSZCZYNA,1971,1,-3.2,,0.0,8.0,1.8,,4.3,
1,249180010,PSZCZYNA,1971,2,0.5,,0.0,8.0,2.0,,6.9,
2,249180010,PSZCZYNA,1971,3,0.6,,0.0,8.0,2.2,,5.8,
3,249180010,PSZCZYNA,1971,4,9.1,,0.0,8.0,1.5,,4.9,
4,249180010,PSZCZYNA,1971,5,16.2,,0.0,8.0,1.2,,4.2,
...,...,...,...,...,...,...,...,...,...,...,...,...
1896,254220090,OLECKO,2004,8,18.2,,0.0,8.0,0.0,8.0,0.0,8.0
1897,254220090,OLECKO,2004,9,12.6,,0.0,8.0,0.0,8.0,5.2,
1898,254220090,OLECKO,2004,10,8.4,,0.0,8.0,0.0,8.0,6.2,
1899,254220090,OLECKO,2004,11,1.8,,0.0,8.0,0.0,8.0,6.7,


In [9]:
# Store the merged k_m_t table in the PREPARED folder of the climate dataset.
kmt_csv_path = os.path.join(climate_path, prepared_dir, "kmt.csv")
df_kmt.to_csv(kmt_csv_path)

In [10]:
# First info file whose name starts with "k_m_d" (IndexError if there is none);
# the column headers for the merged table are read from it.
kmd_info_file = [name for name in climate_info_files if re.match(r"k_m_d", name) is not None][0]
kmd_info_path = os.path.join(climate_download_path, kmd_info_file)
kmd_headers = get_headers_from_info_file(kmd_info_path)

# Merge the unzipped files selected by the "k_m_d" pattern into a single frame
# and show it as the cell output.
df_kmd = merge_imgw_csv_files(
    climate_unzip_dir,
    headers=kmd_headers,
    f_name_pattern=r"k_m_d",
)
df_kmd

100%|██████████| 66/66 [00:00<00:00, 222.71it/s]


Unnamed: 0,Kod stacji,Nazwa stacji,Rok,Miesiąc,Absolutna temperatura maksymalna [°C],Status pomiaru TMAX,Średnia temperatura maksymalna [°C],Status pomiaru TMXS,Absolutna temperatura minimalna [°C],Status pomiaru TMIN,...,Status pomiaru SUMM,Maksymalna dobowa suma opadów [mm],Status pomiaru OPMX,Pierwszy dzień wystapienia opadu maksymalnego,Ostatni dzień wystąpienia opadu maksymalnego,Maksymalna wysokość pokrywy śnieżnej [cm],Status pomiaru PKSN,Liczba dni z pokrywą śnieżną,Liczba dni z opadem deszczu,Liczba dni z opadem śniegu
0,249180010,PSZCZYNA,1971,1,13.9,,0.1,,-21.8,,...,,10.2,,3.0,,30,,23,2,2
1,249180010,PSZCZYNA,1971,2,12.1,,2.7,,-10.8,,...,,8.2,,27.0,,20,,9,2,10
2,249180010,PSZCZYNA,1971,3,20.8,,4.3,,-19.5,,...,,29.6,,22.0,,30,,16,6,12
3,249180010,PSZCZYNA,1971,4,22.4,,13.9,,-2.2,,...,,9.9,,27.0,,0,,0,9,2
4,249180010,PSZCZYNA,1971,5,28.7,,20.7,,-1.1,,...,,18.0,,23.0,,0,,0,13,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1860,254220090,OLECKO,2014,8,34.0,,22.8,,7.0,,...,,16.8,,24.0,,0,,0,23,0
1861,254220090,OLECKO,2014,9,25.5,,19.2,,3.0,,...,,15.9,,21.0,,0,,0,8,0
1862,254220090,OLECKO,2014,10,21.7,,12.4,,-8.2,,...,,8.4,,14.0,,0,,0,8,0
1863,254220090,OLECKO,2014,11,15.2,,4.9,,-13.1,,...,,12.2,,7.0,,3,,7,3,4


In [11]:
# Store the merged k_m_d table in the PREPARED folder of the climate dataset.
kmd_csv_path = os.path.join(climate_path, prepared_dir, "kmd.csv")
df_kmd.to_csv(kmd_csv_path)

### Synoptic data

In [12]:
# Dataset root for the synoptic files: <main_dir>/SYNOPTIC.
synoptic_directory = "SYNOPTIC"
synoptic_path = os.path.join(main_dir, synoptic_directory)
# makedirs (not mkdir): also creates main_dir when it does not exist yet and does
# not raise FileExistsError when this cell is executed a second time.
os.makedirs(synoptic_path, exist_ok=True)

# Create the download / unzipped / prepared sub-folders inside the dataset root.
create_directories(synoptic_path, [download_dir, unzipped_dir, prepared_dir])

Creating DATA_DOWNLOAD directory
Creating UNZIPPED directory
Creating PREPARED directory


In [13]:
# Remote IMGW location of the monthly synoptic data and the table obtained from it.
synoptic_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_meteorologiczne/miesieczne/synop/"
synoptic_table = get_table_from_imgw(synoptic_url)

# Local folder that receives everything downloaded for this dataset.
synoptic_download_path = os.path.join(synoptic_path, download_dir)

# Info files named in the table (used later as the source of column headers)
# are downloaded first.
synoptic_info_files = get_info_files_from_table(synoptic_table)
download_imgw_info_files(
    synoptic_url,
    synoptic_info_files,
    synoptic_download_path,
)

In [14]:
# Date-range entries parsed from the table, followed by the download of all zip
# archives for them. "_m_s" is passed straight to the helper
# (presumably a file-name marker for this dataset - confirm in simple_utils).
synoptic_date_directories = get_dates_from_table(synoptic_table)
download_all_zip_files(
    synoptic_url,
    synoptic_date_directories,
    "_m_s",
    synoptic_download_path,
)

100%|██████████| 31/31 [00:02<00:00, 12.65it/s]


In [15]:
# Unpack every archive from the download folder into <synoptic_path>/UNZIPPED.
synoptic_unzip_dir = os.path.join(synoptic_path, unzipped_dir)
unzip_all(
    synoptic_download_path,  # folder holding the zip files
    synoptic_unzip_dir,      # folder receiving the extracted files
)

100%|██████████| 33/33 [00:00<00:00, 599.54it/s]


#### smd files

In [16]:
# First info file whose name starts with "s_m_d" (IndexError if there is none);
# the column headers for the merged table are read from it.
smd_info_file = [name for name in synoptic_info_files if re.match(r"s_m_d", name) is not None][0]
smd_info_path = os.path.join(synoptic_download_path, smd_info_file)
smd_headers = get_headers_from_info_file(smd_info_path)

# Merge the unzipped files selected by the "s_m_d" pattern into a single frame
# and show it as the cell output.
df_smd = merge_imgw_csv_files(
    synoptic_unzip_dir,
    headers=smd_headers,
    f_name_pattern=r"s_m_d",
)
df_smd

100%|██████████| 62/62 [00:00<00:00, 281.56it/s]


Unnamed: 0,Kod stacji,Nazwa stacji,Rok,Miesiąc,Absolutna temperatura maksymalna [°C],Status pomiaru TMAX,Średnia temperatura maksymalna [°C],Status pomiaru TMXS,Absolutna temperatura minimalna [°C],Status pomiaru TMIN,...,Liczba dni z wiatrem >= 10m/s,Status pomiaru W10D,Liczba dni z wiatrem >15m/s,Status pomiaru W15D,Liczba dni z burzą,Status pomiaru BURD,Liczba dni z rosą,Status pomiaru ROSD,Liczba dni ze szronem,Status pomiaru SZRD
0,349190600,BIELSKO-BIAŁA,2004,1,5.7,,-1.1,,-13.8,,...,9,,0,,0,,0,,0,
1,349190600,BIELSKO-BIAŁA,2004,2,13.8,,3.3,,-15.8,,...,7,,0,,0,,0,,0,
2,349190600,BIELSKO-BIAŁA,2004,3,21.0,,6.7,,-15.5,,...,2,,0,,0,,1,,0,
3,349190600,BIELSKO-BIAŁA,2004,4,20.3,,13.8,,-2.0,,...,1,,0,,1,,17,,3,
4,349190600,BIELSKO-BIAŁA,2004,5,23.5,,16.7,,0.7,,...,1,,0,,5,,20,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3635,354220195,SUWAŁKI,1995,8,32.2,,23.6,,6.2,,...,0,,0,,3,,18,,0,
3636,354220195,SUWAŁKI,1995,9,22.0,,16.4,,-0.9,,...,3,,0,,1,,18,,3,
3637,354220195,SUWAŁKI,1995,10,23.0,,13.1,,-3.2,,...,1,,0,,0,,19,,3,
3638,354220195,SUWAŁKI,1995,11,8.6,,1.4,,-10.0,,...,3,,0,,0,,0,,9,


In [17]:
# Store the merged s_m_d table in the PREPARED folder of the synoptic dataset.
smd_csv_path = os.path.join(synoptic_path, prepared_dir, "smd.csv")
df_smd.to_csv(smd_csv_path)

#### smt files

In [18]:
# First info file whose name starts with "s_m_t" (IndexError if there is none);
# the column headers for the merged table are read from it.
smt_info_file = [name for name in synoptic_info_files if re.match(r"s_m_t", name) is not None][0]
smt_info_path = os.path.join(synoptic_download_path, smt_info_file)
smt_headers = get_headers_from_info_file(smt_info_path)

# Merge the unzipped files selected by the "s_m_t" pattern into a single frame
# and show it as the cell output.
df_smt = merge_imgw_csv_files(
    synoptic_unzip_dir,
    headers=smt_headers,
    f_name_pattern=r"s_m_t",
)
df_smt

100%|██████████| 62/62 [00:00<00:00, 601.12it/s]


Unnamed: 0,Kod stacji,Nazwa stacji,Rok,Miesiąc,Średnie miesięczne zachmurzenie ogólne [oktanty],Status pomiaru NOS,Średnia miesięczna prędkość wiatru [m/s],Status pomiaru FWS,Średnia miesięczna temperatura [°C],Status pomiaru TEMP,...,Średnia miesięczna wilgotność względna [%],Status pomiaru WLGS,Średnie miesięczne ciśnienie na poziomie stacji [hPa],Status pomiaru PPPS,Średnie miesięczne ciśnienie na pozimie morza [hPa],Status pomiaru PPPM,Suma opadu dzień [mm],Status pomiaru WODZ,Suma opadu noc [mm],Status pomiaru WONO
0,349190600,BIELSKO-BIAŁA,1991,1,4.7,,4.6,,-0.1,,...,77.2,,977.7,,1027.7,,3.5,,8.7,
1,349190600,BIELSKO-BIAŁA,1991,2,4.9,,3.3,,-4.3,,...,79.8,,971.5,,1022.0,,10.1,,16.7,
2,349190600,BIELSKO-BIAŁA,1991,3,5.4,,3.8,,5.7,,...,78.9,,970.0,,1018.5,,8.6,,10.9,
3,349190600,BIELSKO-BIAŁA,1991,4,5.3,,3.0,,6.4,,...,76.2,,968.9,,1017.2,,39.5,,24.4,
4,349190600,BIELSKO-BIAŁA,1991,5,6.5,,2.9,,9.2,,...,78.1,,968.6,,1016.4,,62.4,,58.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
691,354220195,SUWAŁKI,2008,8,4.9,,4.0,,17.4,,...,76.7,,989.3,,1011.2,,57.7,,49.6,
692,354220195,SUWAŁKI,2008,9,5.2,,3.6,,11.5,,...,84.2,,996.2,,1018.7,,14.9,,18.9,
693,354220195,SUWAŁKI,2008,10,5.6,,3.8,,8.2,,...,88.5,,992.9,,1015.5,,19.7,,32.3,
694,354220195,SUWAŁKI,2008,11,6.1,,4.8,,3.0,,...,92.4,,989.5,,1012.5,,10.4,,24.4,


In [19]:
# Store the merged s_m_t table in the PREPARED folder of the synoptic dataset.
smt_csv_path = os.path.join(synoptic_path, prepared_dir, "smt.csv")
df_smt.to_csv(smt_csv_path)

### Precipitation data

In [20]:
# Dataset root for the precipitation files: <main_dir>/PRECIPITATION.
precipitation_directory = "PRECIPITATION"
precipitation_path = os.path.join(main_dir, precipitation_directory)
# makedirs (not mkdir): also creates main_dir when it does not exist yet and does
# not raise FileExistsError when this cell is executed a second time.
os.makedirs(precipitation_path, exist_ok=True)

# Create the download / unzipped / prepared sub-folders inside the dataset root.
create_directories(precipitation_path, [download_dir, unzipped_dir, prepared_dir])

Creating DATA_DOWNLOAD directory
Creating UNZIPPED directory
Creating PREPARED directory


In [21]:
# Remote IMGW location of the monthly precipitation data and the table obtained from it.
precipitation_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/dane_meteorologiczne/miesieczne/opad/"
precipitation_table = get_table_from_imgw(precipitation_url)

# Local folder that receives everything downloaded for this dataset.
precipitation_download_path = os.path.join(precipitation_path, download_dir)

# Info files named in the table (used later as the source of column headers)
# are downloaded first.
precipitation_info_files = get_info_files_from_table(precipitation_table)
download_imgw_info_files(
    precipitation_url,
    precipitation_info_files,
    precipitation_download_path,
)


In [22]:
# Date-range entries parsed from the table, followed by the download of all zip
# archives for them. "_m_o" is passed straight to the helper
# (presumably a file-name marker for this dataset - confirm in simple_utils).
precipitation_date_directories = get_dates_from_table(precipitation_table)
download_all_zip_files(
    precipitation_url,
    precipitation_date_directories,
    "_m_o",
    precipitation_download_path,
)


100%|██████████| 33/33 [00:03<00:00,  9.20it/s]


In [23]:
# Unpack every archive from the download folder into <precipitation_path>/UNZIPPED.
precipitation_unzip_dir = os.path.join(precipitation_path, unzipped_dir)
unzip_all(
    precipitation_download_path,  # folder holding the zip files
    precipitation_unzip_dir,      # folder receiving the extracted files
)

100%|██████████| 34/34 [00:00<00:00, 173.04it/s]


In [24]:
# The first listed info file is taken as the header source for this dataset
# (no name filter is applied here, unlike the climate and synoptic sections).
om_info_path = os.path.join(precipitation_download_path, precipitation_info_files[0])
om_headers = get_headers_from_info_file(om_info_path)

# Merge the unzipped files selected by the "o_m_" pattern into a single frame
# and show it as the cell output.
df_om = merge_imgw_csv_files(
    precipitation_unzip_dir,
    headers=om_headers,
    f_name_pattern=r"o_m_",
)
df_om

100%|██████████| 33/33 [00:00<00:00, 36.07it/s]


Unnamed: 0,Kod stacji,Nazwa stacji,Rok,Miesiąc,Miesięczna suma opadów [mm],Status pomiaru SUMM,Liczba dni z opadem śniegu,Status pomiaru LDS,Opad maksymalny [mm],Status pomiaru MAXO,Dzień pierwszy wystąpienia opadu maksymalnego,Dzień ostatni wystąpienia opadu maksymalnego,Liczba dni z pokrywą śnieżną,Status pomiaru LDPS
0,249180020,WARSZOWICE,2019,1,97.7,,19,,21.5,,8.0,,0,
1,249180020,WARSZOWICE,2019,2,34.0,,3,,15.4,,3.0,,0,
2,249180020,WARSZOWICE,2019,3,39.8,,1,,7.9,,15.0,,0,
3,249180020,WARSZOWICE,2019,4,51.1,,1,,20.7,,28.0,,0,
4,249180020,WARSZOWICE,2019,5,124.0,,0,,28.0,,22.0,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60125,254239999,SMOLANY,1965,8,61.0,,0,,17.0,,27.0,,0,
60126,254239999,SMOLANY,1965,9,38.6,,0,,10.7,,15.0,,0,
60127,254239999,SMOLANY,1965,10,17.7,,0,,7.8,,16.0,,0,
60128,254239999,SMOLANY,1965,11,42.3,,12,,8.8,,1.0,,0,


In [25]:
# Store the merged precipitation table in the PREPARED folder of its dataset.
om_csv_path = os.path.join(precipitation_path, prepared_dir, "om.csv")
df_om.to_csv(om_csv_path)