In [46]:
import pandas as pd

files = {
    "B00202A": "dane_imgw/B00202A_2024_10.csv",
    "B00300S": "dane_imgw/B00300S_2024_10.csv",
    "B00305A": "dane_imgw/B00305A_2024_10.csv",
    "B00604S": "dane_imgw/B00604S_2024_10.csv",
    "B00606S": "dane_imgw/B00606S_2024_10.csv",
    "B00608S": "dane_imgw/B00608S_2024_10.csv",
    "B00702A": "dane_imgw/B00702A_2024_10.csv",
    "B00703A": "dane_imgw/B00703A_2024_10.csv",
    "B00714A": "dane_imgw/B00714A_2024_10.csv",
    "B00802A": "dane_imgw/B00802A_2024_10.csv",
}

dataframes = {}
for key, path in files.items():
    df = pd.read_csv(path, header=None, sep=";")
    df.columns = ["kodSH", "parametrSH", "czas", f"{key}"]
    df['czas'] = pd.to_datetime(df['czas'], format="%Y-%m-%d %H:%M")
    dataframes[key] = df

base_key = "B00202A"
base_df = dataframes[base_key][["kodSH", "czas"]].drop_duplicates() 

for key, df in dataframes.items():
    trimmed = df[["kodSH", "czas", f"{key}"]]
    base_df = pd.merge(base_df, trimmed, on=["kodSH", "czas"], how="left")

base_df['data'] = base_df['czas'].dt.date
base_df['time'] = base_df['czas'].dt.time
base_df = base_df.drop(columns=["czas"])
base_df

Unnamed: 0,kodSH,B00202A,B00300S,B00305A,B00604S,B00606S,B00608S,B00702A,B00703A,B00714A,B00802A,data,time
0,249190090,94.0,3.29,0.17,,0.0,0.0,0.0,0.9,,92.06,2024-10-01,00:00:00
1,249190090,83.0,3.16,0.11,,,0.0,0.0,0.2,,91.97,2024-10-01,00:10:00
2,249190090,79.0,3.05,0.03,,,0.0,0.0,0.7,,91.98,2024-10-01,00:20:00
3,249190090,70.0,3.01,-0.02,,,0.0,0.2,1.4,,92.45,2024-10-01,00:30:00
4,249190090,70.0,3.07,0.00,,,0.0,0.1,1.5,,92.50,2024-10-01,00:40:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1166100,254150060,284.0,11.50,11.00,,,0.0,7.0,9.3,,80.90,2024-10-30,18:10:00
1166101,254150060,277.0,11.40,10.60,,,0.0,6.7,9.2,,80.90,2024-10-30,18:20:00
1166102,254150060,278.0,11.40,10.70,,,0.0,6.8,9.4,,81.70,2024-10-30,18:30:00
1166103,254150060,278.0,11.40,10.90,,,0.0,6.2,8.7,,82.00,2024-10-30,18:40:00


### koordynaty stacji

In [47]:
import geopandas as gpd

effacility = gpd.read_file("dane_przestrzenne/effacility.geojson")
effacility = effacility[["ifcid", "name1", "geometry"]]
effacility.rename(columns={"name1": "name"}, inplace=True)
effacility

Unnamed: 0,ifcid,name,geometry
0,149180010,Krzyżanowice,POINT (448926.076 236501.65)
1,149180020,Chałupki,POINT (451760.53 228509.024)
2,149180030,Łaziska,POINT (460035.038 228718.997)
3,149180040,Gołkowice,POINT (463863.107 228846.279)
4,149180050,Zebrzydowice,POINT (472228.166 223702.104)
...,...,...,...
2647,453220010,Jezioro Rajgrodzkie,POINT (741832.789 659022.551)
2648,454170010,Jezioro Jasień,POINT (409980.914 716533.986)
2649,454170020,Jezioro Raduńskie Górne,POINT (432806.035 707830.127)
2650,454170030,Jezioro Łebsko,POINT (402818.172 765146.628)


### wybranie odpowiednich danych z tabeli powiaty

In [48]:
powiaty = powiaty = gpd.read_file("dane_przestrzenne/powiaty.shp")
powiaty = powiaty[["id", "name", "national_c", "geometry"]]
powiaty['woj_code'] = powiaty.apply(lambda x: x['national_c'][:2], axis=1)
powiaty.head()

Unnamed: 0,id,name,national_c,geometry,woj_code
0,5,złotoryjski,226,"POLYGON ((269083.616 370884.075, 269164.192 37...",2
1,10,jeleniogórski,206,"POLYGON ((244044.052 332419.671, 244053.906 33...",2
2,15,lubański,210,"POLYGON ((228421.669 366620.161, 228453.763 36...",2
3,28,oleśnicki,214,"POLYGON ((377122.144 378970.426, 377361.467 37...",2
4,32,bolesławiecki,201,"POLYGON ((235157.11 379452.665, 235210.213 379...",2


### wybranie odpowiednich danych z tabeli województwa

In [49]:
wojewodztwa = gpd.read_file("dane_przestrzenne/woj.shp")
wojewodztwa = wojewodztwa[["id", "name", "national_c", "geometry"]]
wojewodztwa.head()

Unnamed: 0,id,name,national_c,geometry
0,186,dolnośląskie,2,"POLYGON ((205901.102 343181.955, 205962.944 34..."
1,325,kujawsko-pomorskie,4,"POLYGON ((383436.881 613642.85, 383483.577 613..."
2,523,lubelskie,6,"POLYGON ((681015.792 420027.27, 681441.239 420..."
3,637,lubuskie,8,"POLYGON ((196217.27 448607.281, 196232.688 448..."
4,811,łódzkie,10,"POLYGON ((435569.18 387437.039, 435633.348 387..."


### połączenie powiatów z województwami

In [50]:
merged = pd.merge(powiaty, wojewodztwa, left_on="woj_code", right_on="national_c", how="left")
merged = merged.drop(columns=["id_y", "national_c_y", "national_c_x", "woj_code", "id_x", "geometry_y"])
merged.rename(columns={"name_x": "powiat", "name_y": "wojewodztwo"}, inplace=True)
merged.set_geometry("geometry_x", inplace=True)

merged = gpd.sjoin(effacility, merged, how="left", predicate="within")
merged = merged.drop(columns=["index_right"])
merged = merged.drop_duplicates(subset="ifcid")
merged

Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:2180
Right CRS: PROJCS["ETRS89_Poland_CS92",GEOGCS["ETRS89",DATUM[ ...

  merged = gpd.sjoin(effacility, merged, how="left", predicate="within")


Unnamed: 0,ifcid,name,geometry,powiat,wojewodztwo
0,149180010,Krzyżanowice,POINT (448926.076 236501.65),raciborski,śląskie
1,149180020,Chałupki,POINT (451760.53 228509.024),raciborski,śląskie
2,149180030,Łaziska,POINT (460035.038 228718.997),wodzisławski,śląskie
3,149180040,Gołkowice,POINT (463863.107 228846.279),wodzisławski,śląskie
4,149180050,Zebrzydowice,POINT (472228.166 223702.104),cieszyński,śląskie
...,...,...,...,...,...
2647,453220010,Jezioro Rajgrodzkie,POINT (741832.789 659022.551),grajewski,podlaskie
2648,454170010,Jezioro Jasień,POINT (409980.914 716533.986),bytowski,pomorskie
2649,454170020,Jezioro Raduńskie Górne,POINT (432806.035 707830.127),kartuski,pomorskie
2650,454170030,Jezioro Łebsko,POINT (402818.172 765146.628),słupski,pomorskie


# dodanie danych o stacjach do mongo

In [None]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

client = MongoClient("mongodb://127.0.0.1:27017/")
db = client["pag"]

from pyproj import Transformer
transformer = Transformer.from_crs("EPSG:2180", "EPSG:4326", always_xy=True)

documents = []
for row in merged.itertuples():
    document = {
        "_id": row.ifcid,
        "name": row.name,
        "location": {
            "type": "Point",
            "coordinates": [*transformer.transform(row.geometry.x, row.geometry.y)]
        },
        "powiat": row.powiat,
        "wojewodztwo": row.wojewodztwo
    }
    documents.append(document)

db.stations.insert_many(documents)
db.stations.create_index([("location", "2dsphere")])

# dodanie danych meteorologicznych do redis

In [55]:
import redis
import pandas as pd
from datetime import datetime

r = redis.Redis(host='localhost', port=6379, db=0)

for index, row in base_df.iterrows():
    timestamp = int(datetime.strptime(f"{row['data']} {row['time']}", 
                                    "%Y-%m-%d %H:%M:%S").timestamp() * 1000)
    
    station_id = str(row['kodSH'])
    
    measure_columns = ["B00202A"]
    
    for measure in measure_columns:
        if pd.notna(row[measure]):
            key = f"{station_id}:{measure}"
            
            try:
                r.execute_command(
                    'TS.CREATE', key,
                    'LABELS', 'station_id', station_id,
                    'measure_type', measure
                )
            except:
                pass
            
            r.execute_command(
                'TS.ADD', key,
                timestamp,
                float(row[measure])
            )


## dodanie dnia i nocy

In [53]:
from datetime import timedelta
from datetime import date
from datetime import datetime
from astral import LocationInfo
from astral.sun import sun

sun_times = []


def calculate_sun_times(name, latitude, longitude, year=2024, month=10):
    location = LocationInfo(name, "Poland", "Europe/Warsaw", latitude, longitude)
    sun_times = []
    days_in_month = (date(year, month + 1, 1) - timedelta(days=1)).day 

    for day in range(1, days_in_month + 1):
        current_date = date(year, month, day)
        s = sun(location.observer, date=current_date, tzinfo=location.timezone)
        sun_times.append({
            "date": current_date.isoformat(),
            "dawn": int(datetime.fromisoformat(str(s["dawn"])).timestamp() * 1000),
            "dusk": int(datetime.fromisoformat(str(s["dusk"])).timestamp() * 1000)
        })

    return sun_times

client = MongoClient("mongodb://localhost:27017/")
db = client["pag"]
collection = db["stations"]

for station in collection.find():
    name = station["name"]
    latitude = station["location"]["coordinates"][1]
    longitude = station["location"]["coordinates"][0]
    sun_times = calculate_sun_times(name, latitude, longitude)
    collection.update_one({"_id": station["_id"]}, {"$set": {"sun_times": sun_times}})