In [22]:
import pandas as pd
import numpy as np
import pytz

## Pridobivanje podatkov o vremenu
Vir podatkov: https://open-meteo.com/

Podatke sem shranila v datoteko `../ljubljana_vreme_2010_2023.csv`, zato je dovolj, da se spodnja celica zažene samo enkrat.

In [23]:
# Running this cell once is enough: the downloaded data is written to disk
# further down in the same cell.

import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

# Open-Meteo API client: HTTP responses are cached in '.cache' and failed
# requests are retried (up to 5 times, backoff factor 0.2).
# NOTE(review): expire_after = -1 presumably means "never expire" - confirm
# against the requests-cache documentation.
cache_session = requests_cache.CachedSession(
    '.cache',
    expire_after=-1,
)
retry_session = retry(
    cache_session,
    retries=5,
    backoff_factor=0.2,
)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Every weather variable that is needed must appear in "hourly".
# The position of a variable in that list is the index used to read it back
# from the response, so the order matters.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 46.0511,
    "longitude": 14.5051,
    "start_date": "2010-01-01",
    "end_date": "2023-12-31",
    "hourly": [
        "temperature_2m",
        "rain",
        "precipitation",
        "snowfall",
        "snow_depth",
        "cloud_cover_high",
        "cloud_cover_mid",
        "cloud_cover_low",
        "cloud_cover",
        "weather_code",
        "relative_humidity_2m",
        "dew_point_2m",
    ],
    "timezone": "Europe/Berlin",
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")

# Timezone() and TimezoneAbbreviation() come back as bytes (the recorded output
# of this cell shows b'Europe/Berlin'b'GMT+2'). Decode them so the text is
# printed instead of the bytes repr, and separate the two values with a space.
tz_name, tz_abbreviation = (
    value.decode("utf-8") if isinstance(value, bytes) else value
    for value in (response.Timezone(), response.TimezoneAbbreviation())
)
print(f"Timezone {tz_name} {tz_abbreviation}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Unpack the hourly series. Variables(i) is read positionally, so the targets
# below must be listed in the same order as the "hourly" list of the request.
hourly = response.Hourly()
(
    hourly_temperature_2m,
    hourly_rain,
    hourly_precipitation,
    hourly_snowfall,
    hourly_snow_depth,
    hourly_cloud_cover_high,
    hourly_cloud_cover_mid,
    hourly_cloud_cover_low,
    hourly_cloud_cover,
    hourly_weather_code,
    hourly_relative_humidity_2m,
    hourly_dew_point_2m,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(12))

# One timestamp per sample: from Time() up to (but excluding) TimeEnd(),
# spaced Interval() seconds apart, interpreted as UTC epoch seconds.
hourly_data = {
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "rain": hourly_rain,
    "precipitation": hourly_precipitation,
    "snowfall": hourly_snowfall,
    "snow_depth": hourly_snow_depth,
    "cloud_cover_high": hourly_cloud_cover_high,
    "cloud_cover_mid": hourly_cloud_cover_mid,
    "cloud_cover_low": hourly_cloud_cover_low,
    "cloud_cover": hourly_cloud_cover,
    "weather_code": hourly_weather_code,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "dew_point_2m": hourly_dew_point_2m,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)

# Persist the result so the download does not have to be repeated.
hourly_dataframe.to_csv("../ljubljana_vreme_2010_2023.csv", index=False)


Coordinates 46.080841064453125°N 14.451510429382324°E
Elevation 300.0 m asl
Timezone b'Europe/Berlin'b'GMT+2'
Timezone difference to GMT+0 7200 s


## Dodajanje podatkov o vremenu k podatkom o prometnih nesrečah

In [24]:
# Traffic accidents.
# The original call produced a warning pointing at this line (most likely
# pandas' DtypeWarning about mixed-type columns). low_memory=False makes
# pandas infer each column's dtype from the whole file in a single pass.
accident_data = pd.read_csv("../PrometneNesreče.csv", low_memory=False)

# Weather
weather_data = pd.read_csv("../ljubljana_vreme_2010_2023.csv")

print(accident_data['NastanekDatumCas'].dtype)
# Both timestamp columns must be real datetimes before they can be compared.
accident_data['NastanekDatumCas'] = pd.to_datetime(accident_data['NastanekDatumCas'], format='ISO8601', errors='coerce')
print(accident_data['NastanekDatumCas'].dtype)
weather_data['date'] = pd.to_datetime(weather_data['date'], format='ISO8601', errors='coerce')
print(weather_data['date'].dtype)

# errors='coerce' turns every unparseable value into NaT without any message.
# Report how many rows ended up without a timestamp so that bad input does not
# go unnoticed; nothing is printed when all rows are valid.
for label, timestamps in (
    ("accident_data['NastanekDatumCas']", accident_data['NastanekDatumCas']),
    ("weather_data['date']", weather_data['date']),
):
    missing_count = int(timestamps.isna().sum())
    if missing_count > 0:
        print(f"Warning: {missing_count} rows in {label} have no valid timestamp.")

  accident_data = pd.read_csv("../PrometneNesreče.csv")


object
datetime64[ns]
datetime64[ns, UTC]


In [25]:
# Put both timestamp columns into the same time zone.

print(accident_data['NastanekDatumCas'].dtype)
print(weather_data['date'].dt.tz)

# The accident timestamps carry no zone and are treated here as Ljubljana
# wall-clock times. tz_localize raises on a column that is already tz-aware,
# so only localize while the column is still naive; this keeps the cell safe
# to re-run.
#
# About `ambiguous`: when the clocks are set back in autumn, the hour between
# 02:00 and 03:00 occurs twice, so a timestamp such as 02:30 on that night
# cannot be assigned an offset on its own. ambiguous=True resolves every such
# timestamp as DST, i.e. summer time - the first occurrence, before the clocks
# were changed.
if accident_data['NastanekDatumCas'].dt.tz is None:
    accident_data['NastanekDatumCas'] = accident_data['NastanekDatumCas'].dt.tz_localize('Europe/Ljubljana', ambiguous=True)

# The weather timestamps are already tz-aware (UTC); convert them to the same
# zone as the accidents.
weather_data['date'] = weather_data['date'].dt.tz_convert('Europe/Ljubljana')

print(weather_data['date'].iloc[0])
print(accident_data['NastanekDatumCas'].iloc[0])
print(accident_data['NastanekDatumCas'].dtype)

datetime64[ns]
UTC
2009-12-31 23:00:00+01:00
2010-01-01 04:59:00+01:00
datetime64[ns, Europe/Ljubljana]


In [26]:
# Number of traffic accidents per municipality, most frequent first.
municipality_counts = accident_data['Obcina'].value_counts()
municipality_counts


Obcina
LJUBLJANA      2361
CELJE           943
NOVA GORICA     941
KRANJ           912
NOVO MESTO      855
               ... 
ŠALOVCI           4
ODRANCI           3
KOBILJE           2
OSILNICA          1
HODOŠ             1
Name: count, Length: 212, dtype: int64

In [27]:
# Keep only the accidents that happened in Ljubljana.
# .copy() makes the result an independent frame, so later column assignments
# on accident_data do not operate on a slice of the full table.
accident_data = accident_data[accident_data['Obcina'] == 'LJUBLJANA'].copy()
accident_data.to_csv("../ljubljana_nesrece_2010_2023.csv", index=False)
print(accident_data['NastanekDatumCas'].dtype)

# DEBUG: read the file back and check that the timestamps survive the CSV
# round trip.
lj_acc = pd.read_csv("../ljubljana_nesrece_2010_2023.csv")
print(lj_acc['NastanekDatumCas'].dtype)

# The stored strings already contain a UTC offset, so parse them as
# timezone-aware values; utc=True normalizes them all to UTC.
lj_acc['NastanekDatumCas'] = pd.to_datetime(lj_acc['NastanekDatumCas'], format='ISO8601', utc=True, errors='coerce')
print(f"After UTC conversion: {lj_acc['NastanekDatumCas'].dtype}")

# errors='coerce' yields NaT for values that could not be parsed; report them.
null_count = lj_acc['NastanekDatumCas'].isnull().sum()
if null_count > 0:
    print(f"Warning: {null_count} values could not be converted to datetime.")

# Convert to the Ljubljana time zone. pd.to_datetime(..., utc=True) always
# returns a tz-aware datetime column, so no dtype check is needed here.
lj_acc['NastanekDatumCas'] = lj_acc['NastanekDatumCas'].dt.tz_convert('Europe/Ljubljana')
print(f"Final dtype after timezone conversion: {lj_acc['NastanekDatumCas'].dtype}")

print(lj_acc['NastanekDatumCas'].iloc[0])
# END DEBUG

datetime64[ns, Europe/Ljubljana]
object
After UTC conversion: datetime64[ns, UTC]
Final dtype after timezone conversion: datetime64[ns, Europe/Ljubljana]
2010-01-05 16:40:00+01:00


#### Združitev podatkov o nesrečah in podatkov o vremenu

In [28]:
print(np.min(weather_data['rain']))
print(np.max(weather_data['rain']))

# Number of accidents by amount of rain - a step of 1.5 will be used.

# Round each accident time down to the full hour. Rounding to the nearest hour
# would also be possible, but the weather before and during the accident is
# what matters most, so rounding down is the more sensible choice.
accident_data['NastanekDatumCas'] = accident_data['NastanekDatumCas'].dt.floor(freq='h', ambiguous=True)

print(accident_data['NastanekDatumCas'].iloc[0])
print(weather_data['date'].iloc[0])


# Join the two frames on the hour. Many accidents may share one weather row,
# but each hour must appear only once in the weather data;
# validate='many_to_one' raises instead of silently duplicating accident rows
# if that ever stops being true.
accidents_weather = accident_data.merge(
    weather_data,
    how='left',
    left_on='NastanekDatumCas',
    right_on='date',
    validate='many_to_one',
)

# A left join keeps accidents that have no matching weather hour and fills the
# weather columns with NaN. Make that visible instead of letting it pass
# silently; nothing is printed when every accident was matched.
unmatched_count = int(accidents_weather['date'].isna().sum())
if unmatched_count > 0:
    print(f"Warning: {unmatched_count} accidents have no matching weather record.")

display(accidents_weather)

accidents_weather.to_csv("../nesrece_vreme_LJ_2010_2023.csv", index=False)


0.0
17.5
2010-01-05 16:00:00+01:00
2009-12-31 23:00:00+01:00


Unnamed: 0,NastanekDatumCas,WGSLat,WGSLon,Leto,Obcina,VrstaDogodka,SkupinaDogodka,Dogodek,CORSBesedilo,Opis poteka intervencije,...,precipitation,snowfall,snow_depth,cloud_cover_high,cloud_cover_mid,cloud_cover_low,cloud_cover,weather_code,relative_humidity_2m,dew_point_2m
0,2010-01-05 16:00:00+01:00,46.096306,14.496116,2010,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,Ob 16.36 so na Avšičevi cesti v Ljubljani gasi...,Ob 16.36 so na Avšičevi cesti gasilci PGD Ježi...,...,0.3,0.21,0.05,98.0,97.0,96.0,100.0,73.0,85.565890,-3.8655
1,2010-01-10 07:00:00+01:00,46.021261,14.562976,4020,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,"Ob 6.55 se je na ljubljanski obvoznici, med pr...",Ob 6.55 je prišlo do prometne nesreče na ljubl...,...,0.0,0.00,0.34,25.0,36.0,77.0,86.0,3.0,95.736490,-0.4655
2,2010-01-14 18:00:00+01:00,46.052374,14.505043,4020,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,,Intervencija ni bila potrebna.; Na kraju nezgo...,...,0.0,0.00,0.30,20.0,31.0,81.0,83.0,3.0,92.860214,-3.0655
3,2010-01-18 13:00:00+01:00,46.105000,14.541776,4020,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,Ob 13.31 sta na križišču Zasavske in Dunajske ...,"Na pomoč so priskočili gasilci GB Ljubljana, k...",...,0.0,0.00,0.30,0.0,87.0,4.0,88.0,3.0,67.310440,-1.8155
4,2010-01-22 19:00:00+01:00,46.080400,14.528145,4020,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,Ob 19.38 je voznik z osebnim vozilom zapeljal ...,Ob 19.38 je voznik z osebnim vozilom zapeljal ...,...,0.0,0.00,0.29,0.0,4.0,66.0,67.0,2.0,88.895260,-6.3155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2356,2023-12-20 20:00:00+01:00,46.066957,14.533748,8092,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,Ob 20.22 sta na Šmartinski cesti v Ljubljani t...,V križišču Šmartinske ceste in Kajuhove ulice ...,...,0.0,0.00,0.00,0.0,19.0,100.0,100.0,3.0,100.000000,1.8755
2357,2023-12-21 15:00:00+01:00,46.021613,14.562892,8092,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,"Ob 15.07 je v predoru Golovec, občina Ljubljan...","Preko ReCO smo dobili informacije, da gre v tu...",...,0.0,0.00,0.00,100.0,100.0,12.0,100.0,3.0,79.636930,0.7255
2358,2023-12-21 18:00:00+01:00,46.059810,14.511972,4046,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,Ob 18.33 so na Vilharjevi cesti v Ljubljani tr...,Na cestišču so verižno trčila štiri vozila. Za...,...,0.0,0.00,0.00,100.0,47.0,8.0,100.0,3.0,98.930840,1.3755
2359,2023-12-30 08:00:00+01:00,46.044933,14.594988,4046,LJUBLJANA,NESREČE V PROMETU,Nesreče v cestnem prometu,prometne nesreče,Ob 8.34 je na Poti heroja Trtnika v Ljubljani ...,Voznica je z osebnim vozilom zapeljala s cesti...,...,0.0,0.00,0.00,100.0,0.0,68.0,100.0,3.0,98.932144,1.5255
