# Import library

In [34]:
import pandas as pd
import numpy as np
import requests
import os
import io
from zipfile import ZipFile
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely.geometry
from shapely import wkt

# Getting data from IMGW

In [9]:
year = '2020'
month = '05'
code = 'B00300S' #  code to the specific data type (temperature)

In [10]:
parent_path = r'C:\Users\01150208\OneDrive - Politechnika Warszawska\Pliki_Kuby\Studia\III_ROK\PAG\PAG_II\projekt1'
directory = 'Dane'
path = os.path.join(parent_path, directory)
#  creating a folder for the weather data from IMGW 
if not os.path.exists(path):
    os.mkdir(path)
    print("Directory '%s' created" % directory)

In [11]:
meteo_dir = f'Meteo_{year}-{month}'
data_path = os.path.join(path, meteo_dir)
#  creating the data folder for choosen date (year-month) if it doesn't exist
if not os.path.exists(data_path):
    os.mkdir(data_path)
    #  requesting the weather data by url and getting the zip file
    url = f'https://dane.imgw.pl/datastore/getfiledown/Arch/Telemetria/Meteo/{year}/Meteo_{year}-{month}.zip'
    r = requests.get(url, allow_redirects=True)
    if r:
        print(r.status_code)
        #  extracting a zip folder
        with ZipFile(io.BytesIO(r.content), 'r') as zip:
            zip.extractall(path=data_path)

In [12]:
# selecting a file with specific data type and creating the DataFrame
for file in os.listdir(data_path):
    if file.startswith(code):
        df = pd.read_csv(os.path.join(data_path, file), header=None, delimiter=';', usecols=[0,1,2,3])
df.head()

Unnamed: 0,0,1,2,3
0,249190890,B00300S,2020-05-01 00:10,9
1,249190890,B00300S,2020-05-01 00:20,91
2,249190890,B00300S,2020-05-01 00:30,9
3,249190890,B00300S,2020-05-01 00:40,9
4,249190890,B00300S,2020-05-01 00:50,9


# Manipulating IMGW data

In [13]:
# renaming columns name
df.rename(columns={0:"code", 1:"parameter", 2:'date_', 3:"value"}, inplace=True)
# changing ',' -> '.' to can convert string value to float
df['value'] = df['value'].str.replace(",", '.').astype(float)
df['date_'] = pd.to_datetime(df['date_'])
df['time'] = df['date_'].dt.time
df['date_'] = df['date_'].dt.date
df = df[['code', 'parameter', 'date_', 'time', 'value']]
df.head()

Unnamed: 0,code,parameter,date_,time,value
0,249190890,B00300S,2020-05-01,00:10:00,9.0
1,249190890,B00300S,2020-05-01,00:20:00,9.1
2,249190890,B00300S,2020-05-01,00:30:00,9.0
3,249190890,B00300S,2020-05-01,00:40:00,9.0
4,249190890,B00300S,2020-05-01,00:50:00,9.0


# Manipulating spatial data

In [14]:
shape_path = r"C:\Users\01150208\OneDrive - Politechnika Warszawska\Pliki_Kuby\Studia\III_ROK\PAG\PAG_II\projekt1\Dane\shapefile"

#  reading the spatial data (polygons and points)
meteo_station = gpd.read_file(os.path.join(shape_path, 'effacility.geojson'))

polska = gpd.read_file(os.path.join(shape_path, 'polska.shp'))

woj = gpd.read_file(os.path.join(shape_path, 'woj.shp'))
woj.to_crs(epsg=2180, inplace=True)

powiaty = gpd.read_file(os.path.join(shape_path, 'powiaty.shp'))
powiaty.to_crs(epsg=2180, inplace=True)

In [15]:
# Creating join tables wojewodztwo and meteo station
meteo_station_woj = meteo_station.sjoin(woj, how="inner", predicate='intersects')
meteo_station_woj = meteo_station_woj[['ifcid', 'name1', 'name_right', 'geometry']]
meteo_station_woj

Unnamed: 0,ifcid,name1,name_right,geometry
0,149180010,Krzyżanowice,śląskie,POINT (448926.076 236501.650)
1,149180020,Chałupki,śląskie,POINT (451760.530 228509.024)
2,149180030,Łaziska,śląskie,POINT (460035.038 228718.997)
3,149180040,Gołkowice,śląskie,POINT (463863.107 228846.279)
4,149180050,Zebrzydowice,śląskie,POINT (472228.166 223702.104)
...,...,...,...,...
2621,353210280,Mikołajki,warmińsko-mazurskie,POINT (670542.981 661386.619)
2631,354190160,Elbląg-Milejewo,warmińsko-mazurskie,POINT (535431.646 706703.399)
2632,354210185,Kętrzyn,warmińsko-mazurskie,POINT (654823.735 691792.564)
2645,453200010,Jezioro Dadaj,warmińsko-mazurskie,POINT (621631.792 669780.487)


In [16]:
# Creating join tables powiaty and meteo station
meteo_station_pow = meteo_station_woj.sjoin(powiaty, how="inner", predicate='intersects')
meteo_station_pow = meteo_station_pow[['ifcid', 'name1', 'name_right', 'geometry', 'name']]

In [17]:
# Creating full-info meteo station table --coord system: 1992/2180
meteo_station_pow.reset_index(inplace=True, drop=True)
col = meteo_station_pow.columns.tolist()
col = [*col[0:3], col[-1], col[-2]]
meteo_station_pow = meteo_station_pow[col]
meteo_station_pow.rename(columns={"name1":"name", "name_right":"voiv_name", "name":'pow_name'}, inplace=True)

meteo_station_92 = meteo_station_pow
meteo_station_92

Unnamed: 0,ifcid,name,voiv_name,pow_name,geometry
0,149180010,Krzyżanowice,śląskie,raciborski,POINT (448926.076 236501.650)
1,149180020,Chałupki,śląskie,raciborski,POINT (451760.530 228509.024)
2,150180040,Bojanów,śląskie,raciborski,POINT (439438.614 240707.396)
3,150180060,Racibórz-Miedonia,śląskie,raciborski,POINT (444953.005 250806.781)
4,150180090,Nędza,śląskie,raciborski,POINT (450766.450 254733.345)
...,...,...,...,...,...
2632,154220030,Małe Wronki,warmińsko-mazurskie,olecki,POINT (710989.508 688938.679)
2633,154220060,Olecko,warmińsko-mazurskie,olecki,POINT (729446.291 692014.088)
2634,253220010,Giże,warmińsko-mazurskie,olecki,POINT (722406.877 686099.237)
2635,254220070,Kowale Oleckie,warmińsko-mazurskie,olecki,POINT (723328.597 706231.324)


In [30]:
meteo_station_wgs = meteo_station_92.copy()
# changing coord system in geojson file to wgs84
meteo_station_wgs.to_crs(epsg=4326, inplace=True)
meteo_station_wgs['geometry'] = meteo_station_wgs['geometry'].apply(lambda x: shapely.geometry.mapping(x))
meteo_station_wgs

Unnamed: 0,ifcid,name,voiv_name,pow_name,geometry
0,149180010,Krzyżanowice,śląskie,raciborski,POINT (18.28722 49.99361)
1,149180020,Chałupki,śląskie,raciborski,POINT (18.32778 49.92194)
2,150180040,Bojanów,śląskie,raciborski,POINT (18.15417 50.03056)
3,150180060,Racibórz-Miedonia,śląskie,raciborski,POINT (18.22972 50.12194)
4,150180090,Nędza,śląskie,raciborski,POINT (18.31056 50.15778)
...,...,...,...,...,...
2632,154220030,Małe Wronki,warmińsko-mazurskie,olecki,POINT (22.22194 54.02167)
2633,154220060,Olecko,warmińsko-mazurskie,olecki,POINT (22.50556 54.04139)
2634,253220010,Giże,warmińsko-mazurskie,olecki,POINT (22.39389 53.99139)
2635,254220070,Kowale Oleckie,warmińsko-mazurskie,olecki,POINT (22.42278 54.17167)


## Handling meteo station data from MongoDB

In [49]:
meteo_station_wgs = pd.DataFrame.from_records(meteoStation.find(), exclude=['_id'])
meteo_station_wgs

Unnamed: 0,ifcid,name,voiv_name,pow_name,geometry
0,149180010,Krzyżanowice,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.287222, 4..."
1,149180020,Chałupki,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.327778, 4..."
2,150180040,Bojanów,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.154167, 5..."
3,150180060,Racibórz-Miedonia,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.229722, 5..."
4,150180090,Nędza,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.310556, 5..."
...,...,...,...,...,...
2632,154220030,Małe Wronki,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.221943999..."
2633,154220060,Olecko,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.505556, 5..."
2634,253220010,Giże,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.393889, 5..."
2635,254220070,Kowale Oleckie,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.422778, 5..."


In [50]:
# applying a lambda function to get x, y coordinates
meteo_station_wgs['lat'] = meteo_station_wgs.geometry.apply(lambda p: p['coordinates'][0])
meteo_station_wgs['lon'] = meteo_station_wgs.geometry.apply(lambda p: p['coordinates'][1])

In [51]:
meteo_station_wgs

Unnamed: 0,ifcid,name,voiv_name,pow_name,geometry,lat,lon
0,149180010,Krzyżanowice,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.287222, 4...",18.287222,49.993611
1,149180020,Chałupki,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.327778, 4...",18.327778,49.921944
2,150180040,Bojanów,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.154167, 5...",18.154167,50.030556
3,150180060,Racibórz-Miedonia,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.229722, 5...",18.229722,50.121944
4,150180090,Nędza,śląskie,raciborski,"{'type': 'Point', 'coordinates': [18.310556, 5...",18.310556,50.157778
...,...,...,...,...,...,...,...
2632,154220030,Małe Wronki,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.221943999...",22.221944,54.021667
2633,154220060,Olecko,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.505556, 5...",22.505556,54.041389
2634,253220010,Giże,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.393889, 5...",22.393889,53.991389
2635,254220070,Kowale Oleckie,warmińsko-mazurskie,olecki,"{'type': 'Point', 'coordinates': [22.422778, 5...",22.422778,54.171667


In [52]:
# merging two tables (meteo station's temperature data and spatial data)
data_station = pd.merge(df, meteo_station_wgs, left_on='code', right_on='ifcid')
# rearranging columns
data_station = data_station[['ifcid', 'parameter', 'date_', 'time', 'value', 'name', 'voiv_name', 'pow_name', 'geometry', 'lat', 'lon']]
data_station

Unnamed: 0,ifcid,parameter,date_,time,value,name,voiv_name,pow_name,geometry,lat,lon
0,249190890,B00300S,2020-05-01,00:10:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611
1,249190890,B00300S,2020-05-01,00:20:00,9.1,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611
2,249190890,B00300S,2020-05-01,00:30:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611
3,249190890,B00300S,2020-05-01,00:40:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611
4,249190890,B00300S,2020-05-01,00:50:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611
...,...,...,...,...,...,...,...,...,...,...,...
1045893,351160425,B00300S,2020-05-31,23:10:00,12.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611
1045894,351160425,B00300S,2020-05-31,23:20:00,12.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611
1045895,351160425,B00300S,2020-05-31,23:30:00,12.2,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611
1045896,351160425,B00300S,2020-05-31,23:40:00,12.0,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611


# Handling day/night function

In [53]:
def get_sun(date, lat, lon):
    meteo = LocationInfo(lat, lon)
    return sun(meteo.observer, date=date)

def map_get_sun(df):
    return pd.Series(map(get_sun, df['date_'], df['lat'], df['lon']))

def get_sunrise(s):
    return s["sunrise"]

def get_sunset(s):
    return s["sunset"]

def map_sun_prop(func, df):
    return pd.Series(map(func, df['Sun']))

def day_or_night(s, time):
    return s["sunrise"].time() < time < s["sunset"].time()

def map_day_or_night(df):
    return pd.Series(map(day_or_night, df['Sun'], df['time']))

In [54]:
import time
import datetime
from astral.sun import sun
from astral import LocationInfo

start = time.time()

mask = data_station.date_.diff() != datetime.timedelta()
temp = data_station[mask].copy()

temp['Sun'] = None
data_station['Sun'] = None

temp.reset_index(drop=False, inplace=True)
temp['Sun'] = map_get_sun(temp)
temp.set_index('index', inplace=True)

data_station['Sun'] = temp['Sun']
data_station['Sun'].fillna(method="ffill", inplace=True)

# data_station['sunrise'] = map_sun_prop(get_sunrise, data_station)
# data_station['sunset'] = map_sun_prop(get_sunset, data_station)
data_station['day'] = map_day_or_night(data_station)
stop = time.time()
print(stop - start)
data_station

2.810689926147461


Unnamed: 0,ifcid,parameter,date_,time,value,name,voiv_name,pow_name,geometry,lat,lon,Sun,day
0,249190890,B00300S,2020-05-01,00:10:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",False
1,249190890,B00300S,2020-05-01,00:20:00,9.1,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",False
2,249190890,B00300S,2020-05-01,00:30:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",False
3,249190890,B00300S,2020-05-01,00:40:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",False
4,249190890,B00300S,2020-05-01,00:50:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1045893,351160425,B00300S,2020-05-31,23:10:00,12.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",False
1045894,351160425,B00300S,2020-05-31,23:20:00,12.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",False
1045895,351160425,B00300S,2020-05-31,23:30:00,12.2,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",False
1045896,351160425,B00300S,2020-05-31,23:40:00,12.0,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",False


In [55]:
data_station[data_station.day == True]

Unnamed: 0,ifcid,parameter,date_,time,value,name,voiv_name,pow_name,geometry,lat,lon,Sun,day
27,249190890,B00300S,2020-05-01,04:40:00,8.8,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",True
28,249190890,B00300S,2020-05-01,04:50:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",True
29,249190890,B00300S,2020-05-01,05:00:00,9.5,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",True
30,249190890,B00300S,2020-05-01,05:10:00,11.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",True
31,249190890,B00300S,2020-05-01,05:20:00,12.1,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",19.155556,49.648611,"{'dawn': 2020-05-01 03:52:38.138075+00:00, 'su...",True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1045870,351160425,B00300S,2020-05-31,19:20:00,18.1,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",True
1045871,351160425,B00300S,2020-05-31,19:30:00,17.8,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",True
1045872,351160425,B00300S,2020-05-31,19:40:00,17.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",True
1045873,351160425,B00300S,2020-05-31,19:50:00,16.9,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",16.881667,51.113611,"{'dawn': 2020-05-31 03:03:27.381861+00:00, 'su...",True


In [56]:
data_station = data_station[['ifcid', 'parameter', 'date_', 'time','value', 'name', 'voiv_name', 'pow_name', 'geometry', 'day']]

In [57]:
# Converting coordinates to shapely object
data_station['geometry'] = data_station['geometry'].apply(lambda x: shapely.geometry.Point(x['coordinates']))
data_stationgpd = gpd.GeoDataFrame(data_station, geometry='geometry')
data_stationgpd = data_stationgpd[['ifcid', 'parameter', 'date_', 'time','value', 'name', 'voiv_name', 'pow_name', 'geometry', 'day']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_station['geometry'] = data_station['geometry'].apply(lambda x: shapely.geometry.Point(x['coordinates']))


In [58]:
data_stationgpd[data_stationgpd.day == True]

Unnamed: 0,ifcid,parameter,date_,time,value,name,voiv_name,pow_name,geometry,day
27,249190890,B00300S,2020-05-01,04:40:00,8.8,Radziechowy,śląskie,żywiecki,POINT (19.15556 49.64861),True
28,249190890,B00300S,2020-05-01,04:50:00,9.0,Radziechowy,śląskie,żywiecki,POINT (19.15556 49.64861),True
29,249190890,B00300S,2020-05-01,05:00:00,9.5,Radziechowy,śląskie,żywiecki,POINT (19.15556 49.64861),True
30,249190890,B00300S,2020-05-01,05:10:00,11.0,Radziechowy,śląskie,żywiecki,POINT (19.15556 49.64861),True
31,249190890,B00300S,2020-05-01,05:20:00,12.1,Radziechowy,śląskie,żywiecki,POINT (19.15556 49.64861),True
...,...,...,...,...,...,...,...,...,...,...
1045870,351160425,B00300S,2020-05-31,19:20:00,18.1,Wrocław,dolnośląskie,Wrocław,POINT (16.88167 51.11361),True
1045871,351160425,B00300S,2020-05-31,19:30:00,17.8,Wrocław,dolnośląskie,Wrocław,POINT (16.88167 51.11361),True
1045872,351160425,B00300S,2020-05-31,19:40:00,17.3,Wrocław,dolnośląskie,Wrocław,POINT (16.88167 51.11361),True
1045873,351160425,B00300S,2020-05-31,19:50:00,16.9,Wrocław,dolnośląskie,Wrocław,POINT (16.88167 51.11361),True


In [59]:
from scipy import stats
trim = lambda x: stats.trim_mean(x, 0.1)
day = data_station.loc[data_station.day == True, ['ifcid', 'value']].groupby('ifcid').agg({"value": trim})
night = data_station.loc[data_station.day == False, ['ifcid', 'value']].groupby('ifcid').agg({"value": trim})
day_woj = data_station.loc[data_station.day == True, ['value', 'voiv_name']].groupby('voiv_name').agg({"value": trim})
day_woj_date = data_station.loc[data_station.day == True, ['value', 'voiv_name', 'date_']].groupby(['voiv_name', 'date_']).agg({"value": trim})
night_woj = data_station.loc[data_station.day == False, ['value', 'voiv_name']].groupby('voiv_name').agg({"value": trim})
day_pow = data_station.loc[data_station.day == True, ['value', 'pow_name']].groupby('pow_name').agg({"value": trim})
night_pow = data_station.loc[data_station.day == False, ['value', 'pow_name']].groupby('pow_name').agg({"value": trim})

In [131]:
day_woj_date

Unnamed: 0_level_0,Unnamed: 1_level_0,value
voiv_name,date_,Unnamed: 2_level_1
dolnośląskie,2020-05-01,13.068581
dolnośląskie,2020-05-02,9.990914
dolnośląskie,2020-05-03,9.573757
dolnośląskie,2020-05-04,11.883731
dolnośląskie,2020-05-05,7.674835
...,...,...
świętokrzyskie,2020-05-27,15.830129
świętokrzyskie,2020-05-28,15.745217
świętokrzyskie,2020-05-29,14.749032
świętokrzyskie,2020-05-30,11.129469


In [171]:
day_pow

Unnamed: 0_level_0,value
pow_name,Unnamed: 1_level_1
Białystok,12.583928
Bielsko-Biała,11.706269
Bydgoszcz,13.068109
Częstochowa,12.689944
Gdańsk,11.018174
...,...
łukowski,12.984107
Świnoujście,12.606784
świdnicki,13.437315
świebodziński,14.044508


# To do

# Value in pow per day

In [60]:
# wyszukiwanie wartości w danym dniu miesiąca w danym powiecie
date_pow = data_station[['value', 'pow_name', 'date_', 'day']].groupby(['pow_name', 'date_', 'day']).agg({"value": trim}).reset_index()

In [61]:
date_pow

Unnamed: 0,pow_name,date_,day,value
0,Białystok,2020-05-01,False,9.051111
1,Białystok,2020-05-01,True,12.383333
2,Białystok,2020-05-02,False,8.793182
3,Białystok,2020-05-02,True,13.847222
4,Białystok,2020-05-03,False,7.452273
...,...,...,...,...
9759,żywiecki,2020-05-29,True,8.637712
9760,żywiecki,2020-05-30,False,5.415132
9761,żywiecki,2020-05-30,True,7.721722
9762,żywiecki,2020-05-31,False,7.769457


# Value in pow per day and per 10-minutes

In [58]:
# wyszukiwanie wartości w danym dniu miesiąca w danym powiecie o określonej godzinie
time_pow = data_station[['value', 'pow_name', 'date_', 'time', 'day']].groupby(['pow_name', 'date_', 'day', 'time']).agg({"value": trim}).reset_index()

In [59]:
time_pow

Unnamed: 0,pow_name,date_,day,time,value
0,Białystok,2020-05-01,False,00:00:00,6.400
1,Białystok,2020-05-01,False,00:10:00,6.400
2,Białystok,2020-05-01,False,00:20:00,6.500
3,Białystok,2020-05-01,False,00:30:00,6.600
4,Białystok,2020-05-01,False,00:40:00,6.600
...,...,...,...,...,...
701170,żywiecki,2020-05-31,True,19:20:00,9.804
701171,żywiecki,2020-05-31,True,19:30:00,9.816
701172,żywiecki,2020-05-31,True,19:40:00,9.846
701173,żywiecki,2020-05-31,True,19:50:00,9.856


# Value in voiv per day

In [52]:
# wyszukiwanie wartości w danym dniu miesiąca w danym województwie
date_woj = data_station[['value', 'voiv_name', 'date_', 'day']].groupby(['voiv_name', 'date_', 'day']).agg({"value": trim}).reset_index()

In [53]:
date_woj

Unnamed: 0,voiv_name,date_,day,value
0,dolnośląskie,2020-05-01,False,9.742192
1,dolnośląskie,2020-05-01,True,13.068581
2,dolnośląskie,2020-05-02,False,7.041097
3,dolnośląskie,2020-05-02,True,9.990914
4,dolnośląskie,2020-05-03,False,5.655806
...,...,...,...,...
987,świętokrzyskie,2020-05-29,True,14.749032
988,świętokrzyskie,2020-05-30,False,8.853974
989,świętokrzyskie,2020-05-30,True,11.129469
990,świętokrzyskie,2020-05-31,False,9.969189


# Value in voiv per day per 10-minutes

In [50]:
# wyszukiwanie wartości w danym dniu miesiąca w danym województwie o określonej godzinie
time_woj = data_station[['value', 'voiv_name', 'date_', 'time', 'day']].groupby(['voiv_name', 'date_', 'time', 'day']).agg({"value": trim}).reset_index()

In [51]:
time_woj

Unnamed: 0,voiv_name,date_,time,day,value
0,dolnośląskie,2020-05-01,00:00:00,False,11.134667
1,dolnośląskie,2020-05-01,00:10:00,False,11.409412
2,dolnośląskie,2020-05-01,00:20:00,False,11.591250
3,dolnośląskie,2020-05-01,00:30:00,False,11.774375
4,dolnośląskie,2020-05-01,00:40:00,False,11.840000
...,...,...,...,...,...
71419,świętokrzyskie,2020-05-31,23:10:00,False,9.462500
71420,świętokrzyskie,2020-05-31,23:20:00,False,9.255000
71421,świętokrzyskie,2020-05-31,23:30:00,False,9.071250
71422,świętokrzyskie,2020-05-31,23:40:00,False,9.157500


In [None]:
# wyszukiwanie wartości średniej dla powiatów (godzina or dzien or miesiac)

# wyszukiwanie wartości średniej dla województw (godzina or dzien or miesiac)

# wizualizacja danych województwa

# wizualizacja danych powiaty

# MongoDB connection and querying

In [48]:
import pymongo

connection = pymongo.MongoClient("mongodb://localhost:27017/")
db = connection.pag
imgw = db.imgw
prod = db.prod
meteoStation = db.meteoStation

In [27]:
mongo_df = pd.DataFrame.from_records(prod.find())
mongo_df

Unnamed: 0,_id,kodSH,data,wartosc,name,wojewodztwo,powiat,loc,day
0,61af9ba2238b563422fb4c6f,249190890,2020-05-01 00:10:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",False
1,61af9ba2238b563422fb4c70,249190890,2020-05-01 00:20:00,9.1,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",False
2,61af9ba2238b563422fb4c71,249190890,2020-05-01 00:30:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",False
3,61af9ba2238b563422fb4c72,249190890,2020-05-01 00:40:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",False
4,61af9ba2238b563422fb4c73,249190890,2020-05-01 00:50:00,9.0,Radziechowy,śląskie,żywiecki,"{'type': 'Point', 'coordinates': [19.155556, 4...",False
...,...,...,...,...,...,...,...,...,...
1045893,61af9ba7238b5634220b41f4,351160425,2020-05-31 23:10:00,12.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",False
1045894,61af9ba7238b5634220b41f5,351160425,2020-05-31 23:20:00,12.3,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",False
1045895,61af9ba7238b5634220b41f6,351160425,2020-05-31 23:30:00,12.2,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",False
1045896,61af9ba7238b5634220b41f7,351160425,2020-05-31 23:40:00,12.0,Wrocław,dolnośląskie,Wrocław,"{'type': 'Point', 'coordinates': [16.881667, 5...",False


In [109]:
# Exporting meteo_station data to DB
meteo_station = list(meteo_station_wgs[['ifcid','name','voiv_name','pow_name','geometry']].to_dict(orient='records'))
meteoStation.insert_many(meteo_station)
meteo_station

[{'ifcid': 149180010,
  'name': 'Krzyżanowice',
  'voiv_name': 'śląskie',
  'pow_name': 'raciborski',
  'geometry': {'type': 'Point', 'coordinates': (18.287222, 49.993611)},
  '_id': ObjectId('61d478ed1b9d75eabddc8569')},
 {'ifcid': 149180020,
  'name': 'Chałupki',
  'voiv_name': 'śląskie',
  'pow_name': 'raciborski',
  'geometry': {'type': 'Point', 'coordinates': (18.327778, 49.921944)},
  '_id': ObjectId('61d478ed1b9d75eabddc856a')},
 {'ifcid': 150180040,
  'name': 'Bojanów',
  'voiv_name': 'śląskie',
  'pow_name': 'raciborski',
  'geometry': {'type': 'Point', 'coordinates': (18.154167, 50.03055599999999)},
  '_id': ObjectId('61d478ed1b9d75eabddc856b')},
 {'ifcid': 150180060,
  'name': 'Racibórz-Miedonia',
  'voiv_name': 'śląskie',
  'pow_name': 'raciborski',
  'geometry': {'type': 'Point', 'coordinates': (18.229722, 50.12194399999999)},
  '_id': ObjectId('61d478ed1b9d75eabddc856c')},
 {'ifcid': 150180090,
  'name': 'Nędza',
  'voiv_name': 'śląskie',
  'pow_name': 'raciborski',
  'ge

In [None]:
imgw.find().count()

In [None]:
query = {'wartosc':9}
imgw.find(query)

# Redis connection and querying

In [62]:
import redis
pool = redis.ConnectionPool(host='127.0.0.1', port=6379, db=0, decode_responses=True)
db_red = redis.StrictRedis(connection_pool=pool)

In [67]:
keys = [str(key) for key in date_pow.iloc[:, :-1].to_dict('records')]
keys

["{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 1), 'day': False}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 1), 'day': True}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 2), 'day': False}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 2), 'day': True}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 3), 'day': False}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 3), 'day': True}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 4), 'day': False}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 4), 'day': True}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 5), 'day': False}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 5), 'day': True}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 6), 'day': False}",
 "{'pow_name': 'Białystok', 'date_': datetime.date(2020, 5, 6), 'day': True}",
 "{'pow_name': 'Białystok', 'date_': datetime.

In [72]:
values = date_pow.iloc[['value']].to_dict('records')
values

ValueError: invalid literal for int() with base 10: 'value'

# Neo4j connection and querying

In [5]:
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=('neo4j', 'test'))
session = driver.session()

In [6]:
query = "CREATE (warszawa:miasto {nazwa:'Warszawa'}), (mazowieckie:woj {nazwa:'Mazowieckie'}), (warszawa) -[:JEST_W]->(mazowieckie)"
session.run(query)

<neo4j.work.result.Result at 0x209147a2190>

In [4]:
session.close

<bound method Session.close of <neo4j.work.simple.Session object at 0x000002091476D910>>