In [30]:
import pandas as pd
import json
from pathlib import Path
from datetime import datetime

In [3]:
def load_json(file_name):
    """Read a JSON file and return its decoded content.

    Args:
        file_name: Location of the JSON file (``str`` or ``pathlib.Path``).

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 encoded JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
    """
    # Without an explicit encoding, open() falls back to the platform's locale
    # encoding, which can mis-decode non-ASCII text (e.g. place names) on
    # machines whose locale is not UTF-8.
    with open(file_name, encoding="utf-8") as fp:
        return json.load(fp)
    

In [12]:
def parse_json(d):
    """Flatten one raw weather payload into a single-level record.

    Args:
        d: Decoded payload ``dict``. Values are read from its ``dt`` key and
            from the nested ``main`` and ``wind`` sections.

    Returns:
        A ``dict`` with the keys ``timestamp``, ``temp``, ``temp_feels``,
        ``temp_min``, ``temp_max``, ``pressure``, ``humidity`` and
        ``wind_speed`` (in that order). A value absent from the payload is
        returned as ``None``.
    """
    # A missing section is treated as empty, so its fields come back as None.
    main = d.get("main", {})
    wind = d.get('wind', {})

    # Output field name -> key inside the "main" section.
    main_fields = {
        "temp": 'temp',
        "temp_feels": 'feels_like',
        "temp_min": 'temp_min',
        "temp_max": 'temp_max',
        "pressure": 'pressure',
        "humidity": 'humidity',
    }

    record = {"timestamp": d.get('dt')}
    for field, key in main_fields.items():
        record[field] = main.get(key)
    record["wind_speed"] = wind.get('speed')
    return record


In [13]:
# Load one sample snapshot to inspect the structure of the raw payload.
dane_z_pliku = load_json("dane/pogoda/json/1756678333.json")

In [14]:
# Show the raw payload exactly as it was loaded from the sample file.
dane_z_pliku

{'coord': {'lon': 20.9583, 'lat': 52.331},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01n'}],
 'base': 'stations',
 'main': {'temp': 19.12,
  'feels_like': 19.31,
  'temp_min': 17.79,
  'temp_max': 20.62,
  'pressure': 1012,
  'humidity': 85,
  'sea_level': 1012,
  'grnd_level': 1003},
 'visibility': 10000,
 'wind': {'speed': 3.6, 'deg': 20},
 'clouds': {'all': 0},
 'dt': 1756678333,
 'sys': {'type': 2,
  'id': 2000992,
  'country': 'PL',
  'sunrise': 1756698454,
  'sunset': 1756747514},
 'timezone': 7200,
 'id': 7533497,
 'name': 'Gmina Łomianki',
 'cod': 200}

In [15]:
# Check the flattened record produced from the sample payload.
parse_json(dane_z_pliku)

{'timestamp': 1756678333,
 'temp': 19.12,
 'temp_feels': 19.31,
 'temp_min': 17.79,
 'temp_max': 20.62,
 'pressure': 1012,
 'humidity': 85,
 'wind_speed': 3.6}

In [17]:
# Directory with the JSON weather snapshots (relative to the working directory).
data_dir = Path("dane/pogoda/json/") 
data_dir

WindowsPath('dane/pogoda/json')

In [21]:
# Preview the first ten snapshot files. glob() yields paths in arbitrary,
# OS-dependent order, so sort them to make the preview deterministic.
sorted(data_dir.glob("*.json"))[:10]

[WindowsPath('dane/pogoda/json/1756677250.json'),
 WindowsPath('dane/pogoda/json/1756677371.json'),
 WindowsPath('dane/pogoda/json/1756677642.json'),
 WindowsPath('dane/pogoda/json/1756677793.json'),
 WindowsPath('dane/pogoda/json/1756678034.json'),
 WindowsPath('dane/pogoda/json/1756678184.json'),
 WindowsPath('dane/pogoda/json/1756678275.json'),
 WindowsPath('dane/pogoda/json/1756678333.json'),
 WindowsPath('dane/pogoda/json/1756678516.json'),
 WindowsPath('dane/pogoda/json/1756678847.json')]

In [22]:
## everything together: load and flatten every snapshot into one list of records

data_dir = Path("dane/pogoda/json/")
data_list = []

# glob() yields paths in arbitrary, OS-dependent order. Sorting by name makes the
# record order deterministic; the file names look like Unix timestamps, so this
# should also be chronological order.
for file in sorted(data_dir.glob("*.json")):
    raw_data = load_json(file)
    parsed_data = parse_json(raw_data)
    data_list.append(parsed_data)
    

In [23]:
# Number of parsed records (one per snapshot file).
len(data_list)

4560

In [24]:
# Peek at the first five parsed records.
data_list[:5]

[{'timestamp': 1756677250,
  'temp': 19.64,
  'temp_feels': 19.93,
  'temp_min': 18.15,
  'temp_max': 20.77,
  'pressure': 1013,
  'humidity': 87,
  'wind_speed': 3.09},
 {'timestamp': 1756677371,
  'temp': 19.48,
  'temp_feels': 19.7,
  'temp_min': 18.15,
  'temp_max': 20.65,
  'pressure': 1013,
  'humidity': 85,
  'wind_speed': 3.09},
 {'timestamp': 1756677642,
  'temp': 19.48,
  'temp_feels': 19.7,
  'temp_min': 18.15,
  'temp_max': 20.65,
  'pressure': 1013,
  'humidity': 85,
  'wind_speed': 3.09},
 {'timestamp': 1756677793,
  'temp': 19.45,
  'temp_feels': 19.67,
  'temp_min': 18.15,
  'temp_max': 20.65,
  'pressure': 1013,
  'humidity': 85,
  'wind_speed': 3.09},
 {'timestamp': 1756678034,
  'temp': 19.35,
  'temp_feels': 19.56,
  'temp_min': 18.15,
  'temp_max': 20.65,
  'pressure': 1012,
  'humidity': 85,
  'wind_speed': 3.6}]

In [25]:
# Build a DataFrame with one row per parsed record; the dict keys become columns.
data_df = pd.DataFrame(data_list)

In [26]:
# Display the assembled frame.
data_df

Unnamed: 0,timestamp,temp,temp_feels,temp_min,temp_max,pressure,humidity,wind_speed
0,1756677250,19.64,19.93,18.15,20.77,1013,87,3.09
1,1756677371,19.48,19.70,18.15,20.65,1013,85,3.09
2,1756677642,19.48,19.70,18.15,20.65,1013,85,3.09
3,1756677793,19.45,19.67,18.15,20.65,1013,85,3.09
4,1756678034,19.35,19.56,18.15,20.65,1012,85,3.60
...,...,...,...,...,...,...,...,...
4555,1757329563,25.17,25.00,24.12,26.27,1018,48,6.17
4556,1757329667,25.21,25.04,24.15,26.30,1018,48,6.17
4557,1757329758,25.31,25.15,24.15,26.67,1018,48,6.17
4558,1757329788,25.31,25.15,24.15,26.67,1018,48,6.17


## przekonwertowanie timestamp na datę i czas

In [33]:
# Try the conversion on one sample value: epoch seconds -> datetime -> pandas Timestamp.
# Note: datetime.fromtimestamp() returns the local time of the machine running the notebook.
pd.to_datetime(datetime.fromtimestamp(1756677250))

Timestamp('2025-08-31 23:54:10')

In [34]:
# The same sample value as a plain Python datetime (naive, in the machine's local time).
datetime.fromtimestamp(1756677250)

datetime.datetime(2025, 8, 31, 23, 54, 10)

In [37]:
# Convert the Unix timestamps (seconds) to wall-clock time in one vectorized call.
# datetime.fromtimestamp() uses the time zone of whatever machine runs the notebook,
# so the result was not reproducible elsewhere. The zone is pinned to Poland instead
# (the payload reports country 'PL'), and missing timestamps become NaT rather than
# raising. The zone info is dropped at the end to keep naive datetimes as before;
# to_excel() does not accept timezone-aware values.
data_df['datetime'] = (
    pd.to_datetime(data_df['timestamp'], unit='s', utc=True)
    .dt.tz_convert('Europe/Warsaw')
    .dt.tz_localize(None)
)

In [38]:
# Display the frame, now including the 'datetime' column.
data_df

Unnamed: 0,timestamp,temp,temp_feels,temp_min,temp_max,pressure,humidity,wind_speed,datetime
0,1756677250,19.64,19.93,18.15,20.77,1013,87,3.09,2025-08-31 23:54:10
1,1756677371,19.48,19.70,18.15,20.65,1013,85,3.09,2025-08-31 23:56:11
2,1756677642,19.48,19.70,18.15,20.65,1013,85,3.09,2025-09-01 00:00:42
3,1756677793,19.45,19.67,18.15,20.65,1013,85,3.09,2025-09-01 00:03:13
4,1756678034,19.35,19.56,18.15,20.65,1012,85,3.60,2025-09-01 00:07:14
...,...,...,...,...,...,...,...,...,...
4555,1757329563,25.17,25.00,24.12,26.27,1018,48,6.17,2025-09-08 13:06:03
4556,1757329667,25.21,25.04,24.15,26.30,1018,48,6.17,2025-09-08 13:07:47
4557,1757329758,25.31,25.15,24.15,26.67,1018,48,6.17,2025-09-08 13:09:18
4558,1757329788,25.31,25.15,24.15,26.67,1018,48,6.17,2025-09-08 13:09:48


In [40]:
# Save the combined frame as a single CSV file, without the index column.
data_df.to_csv("dane/pogoda/pogoda.csv", index=False)

In [41]:
# Calendar date of every reading. The vectorized .dt accessor replaces a
# per-row Python lambda and yields the same datetime.date objects.
data_df['datetime'].dt.date

0       2025-08-31
1       2025-08-31
2       2025-09-01
3       2025-09-01
4       2025-09-01
           ...    
4555    2025-09-08
4556    2025-09-08
4557    2025-09-08
4558    2025-09-08
4559    2025-09-08
Name: datetime, Length: 4560, dtype: object

In [42]:
# Store the calendar date of every reading in its own column. The vectorized
# .dt accessor replaces a per-row Python lambda and yields the same values.
data_df['date'] = data_df['datetime'].dt.date
data_df

Unnamed: 0,timestamp,temp,temp_feels,temp_min,temp_max,pressure,humidity,wind_speed,datetime,date
0,1756677250,19.64,19.93,18.15,20.77,1013,87,3.09,2025-08-31 23:54:10,2025-08-31
1,1756677371,19.48,19.70,18.15,20.65,1013,85,3.09,2025-08-31 23:56:11,2025-08-31
2,1756677642,19.48,19.70,18.15,20.65,1013,85,3.09,2025-09-01 00:00:42,2025-09-01
3,1756677793,19.45,19.67,18.15,20.65,1013,85,3.09,2025-09-01 00:03:13,2025-09-01
4,1756678034,19.35,19.56,18.15,20.65,1012,85,3.60,2025-09-01 00:07:14,2025-09-01
...,...,...,...,...,...,...,...,...,...,...
4555,1757329563,25.17,25.00,24.12,26.27,1018,48,6.17,2025-09-08 13:06:03,2025-09-08
4556,1757329667,25.21,25.04,24.15,26.30,1018,48,6.17,2025-09-08 13:07:47,2025-09-08
4557,1757329758,25.31,25.15,24.15,26.67,1018,48,6.17,2025-09-08 13:09:18,2025-09-08
4558,1757329788,25.31,25.15,24.15,26.67,1018,48,6.17,2025-09-08 13:09:48,2025-09-08


In [45]:
# Split the big frame into smaller Excel (.xlsx) files, one per value of 'date'.

for g, gdf in data_df.groupby('date'):
    # g is the group's 'date' value; it is interpolated into the output file name.
    fname = f"dane/pogoda/{g}-daily.xlsx"
    gdf.to_excel(fname, index=False)