In [68]:
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""
created: 2020-03-22
updated:
author: Rafal
description: import data from lines of json to pandas
"""
import os
import pathlib
import json
import pandas as pd

ROOT_PATH = pathlib.Path().absolute()
FILES_PATH = os.path.join(ROOT_PATH, '../../rafalr/files')

airly_file = os.path.join(FILES_PATH, 'klobuck_szkolna_airly.json')
print(airly_file)

class Airly:
    debug = 0  # useful for development
    columns = {'from':str, 'to':str, 'pm1':float, 'pm25':float, 'pm10':float, 'press':float, 'humid':float, 'temp':float}  # dict of measurements

    def __init__(self, airly_file: str):
        self._airly_file = airly_file
        self._measurements_list = []
        if not Airly.debug:
            return None

        print(self._airly_file)

    def _24hours_history(self, json_string: str) -> list:
        one_day_measure = json.loads(json_string)
        history_list_group = one_day_measure['history']
        history_list = []
        for one_measure in history_list_group:
            values = one_measure['values']
            di_name_value = {}
            for pair in values:
                di_name_value[pair['name']] = pair['value']
                
            li = [
                one_measure['fromDateTime'] , #.astype(str),
                one_measure['tillDateTime'], #.astype(str),
                di_name_value['PM1'],
                di_name_value['PM10'],
                di_name_value['PM25'],
                di_name_value['PRESSURE'],
                di_name_value['HUMIDITY'],
                di_name_value['TEMPERATURE'],
            ]
            history_list.append(li)
        return history_list

    def _json_rows2array(self):
        max_process = 10000000
        if Airly.debug:
            max_process = Airly.debug
        counter = 0
        error_counter = 0
        self._measurements_list = []
        with open(self._airly_file) as f:
            for line in f:
                counter += 1
                if counter > max_process:
                    break
                try:
                    self._measurements_list.extend(self._24hours_history(line))
                except Exception:
                    error_counter += 1
                    # print(f'error in line: {counter}')
                    # print(line)
        print(f'Erorr counter = {error_counter}')
        return self._measurements_list

    def _create_df(self) -> pd.DataFrame:
        df = pd.DataFrame()
        df = df.append(pd.DataFrame(self._measurements_list, columns=Airly.columns), ignore_index=True)
        return df

    def import2df(self):
        self._measurements_list = self._json_rows2array()
        result = self._create_df()
        return result


klobuck_file = Airly(airly_file)
df = klobuck_file.import2df()
df

/Users/rafalroman/PycharmProjects/python_data_products_workshop/rafalr/jupyter/../../rafalr/files/klobuck_szkolna_airly.json
Erorr counter = 57


Unnamed: 0,from,to,pm1,pm25,pm10,press,humid,temp
0,2018-11-15T06:00:00Z,2018-11-15T07:00:00Z,17.76,47.25,28.26,1031.99,97.58,5.52
1,2018-11-15T07:00:00Z,2018-11-15T08:00:00Z,17.15,45.84,27.45,1032.25,96.25,6.03
2,2018-11-15T08:00:00Z,2018-11-15T09:00:00Z,14.66,39.69,23.74,1032.27,93.25,7.02
3,2018-11-15T09:00:00Z,2018-11-15T10:00:00Z,10.44,25.81,16.91,1032.26,96.25,7.52
4,2018-11-15T10:00:00Z,2018-11-15T11:00:00Z,9.19,17.71,13.01,1032.11,82.92,8.92
...,...,...,...,...,...,...,...,...
10459,2020-03-22T01:00:00.000Z,2020-03-22T02:00:00.000Z,6.01,12.75,8.92,1028.24,62.08,-1.95
10460,2020-03-22T02:00:00.000Z,2020-03-22T03:00:00.000Z,4.90,10.43,7.33,1028.24,71.59,-2.59
10461,2020-03-22T03:00:00.000Z,2020-03-22T04:00:00.000Z,5.55,11.63,8.22,1028.46,81.48,-2.97
10462,2020-03-22T04:00:00.000Z,2020-03-22T05:00:00.000Z,7.49,15.70,11.19,1028.75,85.25,-2.97


In [69]:
df.where(df['from'].between('2019-01-02','2019-01-04'))

Unnamed: 0,from,to,pm1,pm25,pm10,press,humid,temp
0,,,,,,,,
1,,,,,,,,
2,,,,,,,,
3,,,,,,,,
4,,,,,,,,
...,...,...,...,...,...,...,...,...
10459,,,,,,,,
10460,,,,,,,,
10461,,,,,,,,
10462,,,,,,,,


In [71]:
df.dtypes

from      object
to        object
pm1      float64
pm25     float64
pm10     float64
press    float64
humid    float64
temp     float64
dtype: object