In [1]:
import requests
import pandas as pd
import time

# Seminar - APIs and real-life coding

# Task 1: Requesting API
# 1a. Create a function requesting data from sreality

```python
base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page=60&page={}'.format(
    i)

r = requests.get(base_url)
d = r.json()
```

* function should parametrize:
    * `category_main_cb` - `{'flat': 1, 'house': 2, 'land': 3}`
    * `category_type_cb` - `{'sell': 1, 'rent': 2}`
    * `locality_region_id` - `{'Praha': 10, 'Brno': 14}`
    * `page` parameter
* use string inputs for `category_main_cb` and `category_type_cb`
* test the validity of inputs
* include try/except clause to handle errors
* function should return JSON data in python types
* do not forget to sleep each request at least 0.5s


In [4]:
def sreality(category_main, category_type, category_region, page, pause=0.5):
    """Request one page of estate listings from the sreality.cz API.

    Parameters
    ----------
    category_main : str
        Kind of estate: 'flat', 'house' or 'land'.
    category_type : str
        Kind of offer: 'sell' or 'rent'.
    category_region : str
        Region name: 'Praha' or 'Brno'.
    page : int
        Result page to request (1 or higher).
    pause : float, optional
        Seconds to sleep after the request so that repeated calls do not
        hammer the server. Defaults to 0.5.

    Returns
    -------
    dict or None
        The decoded JSON response, or None when the request failed, the
        server answered with an error status, or the body was not JSON
        (a message is printed in those cases).

    Raises
    ------
    ValueError
        If a category/region is not one of the supported strings or `page`
        is not a positive integer.
    """
    mains = {'flat': 1, 'house': 2, 'land': 3}
    types = {"sell": 1, "rent": 2}
    regions = {"Praha": 10, "Brno": 14}

    # Validate up front so that a typo gives a readable message instead of a KeyError.
    checks = (
        ("category_main", category_main, mains),
        ("category_type", category_type, types),
        ("category_region", category_region, regions),
    )
    for label, value, allowed in checks:
        if not isinstance(value, str) or value not in allowed:
            raise ValueError(f"{label} must be one of {sorted(allowed)}, got {value!r}")

    try:
        page_number = int(page)
    except (TypeError, ValueError):
        raise ValueError(f"page must be a positive integer, got {page!r}") from None
    if page_number < 1:
        raise ValueError(f"page must be a positive integer, got {page!r}")

    base_url = 'https://www.sreality.cz/api/cs/v2/estates'
    # Let requests build the query string; the hand-written URL had `per_page60`
    # (missing `=`), so the page size was never actually sent.
    params = {
        'category_main_cb': mains[category_main],
        'category_type_cb': types[category_type],
        'locality_region_id': regions[category_region],
        'per_page': 60,
        'page': page_number,
    }

    try:
        r = requests.get(base_url, params=params, timeout=10)
        if r.ok:
            return r.json()
        print(f"Request failed with status code {r.status_code}.")
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        print(f"There was an error in request: {err}")
    finally:
        # Be polite to the API: wait after every request, successful or not.
        time.sleep(pause)
    return None

# Example call: houses for rent in Praha, first page of results.
sreality("house", "rent", "Praha", 1)

{'meta_description': '236 realit v nabídce pronájem domů a nemovitostí Praha. Vyberte si novou nemovitost na sreality.cz s hledáním na mapě a velkými náhledy fotografií nabízených domů a nemovitostí.',
 'result_size': 236,
 '_embedded': {'estates': [{'labelsReleased': [['new_building'],
     ['school', 'drugstore']],
    'has_panorama': 0,
    'labels': ['Novostavba', 'Škola 1 min. pěšky', 'Lékárna 3 min. pěšky'],
    'is_auction': False,
    'labelsAll': [['new_building', 'garage'],
     ['playground',
      'small_shop',
      'vet',
      'tavern',
      'theater',
      'candy_shop',
      'movies',
      'atm',
      'tram',
      'sports',
      'kindergarten',
      'train',
      'restaurant',
      'bus_public_transport',
      'shop',
      'school',
      'drugstore',
      'medic',
      'post_office',
      'metro']],
    'seo': {'category_main_cb': 2,
     'category_sub_cb': 39,
     'category_type_cb': 2,
     'locality': 'praha-praha-nebusice-'},
    'exclusively_at_rk'

### 1b. Create a function converting sreality json data into pandas dataframe

In [5]:
def sreality_to_json(sreality_json):
    """Convert a decoded sreality API response into a pandas DataFrame.

    Despite its name, the function returns a DataFrame, not JSON.

    Parameters
    ----------
    sreality_json : dict or None
        Decoded JSON response. Listings are read from
        ``sreality_json['_embedded']['estates']``.

    Returns
    -------
    pandas.DataFrame
        One row per estate, with nested values (e.g. the ``gps`` dict) left
        as Python objects. An empty frame is returned for ``None`` or an
        empty payload. A payload without ``_embedded.estates`` is normalised
        as a whole.
    """
    # A failed request yields None; return an empty frame instead of crashing.
    if not sreality_json:
        return pd.DataFrame()

    embedded = sreality_json.get("_embedded") if isinstance(sreality_json, dict) else None
    estates = embedded.get("estates") if isinstance(embedded, dict) else None
    if estates is None:
        return pd.json_normalize(sreality_json)

    # max_level=0 keeps nested dicts such as `gps` in a single column so they
    # can be unpacked explicitly later.
    return pd.json_normalize(estates, max_level=0)

### 1c. link function `1b` into function `1a`

In [None]:
def sreality_to_json(reality=None, category_main="house", category_type="rent",
                     category_region="Praha", page=1):
    """Fetch one page of sreality listings (if needed) and return a DataFrame.

    Parameters
    ----------
    reality : dict or None, optional
        Already-decoded sreality JSON. When omitted, the data is requested
        with the module-level ``sreality`` function using the remaining
        arguments.
    category_main, category_type, category_region, page
        Request parameters forwarded to ``sreality``; only used when
        `reality` is None. The defaults reproduce the previously hard-coded
        call ``sreality("house", "rent", "Praha", 1)``.

    Returns
    -------
    pandas.DataFrame
        One row per estate, with nested values (e.g. the ``gps`` dict) left
        as Python objects. An empty frame is returned when there is no data
        (for instance after a failed request). A payload without
        ``_embedded.estates`` is normalised as a whole.
    """
    if reality is None:
        # Reuse the request helper defined earlier in the notebook instead of
        # keeping a second, nested copy of it here.
        reality = sreality(category_main, category_type, category_region, page)

    if not reality:
        return pd.DataFrame()

    embedded = reality.get("_embedded") if isinstance(reality, dict) else None
    estates = embedded.get("estates") if isinstance(embedded, dict) else None
    if estates is None:
        return pd.json_normalize(reality)

    # max_level=0 keeps nested dicts such as `gps` in a single column.
    return pd.json_normalize(estates, max_level=0)

### 1d. Combining multiple requests into single df

* Function should parametrize:
    * `start_page` and `end_page`
    * request parameters
* construct a list of individual request dfs
* then feed it into `pd.concat` function

## Task 2: Cleaning data

### 2a. Filter columns
* filter only columns: `['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']`
* use `.copy()` to avoid `SettingWithCopyWarning` later


### 2b: GPS
* Convert dictionary in `gps` column into two columns - `lat` and `lon`
* use apply function on gps column
* Note apply can return multiple columns

### 2b. Get flat type from name
* Name is always represented by string `Prodej bytu [type of flat] [Area] m^2`
* try picking third word in string
* check meaningfulness using `.value_counts()`

In [None]:
def get_third_word(x):
    """Return the third whitespace-separated word of `x`.

    Parameters
    ----------
    x : str
        Text to split, e.g. a listing name.

    Returns
    -------
    str or None
        The third word, or None when `x` is not a string (e.g. NaN) or has
        fewer than three words, so that a single odd value does not abort a
        whole ``.apply`` call.
    """
    if not isinstance(x, str):
        return None
    # split() without an argument ignores repeated or non-breaking spaces,
    # which split(" ") would turn into empty or glued "words".
    words = x.split()
    return words[2] if len(words) > 2 else None

# Apply the helper to every value of the "name" column.
# NOTE(review): `clean` is not defined in the visible cells — presumably the
# filtered frame from Task 2a; confirm it exists before running this cell.
clean["name"].apply(get_third_word)

### 2c. Get area from name
* Naive: select the word before last word
* Then try navigating using the index of `'m²'`
* if this also fails, then you will need to use regex

## Task 3 (Homework): Convert column `labelsAll` into boolean variables

### Task 3a. Get all possible label names
* deal with nested-list structure
* Hint: try summing the whole column to get a nested list of lists.
* Then flatten the nested list (2D to 1D)
* Finally keep only unique elements


### 3b. Test existence of label `cellar` for offers
* again deal with nested list of list structure
* write generic function `test_existence_of_label(offer_labels,label)`

### 3c. Test existence of all possible labels
* use apply returning series with all labels