# Marking up the residential registry with updates from **Public data**

# Install dependencies

In [1]:
!pip install git+https://github.com/Valueguard-Index-Sweden/valueguard-python-client#egg=valueguard
!pip install tqdm

Collecting valueguard
  Cloning https://github.com/Valueguard-Index-Sweden/valueguard-python-client to /tmp/pip-install-n6rm98rw/valueguard_b4eb0a17a30c482cb3f04a2b48efc3f6
  Running command git clone -q https://github.com/Valueguard-Index-Sweden/valueguard-python-client /tmp/pip-install-n6rm98rw/valueguard_b4eb0a17a30c482cb3f04a2b48efc3f6


## Import libraries 

- **valueguard** our official client to retrieve data from the API
- **tqdm** for an easily understandable progress bar
- **pandas** to work with the data in python
- **math** to calculate the number of pages needed (rounded up with `ceil`)
- **time** to time the functions

In [2]:
import datetime
import math
import os
import time
from urllib.parse import quote_plus

import pandas as pd
import valueguard
from sqlalchemy import create_engine
from tqdm import tqdm

## Display Settings

In [3]:
# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

### Create Client

In [4]:
# Instantiate the Valueguard API client; the cells below authenticate it and
# query it for residential registry markups.
vgClient = valueguard.Client()

## Get credentials 

In [5]:
# Read the API credentials from the environment so they never appear in the notebook.
# Either value is None when its environment variable is not set.
valueguard_username = os.environ.get('VALUEGUARD_API_USERNAME')
valueguard_password = os.environ.get('VALUEGUARD_API_PASSWORD')

## Login to Valueguard

In [6]:
# Log in with the credentials read from the environment.
vgClient.authenticate(
    valueguard_username,
    valueguard_password,
)

## Get the residential registry data

In [7]:
# Load the locally stored residential registry export.
# low_memory=False lets pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns (and the
# DtypeWarning pandas emits for them) on large files.
residential_registry_df = pd.read_csv("../data/residential_registry.csv", low_memory=False)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


## Set the id field to index

In [8]:
# Use the `id` column as the row index so rows can be addressed by their id.
# The guard keeps this cell re-runnable: once `id` is the index it is no longer
# a column, and calling set_index('id') a second time would raise a KeyError.
if residential_registry_df.index.name != 'id':
    residential_registry_df = residential_registry_df.set_index('id')

## Get the max updated at value

In [9]:
# Newest `updated_at` value present in the local registry; it is passed as
# 'updated_at_min' in the markup search criteria further down.
residential_registry_updated_at_max = residential_registry_df['updated_at'].max()

## Settings for the **residential registry markups**

In [10]:
# Filter for the markup endpoint: records updated from "public_data", with the
# newest local `updated_at` as lower bound (presumably inclusive — confirm
# against the API documentation).
residential_registry_markups_search_criteria = dict(
    updated_at_min=residential_registry_updated_at_max,
    updated_from="public_data",
)

# Request with limit=0: only the total record count from the meta data is used.
markups_count_response = vgClient.residential_registry_markups(
    offset=0,
    limit=0,
    search_criteria=residential_registry_markups_search_criteria,
)
residential_registry_markups_total_nr_records = markups_count_response['meta_data']['total_nr_records']

# Number of records requested per page, and the page count needed to cover them all.
page_size = 50000
pages = math.ceil(residential_registry_markups_total_nr_records / page_size)

### New rows to update registry with

In [11]:
# Show how many markup records matched the search criteria.
print(residential_registry_markups_total_nr_records)

32147


## Adding the markups to the **residential registry**

In [12]:
# Walk through the result pages and copy every returned field into the
# registry row that has the same id.
for page_index in tqdm(range(pages)):
    page_response = vgClient.residential_registry_markups(
        offset=page_index * page_size,
        limit=page_size,
        search_criteria=residential_registry_markups_search_criteria,
    )
    for markup in page_response['residences']:
        markup_id = markup['id']
        for column_name, new_value in markup.items():
            if column_name == "id":
                continue  # the id selects the row; it is not a column to overwrite
            # The registry index is addressed with the id converted to an integer.
            residential_registry_df.at[int(markup_id), column_name] = new_value

100%|██████████| 1/1 [00:53<00:00, 53.80s/it]


### Save data

### File (CSV)

In [13]:
# Write the updated registry to a new CSV file; the file name differs from the
# input file, so the original export is not overwritten.
# The index is written as well, since to_csv includes the index by default.
residential_registry_df.to_csv("../data/residential_registry_with_updates.csv")

### MySQL (Database)

In [14]:
# Database connection settings.
# Each value is taken from an environment variable when it is set, so real
# credentials never have to be written into the notebook; the placeholder
# defaults below are used otherwise.
hostname = os.getenv("MYSQL_HOST", "localhost")
dbname = os.getenv("MYSQL_DATABASE", "mydb_name")
uname = os.getenv("MYSQL_USER", "my_user_name")
pwd = os.getenv("MYSQL_PASSWORD", "my_password")

# Create the SQLAlchemy engine for the MySQL database (PyMySQL driver).
# User name and password are percent-encoded because characters such as
# '@', ':' or '/' would otherwise be parsed as part of the URL structure.
engine = create_engine(
    "mysql+pymysql://{user}:{pw}@{host}/{db}".format(
        user=quote_plus(uname),
        pw=quote_plus(pwd),
        host=hostname,
        db=dbname,
    )
)

# Writing the DataFrame to a SQL table is left disabled; uncomment to run it.
# NOTE(review): index=False leaves out the DataFrame index — if the `id` has
# been set as the index, drop that argument so the ids are stored in the table.
# --> residential_registry_df.to_sql('residential_registry_with_updates', engine, index=False)