# Tips from GCP training

In [1]:
# %%writefile "path"/"script_name".py - create a script file or other file with the cell content

In [None]:
# %%bash - commnad line in the cell

## Imports

In [2]:
import numpy as np
import re
import pandas as pd
from config import config
from get_offers_by_city import connect

## Create Connection to de_rent Database

In [3]:
conn = connect(config())

Connecting to database...
Connection successful


## Load data from database

In [4]:
query = '''
    SELECT * FROM all_offers_infos
'''

cursor = conn.cursor()
cursor.execute(query)
result = cursor.fetchall()

In [6]:
df = pd.DataFrame(result, columns=['offer_id', 'extraction_date', 'city', 'city_code', 'offer_type', 'lat_lng', 'offer_infos'])

In [10]:
df['offer_infos'][3]

'{"","\\"area\\":40,\\"mobex\\":true,\\"zip\\":\\"01069\\",\\"objectcat\\":\\"Wohnung\\",\\"pers\\":\\"1342334\\",\\"rooms\\":2,\\"buildyear\\":2021,\\"fed\\":\\"Sachsen\\",\\"city\\":\\"Dresden\\",\\"obcon\\":\\"Erstbezug\\",\\"obcat\\":\\"Etagenwohnung\\",\\"balcn\\":false,\\"pic\\":\\"https://i.immonet.de/41/33/15/741413315_156x88.jpg\\",\\"title\\":\\"Zwischen Elbe und Großem Garten - Neubau 2-Zimmer-Wohnung\\",\\"kitch\\":false,\\"marketingtype\\":\\"Miete\\",\\"bank\\":false,\\"obtyp\\":\\"Wohnen\\",\\"prrng\\":\\"0-500\\",\\"price\\":420,\\"gardn\\":false,\\"state\\":\\"Deutschland\\""}'

In [8]:
df['offer_infos'][0]

'{"","\\"area\\":50.35,\\"mobex\\":true,\\"zip\\":\\"01099\\",\\"objectcat\\":\\"Wohnung\\",\\"pers\\":\\"622951\\",\\"rooms\\":2,\\"fed\\":\\"Sachsen\\",\\"city\\":\\"Dresden\\",\\"obcat\\":\\"Wohnung\\",\\"balcn\\":true,\\"pic\\":\\"https://i.immonet.de/53/35/27/739533527_156x104.jpg\\",\\"title\\":\\"Mit schönem Blick vom großzügigem Balkon! Helle Wohnung im Haus mit Aufzug und TG-Stellplatz!\\",\\"kitch\\":false,\\"marketingtype\\":\\"Miete\\",\\"bank\\":false,\\"obtyp\\":\\"Wohnen\\",\\"prrng\\":\\"0-500\\",\\"price\\":430,\\"gardn\\":false,\\"state\\":\\"Deutschland\\""}'

## Data Cleaning

### Cleaning one offer

#### Lat - Lng

In [12]:
# look to the information
df['lat_lng'][0]

'{"{lat: 51.06671,lng: 13.78576}"}'

In [20]:
# Separate into latitude (lat) and longitude (lng)
df['lat'] = df['lat_lng'].apply(lambda x: re.findall('\d+.\d+', x)[0])
df['lng'] = df['lat_lng'].apply(lambda x: re.findall('\d+.\d+', x)[1])

In [34]:
# drop original lat_lng column
df.drop(columns='lat_lng', inplace=True)

#### Separate infos from offer_infos

In [35]:
df.head()

Unnamed: 0,offer_id,extraction_date,city,city_code,offer_type,offer_infos,lat,lng
0,45701120,2021-10-31 08:03:29,Dresden,100051,wohnung,"{"""",""\""area\"":50.35,\""mobex\"":true,\""zip\"":\""0...",51.06671,13.78576
1,45780995,2021-10-31 08:03:29,Dresden,100051,wohnung,"{"""",""\""area\"":53,\""mobex\"":true,\""zip\"":\""0106...",51.03056,13.72469
2,45131783,2021-10-31 08:03:29,Dresden,100051,wohnung,"{"""",""\""area\"":26.5,\""mobex\"":true,\""zip\"":\""01...",51.074138696955245,13.740054768070925
3,45776903,2021-10-31 08:03:29,Dresden,100051,wohnung,"{"""",""\""area\"":40,\""mobex\"":true,\""zip\"":\""0106...",51.04843,13.75065
4,44552201,2021-10-31 08:03:29,Dresden,100051,wohnung,"{"""",""\""area\"":80,\""mobex\"":true,\""zip\"":\""0115...",51.0442,13.70739


In [161]:
# Get all data in "JASON" format for each offer and put it into a dictionary
# and than into a list to create a now separated dataframe with the main informations.

df_list = []

for x in range(len(df)):
    infos_dict = {}
    
    b = df['offer_infos'][x].replace('\\', '')
    b = b.replace('{', '').replace('}', '')[4:]
    b = b[:-1]
    b = b.split(',')
    for i in b:
        try:
            c, d = i.split(':')
        except:
            pass
        infos_dict[c] = d
        #print(i)
    df_list.append(infos_dict)

# create a dataframe with all the main infos.
df_sep = pd.DataFrame(df_list)
df_sep.drop(columns=['"obtyp"', '"prrng"', '"pic"', ' Bestlage', ' unterm Dach', ' Schandauer Straße', 
                     ' Leipziger Straße',' im Mansardgeschoss', ' Fröbelstraße', 
                     ' mittendrin', ' Ermelstraße',' Behrischstraße', ' in zweiter Reihe', 
                     ' ruhige Seitenstraße'], inplace=True)
# Rename columns
df_sep.columns = ['area', 'furnished', 'zip_code', 'category', 'pers', 'num_rooms', 'state', 'city',
                  'sub_category', 'balcon', 'offer_title', 'kitchen', 'offer_mkting_type',
                  'bank', 'rent_price', 'garten', 'country', 'build_year', 'offer_condition',
                  'heating']

# Convert False and True into 1 and 0

# remove "" from strings


In [162]:
df_sep.columns

Index(['area', 'furnished', 'zip_code', 'category', 'pers', 'num_rooms',
       'state', 'city', 'sub_category', 'balcon', 'offer_title', 'kitchen',
       'offer_mkting_type', 'bank', 'rent_price', 'garten', 'country',
       'build_year', 'offer_condition', 'heating'],
      dtype='object')

In [163]:
df_sep.head()

Unnamed: 0,area,furnished,zip_code,category,pers,num_rooms,state,city,sub_category,balcon,offer_title,kitchen,offer_mkting_type,bank,rent_price,garten,country,build_year,offer_condition,heating
0,50.35,True,"""01099""","""Wohnung""","""622951""",2,"""Sachsen""","""Dresden""","""Wohnung""",True,"""Mit schönem Blick vom großzügigem Balkon! Hel...",False,"""Miete""",False,430,False,"""Deutschland""",,,
1,53.0,True,"""01069""","""Wohnung""","""2197260""",2,"""Sachsen""","""Dresden""","""Erdgeschosswohnung""",False,,False,"""Miete""",False,499,False,"""Deutschland""",1926.0,"""Teil- oder vollsaniert""","""Zentralheizung"""
2,26.5,True,"""01097""","""Wohnung""","""240880""",1,"""Sachsen""","""Dresden""","""Wohnung""",True,"""Willkommen in der attraktiven HafenCity! Erst...",False,"""Miete""",False,290,False,"""Deutschland""",2021.0,"""Erstbezug""",
3,40.0,True,"""01069""","""Wohnung""","""1342334""",2,"""Sachsen""","""Dresden""","""Etagenwohnung""",False,"""Zwischen Elbe und Großem Garten - Neubau 2-Zi...",False,"""Miete""",False,420,False,"""Deutschland""",2021.0,"""Erstbezug""",
4,80.0,True,"""01157""","""Wohnung""","""233519""",3,"""Sachsen""","""Dresden""","""Wohnung""",False,"""Perfekt WG geeignet",False,"""Miete""",False,695,False,"""Deutschland""",1996.0,"""Teil- oder vollsaniert""","""Zentralheizung"""
