In [20]:
import html
import os
from copy import deepcopy

import numpy as np
import pandas as pd
import pymongo
from ipyleaflet import Map, basemaps, basemap_to_tiles, Marker, Popup
from ipywidgets import HTML, Layout
from pandas import DataFrame
from pymongo import MongoClient

In [2]:
# MongoDB connection string. Set the MONGO_CONNECTION_STRING environment variable
# to supply real credentials without committing them to the notebook; when it is
# unset, the local test database below is used.
CONNECTION_STRING = os.environ.get(
    "MONGO_CONNECTION_STRING",
    "mongodb://testuser:testpwd@127.0.0.1:27017/testDB",
)

In [3]:
# Client for the MongoDB deployment described by CONNECTION_STRING.
client = MongoClient(CONNECTION_STRING)

In [4]:
# The "cian_offers" collection of the "testDB" database; the query helpers
# defined in this notebook read it as a module-level global.
collection = client['testDB']["cian_offers"]

For a given offer id, find the realtor's status, the realtor's id, and the realtor's phone number.

In [5]:
# Offer id to inspect.
# NOTE(review): no later cell visible here reads this variable (the example call
# passes a literal id instead) — confirm whether it is still needed.
offer_id = 264401712

In [6]:
def get_nested(data: dict, keys: str, delimiter: str = "."):
    """Look up a value in nested dictionaries by a delimited key path.

    Args:
        data: Dictionary (possibly nested) to read from. It is never modified.
        keys: Key path such as ``"user.masterAgent.id"``.
        delimiter: Separator between the path segments in ``keys``.

    Returns:
        A deep copy of the value found at the end of the path, or ``None`` when
        a segment is missing or an intermediate value is falsy or has no
        ``get`` method (for example a string or a list).
    """
    current = data
    for key in keys.split(delimiter):
        # Only non-empty mapping-like values can be descended into; anything
        # else means the path does not exist in this document.
        if not current or not callable(getattr(current, "get", None)):
            return None
        current = current.get(key)
    # Copy only the leaf so the caller gets an independent value without the
    # cost of duplicating the whole document on every lookup.
    return deepcopy(current)

def get_user_info(offer_id: int) -> dict:
    """Collect the user ids and phone lists stored on one offer document.

    Args:
        offer_id: ``_id`` of the offer in the module-level ``collection``.

    Returns:
        A dict with two keys:

        * ``'list_of_userIDs'`` - de-duplicated ``int`` ids gathered from the
          top-level and ``user.*`` id fields (falsy values are dropped).
        * ``'list_of_phone_nr'`` - a two-item list holding the offer's
          ``phones`` value and its ``user.phoneNumbers`` value; either item is
          ``None`` when the field is absent.

        When no offer has this ``_id`` the id list is empty and both phone
        slots are ``None``.

    Raises:
        ValueError: If a stored id cannot be converted with ``int()``.
    """
    fields_of_interest = {'userId': True, 'moderationInfo': True, 'basicProfiScore': True, 'isByCommercialOwner': True, 'phones': True, 'isByHomeowner': True, 'cianUserId': True, 'fromDeveloper': True, 'publishedUserId': True, 'user': True, 'isPro': True}
    offer = collection.find_one({'_id': offer_id}, fields_of_interest)
    if offer is None:
        # find_one returns None when nothing matches; report "nothing known"
        # in the usual shape instead of failing on None.get(...).
        return {'list_of_userIDs': [], 'list_of_phone_nr': [None, None]}

    top_level_id_keys = ('userId', 'cianUserId', 'publishedUserId')
    nested_id_paths = (
        'user.cianUserId',
        'user.userId',
        'user.agentAvailability.userId',
        'user.masterAgent.id',
    )
    raw_ids = [offer.get(key) for key in top_level_id_keys]
    raw_ids += [get_nested(offer, path) for path in nested_id_paths]
    user_ids = list({int(raw_id) for raw_id in raw_ids if raw_id})

    # get_nested copes with a missing or None 'user' sub-document, which a
    # chained offer.get('user', {}).get(...) does not.
    phone_lists = [offer.get('phones'), get_nested(offer, 'user.phoneNumbers')]
    return {'list_of_userIDs': user_ids, 'list_of_phone_nr': phone_lists}

def get_offers_id_containing_user_id(user_id: int) -> list:
    """Return the ``_id`` of every offer that references ``user_id``.

    Every user-id field is compared against ``user_id`` as passed and against
    ``str(user_id)``; an offer matches if any one of those comparisons holds.
    """
    id_fields = (
        "userId",
        "cianUserId",
        "publishedUserId",
        "user.cianUserId",
        "user.userId",
        "user.agentAvailability.userId",
        "user.masterAgent.id",
    )
    # One equality clause per (value form, field): first all fields with the
    # value as passed, then all fields with its string form.
    clauses = [
        {field: candidate}
        for candidate in (user_id, str(user_id))
        for field in id_fields
    ]
    matches = collection.find({"$or": clauses}, {'_id': True})
    return [doc.get('_id') for doc in matches]

def get_offers_id_containing_user_ids(user_ids: list) -> list:
    """Return the de-duplicated offer ids that reference any of ``user_ids``."""
    collected = [
        found_id
        for single_user_id in user_ids
        for found_id in get_offers_id_containing_user_id(single_user_id)
    ]
    return list(set(collected))

def get_offers_id_containing_phone_number(number: int) -> list:
    """Return the ``_id`` of every offer whose ``phones.number`` equals ``number``.

    The stored value is compared against ``number`` as passed and against
    ``str(number)``.
    """
    clauses = [{"phones.number": candidate} for candidate in (number, str(number))]
    matches = collection.find({"$or": clauses}, {'_id': True})
    return [doc.get('_id') for doc in matches]

def get_offers_id_containing_phone_numbers(numbers: list) -> list:
    """Return the de-duplicated offer ids that list any of ``numbers`` as a phone."""
    collected = [
        found_id
        for single_number in numbers
        for found_id in get_offers_id_containing_phone_number(single_number)
    ]
    return list(set(collected))

def get_offers_id_from_same_user(offer_id: int) -> list:
    """Return the ids of all offers attributed to the author of ``offer_id``.

    Offers are linked to the author in two ways: through any user id stored on
    the offer, and through any phone number stored on it.

    Args:
        offer_id: ``_id`` of the offer whose author is being traced.

    Returns:
        De-duplicated list of offer ids found by either kind of link.
    """
    results = get_user_info(offer_id)
    user_id_based = get_offers_id_containing_user_ids(results['list_of_userIDs'])

    # Use every phone entry, not just the first one of each list, and skip
    # entries whose 'number' is missing or not numeric instead of crashing.
    phone_numbers = []
    for phone_list in filter(None, results['list_of_phone_nr']):
        for phone in phone_list:
            try:
                phone_numbers.append(int(phone.get('number')))
            except (AttributeError, TypeError, ValueError):
                continue
    # The two phone lists can repeat a number; query each distinct number once.
    phone_numbers = list(dict.fromkeys(phone_numbers))

    phone_nr_based = get_offers_id_containing_phone_numbers(phone_numbers)
    return list(set(user_id_based + phone_nr_based))

In [7]:
# Example: ids of all offers attributed to the author of offer 265608306.
get_offers_id_from_same_user(265608306)

[265608306]

In [8]:
# Let's find all offers whose author has no other offers.

In [9]:
# Cursor over every offer; the projection asks only for `_id` and `userId`.
cursor = collection.find({} , {'_id': True, 'userId': True})

In [10]:
# Map each offer id to the number of offers attributed to the same author
# (the offer itself included when it is found by its own ids/phones).
result = {}
for offer in cursor:
    current_offer_id = offer.get('_id')
    same_author_offers = get_offers_id_from_same_user(current_offer_id)
    result[current_offer_id] = len(same_author_offers)

In [11]:
# One row per offer id (as the index) with a single "count" column.
d = (
    DataFrame
    .from_dict(result, orient='index')
    .rename(columns={0: "count"})
)

In [12]:
# Keep only offers whose count is exactly 1 and move the id from the index
# into an "offer_id" column.
has_single_offer = d['count'] == 1
d = (
    d.loc[has_single_offer]
    .reset_index()
    .rename(columns={'index': 'offer_id'})
)

In [13]:
# Plain Python list of the remaining offer ids, followed by how many there are.
ids_of_interest = d['offer_id'].tolist()
len(ids_of_interest)

104

In [14]:
# Coordinates passed to the map as its initial `center`.
center=(43.568232, 39.734636)

In [15]:
# Full-width, 800px-tall widget area for the map.
map_layout = Layout(width='100%', height='800px')

# OpenTopoMap-based map opened on `center` at zoom level 17.
m = Map(
    center=center,
    zoom=17,
    basemap=basemaps.OpenTopoMap,
    tap=False,
    layout=map_layout,
)

In [16]:
# NOTE(review): `cursor` is reassigned by the filtered query in the following
# cell before this one is ever iterated — this looks like a leftover; confirm
# and remove.
cursor = collection.find({})

In [17]:
# Offers whose id is in `ids_of_interest` and whose priceRur is below the cap.
max_price_rur = 4000000
offer_filter = {
    "_id": {"$in": ids_of_interest},
    'bargainTerms.priceRur': {'$lt': max_price_rur},
}
cursor = collection.find(offer_filter)

In [18]:
# Add one marker per offer; its popup shows the price and a link to the listing.
for offer in cursor:
    coordinates = (offer.get('geo') or {}).get('coordinates') or {}
    lat, lng = coordinates.get('lat'), coordinates.get('lng')
    if lat is None or lng is None:
        # An offer without a position cannot be placed on the map; skip it
        # rather than abort the whole loop with a KeyError.
        continue
    price = offer['bargainTerms']['priceRur']
    price_string = f'{price:,}'
    # fullUrl is stored offer data, so escape it before embedding it in the
    # href attribute of the popup's HTML.
    offer_url = html.escape(str(offer['fullUrl']), quote=True)
    marker = Marker(location=(lat, lng), draggable=False)
    marker.popup = HTML(value='%s RUB<br><a target="_blank" rel="noopener noreferrer" href="%s">Link</a>'%(price_string, offer_url))
    m.add_layer(marker)

In [19]:
# Render the map widget `m` as this cell's output.
display(m)

Map(center=[43.568232, 39.734636], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title',…