# Модель предсказания ключевых фраз

In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

import joblib

import warnings
warnings.filterwarnings('ignore')


In [2]:
%run ThePropertyPhrases.py
ThePropertyPhrasesGenerator

__main__.ThePropertyPhrasesGenerator

### Исходные данные

In [3]:
train = pd.read_csv('train875.csv')
train.head(2)

Unnamed: 0.1,Unnamed: 0,id,name,summary,space,description,experiences_offered,neighborhood_overview,notes,transit,access,interaction,house_rules,host_id,host_since,host_about,host_response_time,host_response_rate,host_is_superhost,host_has_profile_pic,host_identity_verified,neighbourhood_cleansed,zipcode,latitude,longitude,is_location_exact,property_type,room_type,accommodates,bathrooms,bedrooms,beds,bed_type,amenities,square_feet,security_deposit,cleaning_fee,guests_included,extra_people,minimum_nights,cancellation_policy,require_guest_profile_picture,require_guest_phone_verification,price
0,0,24671810,Lovely Room with Private bathroom in Tower Hill,Fantastic flat,,Fantastic flat Living room Kitchen Bedroom ...,none,,I live there with my husband and my lovely cat...,,Living room Kitchen Bedroom Bathroom,,Visitors no,13627321,2014-03-28,You are very welcome to come and stay in my pl...,within an hour,97%,f,t,f,Tower Hamlets,E1 0EW,51.509676,-0.059991,t,Apartment,Private room,2,1.0,1.0,1.0,Real Bed,"{TV,Internet,Wifi,Kitchen,Elevator,Heating,""Sm...",,100.0,25.0,2,0.0,2,strict_14_with_grace_period,f,f,57.0
1,1,15726550,1 bedroom flat in South Kensington/Chelsea,The flat has a perfect location near by South ...,"Flat contains hallway, spacious bedroom, bathr...",The flat has a perfect location near by South ...,none,,,,,,,20475917,2014-08-24,,within an hour,100%,f,t,f,Kensington and Chelsea,SW3 6QJ,51.49295,-0.171463,f,Apartment,Entire home/apt,2,1.0,1.0,1.0,Real Bed,"{TV,Wifi,Kitchen,Heating,""Family/kid friendly""...",,,10.0,1,0.0,4,moderate,f,f,104.0


In [4]:
reviews = pd.read_csv('reviews875.csv')
reviews.head(2)

Unnamed: 0,id,listing_id,comments
0,1075818,24671810,paola was super nice the check in was easy and...
1,735392,15726550,mimosa s flat was exactly as described and pic...


### Вспомогательные функции

In [5]:
def get_from_train_by_index(i):
    return train[train.index == i].to_dict(orient='records')[0]

def get_from_train_by_id(idx):
    return train[train.id == idx].to_dict(orient='records')[0]

def get_reviews_by_index(i):
    idx = train[train.index == i].id.values[0]
    return reviews.loc[reviews.listing_id == idx, :]

def get_reviews_by_id(idx):
    return reviews.loc[reviews.listing_id == idx, :]

### Тест модели

In [6]:
phrases_generator = ThePropertyPhrasesGenerator()

In [7]:
for rec_index in [10, 16, 18]:

    d = get_from_train_by_index(rec_index)
    phrases = phrases_generator.generate_key_phrases(d)
    phrases = phrases.reset_index()
    if 'index' in phrases.columns:
        phrases = phrases.drop(columns=['index'], axis=1)

    comments = get_reviews_by_index(rec_index)
    comments = comments.reset_index()
    if 'index' in phrases.columns:
        comments = comments.drop(columns=['index'], axis=1)

    columns = list(phrases.columns) + list(comments.columns)

    df = pd.concat([phrases, comments], axis=1, ignore_index=True) \
        .rename(columns=dict(zip(range(len(columns)), columns))) \
        .fillna('')
    
    display(df)

Unnamed: 0,topic,phrases,freq,listing_count,rented_mean,index,id,listing_id,comments
0,5,great location just,5449,22273,223.181071,10.0,725624.0,15493500.0,great location not far from paddington station...
1,5,perfect location explore,2686,22273,223.181071,,,,
2,21,place stay nice,2591,18468,220.355426,,,,
3,6,clean exactly described,2549,17618,220.141049,,,,
4,25,comfortable bed good,2521,14708,217.252448,,,,
5,21,place stay thanks,2266,18468,220.355426,,,,
6,3,really great host,2229,18263,220.65849,,,,
7,29,es muy modo,2219,11678,215.306474,,,,
8,31,lovely flat great,2174,13831,218.245752,,,,
9,3,host excellent communication,2057,18263,220.65849,,,,


Unnamed: 0,topic,phrases,freq,listing_count,rented_mean,index,id,listing_id,comments
0,5,great location just,5449,22273,223.181071,16.0,803622.0,17291600.0,what a great place in a really nice and safe a...
1,5,perfect location explore,2686,22273,223.181071,,,,
2,6,clean exactly described,2549,17618,220.141049,,,,
3,1,highly recommend lovely,2045,14777,218.761115,,,,
4,1,helpful highly recommend,1766,14777,218.761115,,,,
5,16,bien situ proximit,1735,17280,220.723785,,,,
6,16,tr actif et,1649,17280,220.723785,,,,
7,1,stay highly recommend,1588,14777,218.761115,,,,
8,5,lovely great location,1416,22273,223.181071,,,,
9,5,location super close,1397,22273,223.181071,,,,


Unnamed: 0,topic,phrases,freq,listing_count,rented_mean,index,id,listing_id,comments
0,6,clean exactly described,2549,17618,220.141049,18.0,732206.0,15627000.0,sam was very discreet which enabled us to have...
1,6,clean kitchen equipped,1304,17618,220.141049,,,,
2,6,room clean cozy,908,17618,220.141049,,,,
3,28,underground station close,809,16478,219.477667,,,,
4,6,bedroom bathroom clean,749,17618,220.141049,,,,
5,6,room really clean,704,17618,220.141049,,,,
6,28,location right tube,667,16478,219.477667,,,,
7,6,room exactly described,626,17618,220.141049,,,,
8,6,room clean spacious,616,17618,220.141049,,,,
9,6,apartment clean cozy,530,17618,220.141049,,,,
