In [None]:
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def string_to_set(string):
    res = set(string.replace('{', '').replace('}', '').replace('\\', '').replace("'", '').replace('"', '').strip().split(','))
    clear_res = set()
    for elem in res:
        clear_res.add(elem.strip())
    return clear_res


books = pd.read_csv('drive/MyDrive/data/books_with_lemmas.csv', index_col=0)
books['lemmas_inter'] = books['lemmas_inter'].apply(lambda x: string_to_set(x))
books['id'] = books['id'].astype(int)

films = pd.read_csv('drive/MyDrive/data/films_with_lemmas.csv', index_col=0)
films['lemmas_inter'] = films['lemmas_inter'].apply(lambda x: string_to_set(x))
films['id'] = films['id'].astype(int)

In [None]:
class BooksToFilmsMetrics:

    def accuracy(self, books: pd.DataFrame, films: pd.DataFrame) -> pd.DataFrame:

        books['key'] = 0
        films['key'] = 0

        df = books.merge(films, on='key', how='outer')
        df = df[['id_x', 'id_y', 'lemmas_inter_x', 'lemmas_inter_y']]
        df.columns = ['book_id', 'film_id', 'book_lemmas', 'film_lemmas']

        df['metric'] = df.apply(lambda row: len(row['film_lemmas'].intersection(row['book_lemmas'])) / len(row['book_lemmas']), axis=1)
        return df[['book_id', 'film_id', 'metric']]

    def weighted_accuracy(self, books: pd.DataFrame, films: pd.DataFrame, book_weight: float = 1, film_weight: float = 2.5) -> list:
      
        books['key'] = 0
        films['key'] = 0

        df = books.merge(films, on='key', how='outer')
        df = df[['id_x', 'id_y', 'lemmas_inter_x', 'lemmas_inter_y']]
        df.columns = ['book_id', 'film_id', 'book_lemmas', 'film_lemmas']

        df['metric'] = df.apply(lambda row: len(row['film_lemmas'].intersection(row['book_lemmas'])) /\
                                (book_weight*len(row['book_lemmas']) + film_weight*len(row['film_lemmas'])), axis=1)
        return df[['book_id', 'film_id', 'metric']]
    
    def absolute_accuracy(self, books: pd.DataFrame, films: pd.DataFrame, l1: float = 1.0) -> list:
        books['key'] = 0
        films['key'] = 0

        df = books.merge(films, on='key', how='outer')
        df = df[['id_x', 'id_y', 'lemmas_inter_x', 'lemmas_inter_y']]
        df.columns = ['book_id', 'film_id', 'book_lemmas', 'film_lemmas']

        df['metric'] = df.apply(lambda row: (len(row['film_lemmas'].intersection(row['book_lemmas'])) - l1*len(row['book_lemmas'])) / len(row['book_lemmas']), axis=1)
        return df[['book_id', 'film_id', 'metric']]


In [None]:
BooksToFilmsMetrics().weighted_accuracy(
                        books[['id', 'lemmas_inter']].head(1),
                        films[['id', 'lemmas_inter']]
                          )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,book_id,film_id,metric
0,0,0,0.00000
1,0,1,0.00000
2,0,2,0.00000
3,0,3,0.00000
4,0,4,0.04878
...,...,...,...
23309,0,23309,0.00000
23310,0,23310,0.00000
23311,0,23311,0.00000
23312,0,23312,0.00000


In [None]:
class Preprocessor(BooksToFilmsMetrics):

    def create_features(self, books: pd.DataFrame, films: pd.DataFrame) -> pd.DataFrame:
        df = self.accuracy(
                          books[['id', 'lemmas_inter']],
                          films[['id', 'lemmas_inter']]
                      )
        df.columns = ['book_id', 'film_id', 'accuracy']

        for i, j in ([1, 0], [1, 2], [1, 3],
                     [0, 1], [2, 1], [3,1],
                     [1, 1]):
            df[f'waccuracy_{i}_{j}'] = self.weighted_accuracy(
                              books[['id', 'lemmas_inter']],
                              films[['id', 'lemmas_inter']],
                              i, j
                          )['metric']
          
        for i in range(3):
            df[f'absaccuracy_{i}'] = self.absolute_accuracy(
                      books[['id', 'lemmas_inter']],
                      films[['id', 'lemmas_inter']],
                      i
                  )['metric']
        
        return df

In [None]:
Preprocessor().create_features(books.head(1), films)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/st

Unnamed: 0,book_id,film_id,accuracy,waccuracy_1_0,waccuracy_1_2,waccuracy_1_3,waccuracy_0_1,waccuracy_2_1,waccuracy_3_1,waccuracy_1_1,absaccuracy_0,absaccuracy_1,absaccuracy_2
0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
1,0,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
2,0,2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
3,0,3,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
4,0,4,0.333333,0.333333,0.058824,0.041667,0.142857,0.076923,0.0625,0.1,0.333333,-0.666667,-1.666667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23309,0,23309,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
23310,0,23310,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
23311,0,23311,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000
23312,0,23312,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.000000,-1.000000,-2.000000


In [None]:
adaps = pd.read_csv('drive/MyDrive/data/data-other/film_adaptations.csv')
adaps = adaps[['id_book', 'id_film']]
adaps['id_book'] = adaps['id_book'].astype(int)
adaps['id_film'] = adaps['id_film'].astype(int)

In [None]:
adaps = adaps.merge(books, left_on=['id_book'], right_on=['id'])
adaps = adaps[['id_book', 'id_film', 'lemmas_inter']]
adaps = adaps.merge(films, left_on=['id_film'], right_on=['id'])
adaps = adaps[['id_book', 'id_film', 'lemmas_inter_x', 'lemmas_inter_y']]

In [None]:
adaps

Unnamed: 0,id_book,id_film,lemmas_inter_x,lemmas_inter_y
0,7123,1225,"{роман, английский, фантастика, антиутопия, са...","{драма, фантастика, зарубежный}"
1,44135,1225,"{социальный, литература, фантастика}","{драма, фантастика, зарубежный}"
2,44135,1225,"{социальный, литература, фантастика}","{драма, фантастика, зарубежный}"
3,38187,1225,"{литература, английский, классика, зарубежный}","{драма, фантастика, зарубежный}"
4,38187,1225,"{литература, английский, классика, зарубежный}","{драма, фантастика, зарубежный}"
...,...,...,...,...
4207,6276,15212,"{сказка, детский, приключение, зарубежный}","{сказка, мюзикл, фэнтези, русский, мосфильм, с..."
4208,34878,15212,"{сказка, детский, произведение, повесть, литер...","{сказка, мюзикл, фэнтези, русский, мосфильм, с..."
4209,34878,15212,"{сказка, детский, произведение, повесть, литер...","{сказка, мюзикл, фэнтези, русский, мосфильм, с..."
4210,7931,15212,"{сказка, детский, ребёнок, зарубежный}","{сказка, мюзикл, фэнтези, русский, мосфильм, с..."


In [None]:
df = pd.DataFrame()
prp = Preprocessor()
for row in adaps.iterrows():
    row = row[1]
    book_id = row['id_book']
    film_id = row['id_film']
    d = prp.create_features(books[books['id'] == book_id], films[films['id'] == film_id])
    d['len_book_lemmas'] = len(row['lemmas_inter_x'])
    d['len_films_lemmas'] = len(row['lemmas_inter_y'])
    d['len_intersection'] = len(row['lemmas_inter_x'].intersection(row['lemmas_inter_y']))
    df = df.append(d)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/st

In [104]:
df['target'] = 1
df.loc[df.len_intersection == 0, 'target'] = 0.0

In [None]:
df.to_csv('drive/MyDrive/data/lemmas_data_adaps.csv')

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn import metrics

import matplotlib.pyplot as plt

In [None]:
df.columns

Index(['book_id', 'film_id', 'accuracy', 'waccuracy_1_0', 'waccuracy_1_2',
       'waccuracy_1_3', 'waccuracy_0_1', 'waccuracy_2_1', 'waccuracy_3_1',
       'waccuracy_1_1', 'absaccuracy_0', 'absaccuracy_1', 'absaccuracy_2',
       'len_book_lemmas', 'len_films_lemmas', 'len_intersection', 'target'],
      dtype='object')

In [111]:
X = df[['accuracy', 'waccuracy_1_0', 'waccuracy_1_2',
           'waccuracy_1_3', 'waccuracy_0_1', 'waccuracy_2_1', 'waccuracy_3_1',
           'waccuracy_1_1', 'absaccuracy_0', 'absaccuracy_1', 'absaccuracy_2',
           'len_book_lemmas', 'len_films_lemmas', 'len_intersection']]
y = df['target']

In [112]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [168]:
X_train

Unnamed: 0,accuracy,waccuracy_1_0,waccuracy_1_2,waccuracy_1_3,waccuracy_0_1,waccuracy_2_1,waccuracy_3_1,waccuracy_1_1,absaccuracy_0,absaccuracy_1,absaccuracy_2,len_book_lemmas,len_films_lemmas,len_intersection
0,0.250000,0.250000,0.083333,0.062500,0.250000,0.083333,0.062500,0.125000,0.250000,-0.750000,-1.750000,8,8,2
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,4,8,0
0,0.333333,0.333333,0.032258,0.022222,0.071429,0.050000,0.043478,0.058824,0.333333,-0.666667,-1.666667,3,14,1
0,0.400000,0.400000,0.074074,0.052632,0.181818,0.095238,0.076923,0.125000,0.400000,-0.600000,-1.600000,5,11,2
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,4,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,1,5,0
0,0.600000,0.600000,0.176471,0.130435,0.500000,0.187500,0.142857,0.272727,0.600000,-0.400000,-1.400000,5,6,3
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,12,2,0
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,2,4,0


In [113]:
def plot_roc(y, y_pred_proba):
    fpr, tpr, _ = metrics.roc_curve(y,  y_pred_proba)
    auc = metrics.roc_auc_score(y, y_pred_proba)
    plt.plot(fpr,tpr,label="data 1, auc="+str(auc))
    plt.legend(loc=4)
    plt.show()


In [184]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

parameters = {
    "loss": ["squared_error", "huber"],
    "learning_rate": np.linspace(0.01,0.5,10),
    "max_depth":[3,10,20,40,50],
    "criterion": ["friedman_mse",  "mse"],
    "n_estimators":[1,3,5,8]
}

model = GradientBoostingRegressor()
model = GridSearchCV(model,                    
                   param_grid=parameters,
                   scoring='f2_score',
                   cv=5, verbose=4)

In [188]:
parameters = {
    'C'       : np.linspace(0.001,10,20),
    'solver'  : ['newton-cg', 'lbfgs', 'liblinear'],
}

model = LogisticRegression()
model = GridSearchCV(model,                    
                   param_grid=parameters,
                   scoring='roc_auc',
                   cv=10, verbose=4)

In [189]:
model = model.fit(X_train, y_train)

Fitting 10 folds for each of 60 candidates, totalling 600 fits
[CV 1/10] END ........C=0.001, solver=newton-cg;, score=0.955 total time=   0.0s
[CV 2/10] END ........C=0.001, solver=newton-cg;, score=0.966 total time=   0.0s
[CV 3/10] END ........C=0.001, solver=newton-cg;, score=0.975 total time=   0.0s
[CV 4/10] END ........C=0.001, solver=newton-cg;, score=0.969 total time=   0.0s
[CV 5/10] END ........C=0.001, solver=newton-cg;, score=0.950 total time=   0.0s
[CV 6/10] END ........C=0.001, solver=newton-cg;, score=0.975 total time=   0.0s
[CV 7/10] END ........C=0.001, solver=newton-cg;, score=0.974 total time=   0.0s
[CV 8/10] END ........C=0.001, solver=newton-cg;, score=0.967 total time=   0.0s
[CV 9/10] END ........C=0.001, solver=newton-cg;, score=0.974 total time=   0.0s
[CV 10/10] END .......C=0.001, solver=newton-cg;, score=0.968 total time=   0.0s
[CV 1/10] END ............C=0.001, solver=lbfgs;, score=0.955 total time=   0.0s
[CV 2/10] END ............C=0.001, solver=lbfg

In [180]:
be = model.best_estimator_
y = be.predict(X_test)

In [190]:
model.best_estimator_.coef_

array([[1.43098924, 1.43098924, 0.43508812, 0.33342987, 1.53137869,
        0.42720646, 0.32430696, 0.63848259, 1.43098924, 1.43098922,
        1.43098921, 0.21072909, 0.14302178, 6.02159446]])

In [192]:
X_train

Unnamed: 0,accuracy,waccuracy_1_0,waccuracy_1_2,waccuracy_1_3,waccuracy_0_1,waccuracy_2_1,waccuracy_3_1,waccuracy_1_1,absaccuracy_0,absaccuracy_1,absaccuracy_2,len_book_lemmas,len_films_lemmas,len_intersection
0,0.250000,0.250000,0.083333,0.062500,0.250000,0.083333,0.062500,0.125000,0.250000,-0.750000,-1.750000,8,8,2
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,4,8,0
0,0.333333,0.333333,0.032258,0.022222,0.071429,0.050000,0.043478,0.058824,0.333333,-0.666667,-1.666667,3,14,1
0,0.400000,0.400000,0.074074,0.052632,0.181818,0.095238,0.076923,0.125000,0.400000,-0.600000,-1.600000,5,11,2
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,4,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,1,5,0
0,0.600000,0.600000,0.176471,0.130435,0.500000,0.187500,0.142857,0.272727,0.600000,-0.400000,-1.400000,5,6,3
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,12,2,0
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,-2.000000,2,4,0


In [179]:
model.best_estimator_

GradientBoostingRegressor(learning_rate=0.01, n_estimators=1)

In [129]:
roc_auc_score(y_true=y_test, y_score=y[0:, 1])

1.0

In [124]:
y_test

0    0
0    1
0    1
0    0
0    1
    ..
0    1
0    0
0    0
0    1
0    1
Name: target, Length: 1390, dtype: int64

In [127]:
y[0:, 1]

array([0.00814504, 0.9605348 , 0.96567111, ..., 0.01331552, 1.        ,
       0.98159917])

In [141]:
d = prp.create_features(books[books['id'] == 25845], films)

In [150]:
# 'len_book_lemmas', 'len_films_lemmas', 'len_intersection'
d['len_book_lemmas'] = 8
d['len_films_lemmas'] = d.film_id.apply(lambda x: len(films[films['id'] == x]['lemmas_inter'].item()))
d['len_intersection'] = d.film_id.apply(lambda x: len(films[films['id'] == x]['lemmas_inter'].item().intersection(lems.item())))

In [154]:
d['rec'] = be.predict_proba(d[['accuracy', 'waccuracy_1_0', 'waccuracy_1_2',
       'waccuracy_1_3', 'waccuracy_0_1', 'waccuracy_2_1', 'waccuracy_3_1',
       'waccuracy_1_1', 'absaccuracy_0', 'absaccuracy_1', 'absaccuracy_2',
       'len_book_lemmas', 'len_films_lemmas', 'len_intersection']])[0:, 1]

In [171]:
d.sort_values('rec').tail(5)

Unnamed: 0,book_id,film_id,accuracy,waccuracy_1_0,waccuracy_1_2,waccuracy_1_3,waccuracy_0_1,waccuracy_2_1,waccuracy_3_1,waccuracy_1_1,absaccuracy_0,absaccuracy_1,absaccuracy_2,len_book_lemmas,len_films_lemmas,len_intersection,rec
11230,25845,11230,0.625,0.625,0.227273,0.172414,0.714286,0.217391,0.16129,0.333333,0.625,-0.375,-1.375,8,7,5,1.0
21165,25845,21165,0.625,0.625,0.166667,0.121951,0.454545,0.185185,0.142857,0.263158,0.625,-0.375,-1.375,8,11,5,1.0
571,25845,571,0.625,0.625,0.147059,0.106383,0.384615,0.172414,0.135135,0.238095,0.625,-0.375,-1.375,8,13,5,1.0
15239,25845,15239,0.75,0.75,0.272727,0.206897,0.857143,0.26087,0.193548,0.4,0.75,-0.25,-1.25,8,7,6,1.0
23149,25845,23149,0.75,0.75,0.272727,0.206897,0.857143,0.26087,0.193548,0.4,0.75,-0.25,-1.25,8,7,6,1.0


In [172]:
films.iloc[11230]

id                                                          11230
title                                              Детство Никиты
genres                                                      драмы
year                                                       1992.0
description     Детство Никиты проходит в поместье отца в Сама...
content_type                                                 film
popularity                                                0.24851
lemmas          {'', 'николаевич', 'повесть', 'произведение', ...
lemmas_inter    {автобиографический, русский, драма, повесть, ...
Name: 11230, dtype: object

In [167]:
import pickle

with open('drive/MyDrive/model.pickle', 'wb') as file:
    pickle.dump(be, file)

In [170]:
books

Unnamed: 0,id,title,authors,genres,annotation,popularity,lemmas,lemmas_inter
0,0,Как выдать ведьму замуж,Надежда Соколова,"Любовное фэнтези,Магические академии,Юмористич...","Отредактированная версия пятитомника ""Ведьма и...",0.652522,"{'фэнтези', 'академия', 'юмористический', 'маг...","{любовный, фэнтези, юмористический}"
1,1,На острие удара,Михаил Михеев,"Боевая фантастика,Героическая фантастика",В современном мире грань между войной и миром ...,0.607063,"{'героический', 'боевой', 'фантастика'}","{боевой, фантастика}"
2,2,Другой мир. Кровавое дерево. Книга 5,Джордж Гранд,"Боевое фэнтези,Героическое фэнтези,Историческо...",Телохранитель Игоря остаётся в плену. Движение...,0.490108,"{'приключение', 'фэнтези', 'героический', 'ист...","{исторический, боевой, приключение, фэнтези}"
3,3,Магазин с привидением,Мартин Видмарк,"Детские приключения,Зарубежные детские книги,К...",Что привело агента Летучей мыши ночью в продук...,0.457287,"{'приключение', 'вампир', 'детский', 'зарубежн...","{детский, приключение, вампир, зарубежный}"
4,4,Один дома 6,Виктория Медведева,Космическая фантастика,Атмосфера Земли достигает апогея загрязнения. ...,0.525285,"{'приостановить', 'disney+', 'семейный', 'год'...","{оригинальный, сша, английский, перезапуск, па..."
...,...,...,...,...,...,...,...,...
59017,59017,Необыкновенная история,Александр Алфёров,"Книги для детей,Книги для дошкольников,Сказки","Доброта… вот чего не хватает этому миру, чтобы...",0.537138,"{'дошкольник', 'книга', 'ребёнок', 'сказка'}","{сказка, ребёнок}"
59018,59018,Ллойс,Дмитрий Панасенко,Боевая фантастика,Что может объединять наркоманку наемницу – быв...,0.457287,"{'боевой', 'фантастика'}","{боевой, фантастика}"
59019,59019,Иуда Искариот,Леонид Андреев,"Литература 20 века,Русская классика,Список шко...","«Иисуса Христа много раз предупреждали, что Иу...",0.630932,"{'классика', 'повесть', 'школьный', 'леонид', ...","{русский, повесть, литература, классика}"
59020,59020,Любишь ли ты меня?,Валентина Рыжкова,"Книги для подростков,Современная русская литер...","Марина старается быть такой, как все, ничем не...",0.457287,"{'подросток', 'литература', 'современный', 'кн...","{русский, литература}"


In [197]:
books[books['id'] == 0]

Unnamed: 0,id,title,authors,genres,annotation,popularity,lemmas,lemmas_inter
0,0,Как выдать ведьму замуж,Надежда Соколова,"Любовное фэнтези,Магические академии,Юмористич...","Отредактированная версия пятитомника ""Ведьма и...",0.652522,"{'фэнтези', 'академия', 'юмористический', 'маг...","{любовный, фэнтези, юмористический}"
