In [939]:
# import
import pandas as pd
import numpy as np

from colorama import Fore, Style
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Dash framework a komponenty
import dash
from dash import html, dcc, Input, Output, State, dash_table
import dash_bootstrap_components as dbc
from dash.dash_table import DataTable
import base64
import re


In [940]:
# load cleaned data = dataset_lowercase
df = pd.read_csv('Cleaned_Book_ETL.csv', encoding='UTF-8', sep=',')


In [941]:
# Convert data types

column_types = {
    'User-ID': 'int64',
    'ISBN': 'string',
    'Book-Rating': 'int64',
    'Book-Title': 'string',
    'Book-Author': 'string',
    'Year-Of-Publication': 'int64',  # nebo 'string', pokud jsou hodnoty smíšené
    'Publisher': 'string',
    'Image-URL-S': 'string',
    'Image-URL-M': 'string',
    'Image-URL-L': 'string'
}

# Data types conversion
for column, dtype in column_types.items():
    df[column] = df[column].astype(dtype)

In [942]:
# VYNECHAT

# columns_to_drop = ['Image-URL-S', 'Image-URL-M']

# redundant columns
# df = df.drop(columns=columns_to_drop)
# df

In [943]:
# POZNÁMKY:
# Filter *: uživatelé, kteří hodnotili alespoň 2 knihy - není potřeba pro ML model důležitá


# Default Filter 0: uživatelé, kteří hodnotili alespoň 10 knih = 255000 záznamů
# Frontend Filter 1: determinative readers - uživatelé, kteří hodnotili knihu "The Fellowship of the Ring (The Lord of the Rings, Part 1)" od autora Tolkien = 144 záznamů příliš restriktivní
# 

In [944]:
# FILTER filter data before pivot - original data set je příliš velký pro pivot (40 s).

# Default Filter 0: uživatelé, kteří hodnotili alespoň 10 knih = 255000 záznamů
filter_df = df.copy()
book_counts = filter_df.groupby('User-ID')['Book-Rating'].count()
book_counts = book_counts[book_counts > 10]

df = filter_df.loc[filter_df['User-ID'].isin(book_counts.index)]


In [945]:
# VYNECHAT
# Check for duplicates - one more time
# print(df.duplicated().sum())  # Počet duplicitních řádků

In [946]:
# SET by USER - ZÁKLADNÍ LOGIKA původního PY.SOUBORU 
chosen_book = 'the fellowship  '
chosen_book = re.sub(r'\s+', ' ', chosen_book)

complete_name_of_chosen_book = df.loc[df['Book-Title'].str.contains(chosen_book, case=False, na=False, regex=False), 'Book-Title']
complete_name_of_chosen_book = complete_name_of_chosen_book.values[0]
complete_name_of_chosen_book

'the fellowship of the ring (the lord of the rings, part 1)'

In [947]:
# pivot table
book_pivot = books_of_chosen_readers.pivot_table(columns='User-ID', index='Book-Title', values='Book-Rating') # pivot_table umí agregovat a umí pracovat s NaN
book_pivot.fillna(0, inplace=True) # Memory-efficient
book_pivot

User-ID,254,1674,11676,11944,16601,16795,22818,23571,23699,23872,...,254206,258614,259057,259901,260419,265313,271176,274393,276050,276313
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"earth prayers from around the world: 365 prayers, poems, and invocations for honoring the earth",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
nonbook materials: the organization of integrated collections,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
!yo!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'a hell of a place to lose a cow': an american hitchhiking odyssey,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'eine gute frau hat keinen kopf'. europгѓ?г‚в¤ische sprichwгѓ?г‚в¶rter гѓ?г‚вјber frauen.,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwгѓ?г‚в¶lf.,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
гѓ?ngeles fugaces (falling angels),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
гѓ?г‚?ber das fernsehen.,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
г‚вїeres tu mi mamгѓвў?/are you my mother?,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [948]:
# sparse matrix
book_sparse = csr_matrix(book_pivot)

In [949]:
# Unsupervised learner for implementing neighbor searches
# algorithm{‘auto’, ‘ball_tree’, ‘kd_tree’, ‘brute’}, default=’auto’ will attempt to decide the most appropriate algorithm based on the values passed to fit method.
model = NearestNeighbors(algorithm='brute')
model.fit(book_sparse)

In [950]:
# model vrací dvě hodnoty:
# 1) distances: Vzdálenosti mezi danou knihou (chosen_book) a jejími nejbližšími sousedy
# 2) suggestions: Indexy knih, které jsou nejbližší sousedé k dané knize (chosen_book)

# .values.reshape(1, -1): Převede řádek na numpy pole a přetvoří jej do tvaru (1, n), kde n je počet sloupců. To je nutné, protože kneighbors očekává 2D pole.
distances, suggestions = model.kneighbors(book_pivot.loc[[complete_name_of_chosen_book]].values, n_neighbors=10) # n_neighbors=5, default
suggestions

array([[13378, 15544, 14924,  5740, 13746,  5748,  5744,  5745,  5750,
         5752]], dtype=int64)

In [951]:
# print all the suggested books
for i in range(len(suggestions)):
  print(list(book_pivot.index[suggestions[i]]))

['the fellowship of the ring (the lord of the rings, part 1)', 'the two towers (the lord of the rings, part 2)', 'the return of the king (the lord of the rings, part 3)', 'harry potter and the chamber of secrets (book 2)', 'the hobbit : the enchanting prelude to the lord of the rings', 'harry potter and the prisoner of azkaban (book 3)', 'harry potter and the goblet of fire (book 4)', 'harry potter and the order of the phoenix (book 5)', "harry potter and the sorcerer's stone (book 1)", "harry potter and the sorcerer's stone (harry potter (paperback))"]


In [952]:
list_of_suggested_books = []

# Procházení návrhů a převod indexu na seznam řetězců
for i in range(len(suggestions)):
    recommended_books = book_pivot.index[suggestions[i]].tolist()  # Převod na seznam řetězců
    for book in recommended_books:
        list_of_suggested_books.append({"Suggested Books": book})

# Vytvoření tabulky
table = dash_table.DataTable(
    data=list_of_suggested_books,
    columns=[{"name": "Suggested Books", "id": "Suggested Books"}],
    style_table={"width": "100%"},
    style_cell={"textAlign": "left"}
)

table


DataTable(data=[{'Suggested Books': 'the fellowship of the ring (the lord of the rings, part 1)'}, {'Suggested Books': 'the two towers (the lord of the rings, part 2)'}, {'Suggested Books': 'the return of the king (the lord of the rings, part 3)'}, {'Suggested Books': 'harry potter and the chamber of secrets (book 2)'}, {'Suggested Books': 'the hobbit : the enchanting prelude to the lord of the rings'}, {'Suggested Books': 'harry potter and the prisoner of azkaban (book 3)'}, {'Suggested Books': 'harry potter and the goblet of fire (book 4)'}, {'Suggested Books': 'harry potter and the order of the phoenix (book 5)'}, {'Suggested Books': "harry potter and the sorcerer's stone (book 1)"}, {'Suggested Books': "harry potter and the sorcerer's stone (harry potter (paperback))"}], columns=[{'name': 'Suggested Books', 'id': 'Suggested Books'}], style_table={'width': '100%'}, style_cell={'textAlign': 'left'})

In [953]:
# Loading a dash picture
def load_image(image_path):
    with open(image_path, "rb") as f:
        encoded_image = base64.b64encode(f.read()).decode()
    return f"data:image/jpeg;base64,{encoded_image}"

image_path = 'StuttgartSelect.jpg'

encoded_image = load_image(image_path)

In [954]:
# running Dash server

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# add Flask server for Gunicorn
server = app.server

In [955]:
# APP Layout 
app.layout = dbc.Container(
    
    # className="mb-4",
    children=[  
     
        # 1 - titulek
        dbc.Row(
            dbc.Col([
                html.H1(
                    "Book recomendation",
                    
                    style={"display": "flex",
                        "justify-content": "center",
                        "align-items": "center",
                        'color': 'grey'  # Barva textu
                            }
                ),
                html.Img(
                    src=encoded_image,
                    className="img-fluid",
                    style = {"objectFit": "cover"}
                )
            ]),
        

            style={
                    "display": "flex",
                    "justify-content": "center",
                    "align-items": "center",
                    "height": "100px"  # Nastavení pevné výšky
                    },
            # className="mb-4",
        ),

        # 2 -  Search pro zadání názvu chosen_book 
        dbc.Row(
            dbc.Col([
                dcc.Input(
                    id="search_box",
                    type="text",
                    placeholder="Enter your favorite book to recommendations for others you might like",
                    debounce=True,  # Callback se spustí až po potvrzení Enter
                    style={"width": "50%"}
                ),
                dcc.Checklist(
                    id="author_filter",
                    options=[{"label": "Filter by same author", "value": "filter_author"}],
                    value=[],
                    style={}
                )],

            style={
                    "display": "flex",
                    "justify-content": "center",
                    "align-items": "center",
                    "height": "100px"  # Nastavení pevné výšky
                    },
            # className="mb-4",
            )
        ),

        # 3 - Check box - autor, rok (state) kontejner pro zobrazení doporučení TOP 3 - 10 knih
        
            
        # 4 - kontejner pro zobrazení doporučení TOP 3 - 10 knih
        dbc.Row(    
            dbc.Col([
                html.Div(
                    id='output_table'
                    # výpis TOP doporučení
                ),
                html.Div(
                    id="error_output",  # ID kontejneru
                    style={
                        "color": "red",  # Červená barva pro chybové hlášky
                        "textAlign": "center",
                        "marginTop": "10px"
    }
)
    ]),
                style={"display": "flex",
                    "flex-wrap": "wrap",
                    "justify-content": "center",
                    "align-items": "center"
                }
        ), 
            
        # 5 - kontejner pro zobrazení obrázků doporučených knih 
        dbc.Row(
            dbc.Col(
                # dcc.Dropdown(
                #     id="output_table_gallery",
                #     # gallerie
                # )
                
            ),
            style={
                    "display": "flex",
                    "justify-content": "center",
                    "align-items": "center",
                    "height": "100px"  # Nastavení pevné výšky
            }
        )
    ]
)

In [956]:
# Enter number of neighbors to ML
number_of_neighbors = 1 # the 6 - request = 5 result titles

# table for book or error content
def create_table(table_data, color_style, columns_style):
    return html.Div(dash_table.DataTable(
            data=table_data,
            columns=columns_style,
            style_table={"width": "50%"},
            style_cell={"textAlign": "left"},
            style_header={
                "textAlign": "center"
            }
                    ), 
        style={
            "display": "flex",
            "justify-content": "center", # jinak se nenačte stylování původního rozvrhu, které se hodí doleva
            "align-items": "center",
            **color_style
        }
            )

# ----------------- Callback 1 recomendation -----------------
@app.callback(
    Output("output_table", "children"),  # output table
 
    [Input("search_box", "value"),  # Input book name
     State("author_filter", "value")]  # check box
)

def update_recommendations(chosen_book, filter_author):
    if not chosen_book:
        
        return None # 1. error message

    # Odstranění mezer ze vstupního názvu knihy
    chosen_book = re.sub(r'\s+', ' ', chosen_book)

    # ZÁKLADNÍ LOGIKA původního PY.SOUBORU 
    complete_name_of_the_book = df.loc[df['Book-Title'].str.contains(chosen_book, case=False, na=False, regex=False), 'Book-Title']
        
    # Ověření, že Series není prázdná
    if complete_name_of_the_book.empty:
        table_data = [{"Suggested Books:":"Book name is not in database."}]
        color_style= {"color": "red"}
        columns_style = [{"name": "Suggested Books:", "id": "Suggested Books:"}]
        return create_table(table_data, color_style, columns_style) # 2. error message
    
    complete_name_of_the_book = complete_name_of_the_book.values[0] # Series
       
    # Podmínka filtrování podle autora - checkbox
    if filter_author:
        # Najdeme autora vybrané knihy
        chosen_book_author = df.loc[df['Book-Title'] == complete_name_of_the_book, 'Book-Author'].values[0]
        books_of_chosen_readers = df.loc[df['Book-Author'] == chosen_book_author] # vracíme celý frame
    # Podmínka filtrování bez autora knihy, na základě všech titulů čtenářů i jiných knih než knih autora zadaného názvu knihy
    else:
        readers_of_the_book = df.loc[df['Book-Title'] == complete_name_of_the_book, 'User-ID']
        books_of_chosen_readers = df.loc[df['User-ID'].isin(readers_of_the_book)] # vracíme celý frame

    # # Kontrola, zda máme data pro další zpracování
    # if books_of_chosen_readers.empty:
    #     table_data = [{"Suggested Books:":"No more Book Titles from this author available"}]
    #     color_style= {"color": "red"}
    #     columns_style = [{"name": "Suggested Books:", "id": "Suggested Books:"}]
    #     return create_table(table_data, color_style, columns_style) # 3. error message

    # Pivotní tabulka
    book_pivot = books_of_chosen_readers.pivot_table(columns='User-ID', index='Book-Title', values='Book-Rating') # pivot_table umí agregovat a umí pracovat s NaN
    book_pivot.fillna(0, inplace=True) # Memory-efficient

    if book_pivot.empty or len(book_pivot) < number_of_neighbors:  # Pokud je méně než 'number_of_neighbors' knih v pivotní tabulce
        table_data = [{"Suggested Books:":"Not enough data for model to suggest a book"}]
        color_style= {"color": "red"}
        columns_style = [{"name": "Suggested Books:", "id": "Suggested Books:"}]
        return create_table(table_data, color_style, columns_style) # 3. error message

    # Sparse matrix
    book_sparse = csr_matrix(book_pivot)

    # Unsupervised learner for implementing neighbor searches
    model = NearestNeighbors(algorithm='brute')
    model.fit(book_sparse)

    # model vrací dvě hodnoty:
    distances, suggestions = model.kneighbors(book_pivot.loc[[complete_name_of_the_book]].values, n_neighbors=number_of_neighbors) # n_neighbors=5, default

    # Sestavení seznamu doporučených knih
    list_of_suggested_books = []
    for i in range(len(suggestions)):
        recommended_books = book_pivot.index[suggestions[i]].tolist()  # Převod na seznam řetězců
        for book in recommended_books:
            list_of_suggested_books.append({"Suggested Books:": book})

    # Odstranění zadané knihy ze seznamu doporučení
    list_of_suggested_books = [book for book in list_of_suggested_books if book["Suggested Books:"] != complete_name_of_the_book] #Tento přístup projde každý slovník v seznamu list_of_suggested_books a ponechá jen ty, které nemají stejnou hodnotu
    
    # když je v seznamu kniha autora pouze jedna 
    if len(list_of_suggested_books) < 1:
        table_data = [{"Suggested Books:":f"Only avalaible result is the {complete_name_of_the_book}"}]
        color_style= {"color": "red"}
        columns_style = [{"name": "Suggested Books:", "id": "Suggested Books:"}]
        return create_table(table_data, color_style, columns_style) # 4. error message
    
    # Vytvoření tabulky
    table_data = list_of_suggested_books
    color_style = {"color": "black"}
    columns_style = [{"name": "Suggested Books:", "id": "Suggested Books:"}]
    return create_table(table_data, color_style, columns_style)



# ----------------- Callback 2 gallery ----------------- 


if __name__ == "__main__":
    app.run_server(host="127.0.0.1", port=8060, debug=True)