## Imports

In [3]:
import pandas as pd
import gradio as gr
import nltk
import numpy as np
from search import search_with_query

## Load data files

In [4]:
# Descriptor tables, one per stemmer. The search functions filter them on
# their 'Document' and 'Terme' columns.
df_descripteur_porter, df_descripteur_lancaster = map(
    pd.read_csv,
    ('freq_poids_porter.csv', 'freq_poids_lancaster.csv'),
)

In [5]:
# Frequency/weight tables, one per stemmer.
# NOTE(review): these read the same two CSV files as the df_descripteur_*
# frames -- confirm whether separate copies are really needed.
df_freqs_poids_porter, df_freqs_poids_lancaster = [
    pd.read_csv(csv_path)
    for csv_path in ('freq_poids_porter.csv', 'freq_poids_lancaster.csv')
]

In [6]:
# Weight matrix used as the inverse index: the search functions read it with
# one row per document position and one column per term. The 'Unnamed: 0'
# column is discarded right after loading.
df_inverse = pd.read_csv('df_poids.csv').drop(columns=['Unnamed: 0'])

In [None]:
# Train/test splits, read in the order X_train, y_train, X_test, y_test.
X_train, y_train, X_test, y_test = (
    pd.read_csv(split_file)
    for split_file in ('X_train.csv', 'y_train.csv', 'X_test.csv', 'y_test.csv')
)

# Table loaded for the DBScan tab (not referenced by the cells shown here).
df_pca_dbscan = pd.read_csv(filepath_or_buffer='df_pca_dbscan.csv')

In [None]:
# Query table (file name suggests Porter-stemmed queries -- TODO confirm).
queries_porter = pd.read_csv(filepath_or_buffer='queries_porter.csv')

# Functions

## First tab functions

In [7]:
def search_document_descripteur(document, stemming_method):
    """Look up one document in the descriptor table and in the inverse index.

    Args:
        document: Document number. It is compared as-is against the
            ``Document`` column of the descriptor table and truncated with
            ``int()`` for the positional row lookup in ``df_inverse``
            (the UI passes the value of a ``gr.Number``).
        stemming_method: ``'Porter'`` or ``'Lancaster'``; selects which
            descriptor table is searched.

    Returns:
        A two-item list ``[descripteur, inverse]``:
        the descriptor rows of the document without the ``Document`` column,
        and a frame with columns ``Terme`` / ``Poid`` holding the non-zero
        entries of row ``int(document)`` of ``df_inverse``.

    Raises:
        ValueError: if ``document`` is None or ``stemming_method`` is not one
            of the two supported names.
        IndexError: if ``int(document)`` is outside the rows of ``df_inverse``.
    """
    if document is None:
        raise ValueError("A document number is required.")

    if stemming_method == 'Porter':
        descripteurs = df_descripteur_porter
    elif stemming_method == 'Lancaster':
        descripteurs = df_descripteur_lancaster
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError(
            f"Unknown stemming method {stemming_method!r}; "
            "expected 'Porter' or 'Lancaster'."
        )

    # Non-inplace drop: mutating a boolean-mask slice of the global frame in
    # place triggers pandas' SettingWithCopyWarning.
    new_df = descripteurs[descripteurs['Document'] == document].drop(columns=['Document'])

    # Work on the row as a Series so the result does not depend on the row
    # label being equal to its position.
    row = df_inverse.iloc[int(document)]
    non_zero = row[row != 0]
    inverse = non_zero.rename_axis('Terme').reset_index(name='Poid')

    return [new_df, inverse]

In [8]:
def search_document_inverse(phrase, stemming_method):
    """Search the inverse index and descriptor table for the terms of a phrase.

    The phrase is tokenized, lower-cased, stripped of English stop words and
    stemmed with the selected stemmer. Stems that are not columns of
    ``df_inverse`` are ignored for the inverse-index lookup instead of
    raising ``KeyError``.

    Args:
        phrase: Free-text search string (None is treated as empty).
        stemming_method: ``'Porter'`` or ``'Lancaster'``; selects both the
            stemmer and the descriptor table.

    Returns:
        A tuple ``(inverse, descripteur)``:
        ``inverse`` holds the rows of ``df_inverse`` where at least one of
        the known query stems is non-zero, restricted to those stem columns,
        with the original row index exposed as an ``index`` column;
        ``descripteur`` holds the descriptor rows whose ``Terme`` is one of
        the query stems.

    Raises:
        ValueError: if ``stemming_method`` is not one of the two supported
            names.
    """
    if stemming_method == 'Porter':
        stemmer = nltk.PorterStemmer()
        descripteurs = df_descripteur_porter
    elif stemming_method == 'Lancaster':
        stemmer = nltk.LancasterStemmer()
        descripteurs = df_descripteur_lancaster
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError(
            f"Unknown stemming method {stemming_method!r}; "
            "expected 'Porter' or 'Lancaster'."
        )

    # Raw string: same pattern as before, without invalid-escape warnings.
    tokenizer = nltk.RegexpTokenizer(r'(?:[A-Za-z]\.)+|\d+(?:\.\d+)?%?|\w+(?:\-\w+)*')
    stop_words = set(nltk.corpus.stopwords.words('english'))

    tokens = (token.lower() for token in tokenizer.tokenize(phrase or ''))
    stems = [stemmer.stem(token) for token in tokens if token not in stop_words]
    # Drop repeated stems (keeping first-seen order) so a word typed twice
    # does not yield duplicated columns.
    stems = list(dict.fromkeys(stems))

    # Only stems that exist in the index can be selected as columns.
    known_stems = [stem for stem in stems if stem in df_inverse.columns]
    new_df = df_inverse[known_stems]
    new_df = new_df[(new_df != 0).any(axis=1)]
    new_df = new_df.reset_index(level=0)

    desc_df = descripteurs.loc[descripteurs['Terme'].isin(stems)]

    return new_df, desc_df

## Second tab function

In [9]:
def search_query(query):
    """Run a boolean query and return the matches as a DataFrame.

    Args:
        query: Query string forwarded unchanged to ``search_with_query``.

    Returns:
        ``pd.DataFrame`` built from whatever ``search_with_query`` returns.
    """
    return pd.DataFrame(search_with_query(query))
        

In [10]:
# search_query("information AND classification OR NOT title AND computers")

## Third tab functions

## Main

In [12]:
# Gradio UI: one tab for term/document searches, one for boolean search, and
# two placeholder tabs (DBScan, Naive Bayes) that only show a title for now.
with gr.Blocks() as app:
    gr.Markdown("main app")
    with gr.Tab("Main Searches"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("Search by Term")
                search_input = gr.Textbox(label="Search for terms")
                # A button's caption is its `value`; `label` does not set it.
                search_button = gr.Button(value="Search by Term")
        with gr.Row():
            with gr.Column():
                gr.Markdown("Search by Document")
                search_document = gr.Number(label="Document number")
            with gr.Column():
                gr.Markdown("Stemming method")
                # Default selection so the callbacks never receive None.
                stemming_method = gr.Radio(["Porter", "Lancaster"], value="Porter", label="Stemming")
            search_document_button = gr.Button(value="Search by Document")
        with gr.Row():
            with gr.Column():
                gr.Markdown("Descripteur")
                search_output_descripteur = gr.Numpy()
            with gr.Column():
                gr.Markdown("Inverse")
                search_output_inverse = gr.Numpy()
    with gr.Tab("Boolean Search"):
        gr.Markdown("Boolean Search")
        search_boolean = gr.Textbox(label="Boolean search")
        search_boolean_button = gr.Button(value="Search")
        search_boolean_output = gr.Numpy()
    with gr.Tab("DBScan"):
        gr.Markdown("DBScan")
    with gr.Tab("Naive Bayes"):
        gr.Markdown("Naive Bayes")

    # search_document_inverse returns (inverse, descripteur) while
    # search_document_descripteur returns [descripteur, inverse]; the output
    # lists below are ordered to match each function.
    search_button.click(
        search_document_inverse,
        inputs=[search_input, stemming_method],
        outputs=[search_output_inverse, search_output_descripteur],
    )
    search_document_button.click(
        search_document_descripteur,
        inputs=[search_document, stemming_method],
        outputs=[search_output_descripteur, search_output_inverse],
    )
    search_boolean_button.click(search_query, inputs=search_boolean, outputs=search_boolean_output)

app.launch()

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


