# Running the app:
## From the "Runtime" menu, select "Run all".

### Then scroll down and click on the URL below.

In [33]:
%%capture
!pip install advertools jupyter_dash dash dash_bootstrap_components

In [28]:
import os
from jupyter_dash import JupyterDash
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Output, Input, State
from dash_table import DataTable
from dash.exceptions import PreventUpdate
import advertools as adv
import pandas as pd

# Create the Dash application, styled with the Bootstrap theme.
app = JupyterDash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Page layout: one row holding an empty spacer column on the left and the
# crawler form (inputs, button, status message, results table) next to it.
app.layout = html.Div(
    children=[
        dbc.Row(
            children=[
                # Left-hand spacer column.
                dbc.Col(lg=2),
                # Main column with the whole crawler UI.
                dbc.Col(
                    lg=7,
                    md=12,
                    children=[
                        # --- Title -----------------------------------------
                        html.Br(),
                        html.H1(children='advertools'),
                        html.Br(),
                        html.Br(),
                        html.H3(children="🕷️ SEO Crawler"),
                        # --- Start URLs, one per line ----------------------
                        dbc.Textarea(
                            id='url_list',
                            value='',
                            placeholder='Paste the starting URLs here, one per line',
                            rows=5,
                            cols=100,
                        ),
                        html.Br(),
                        # --- "Follow links" option with label and tooltip --
                        dbc.Checkbox(
                            id='follow_links_checklist',
                            checked=False,
                        ),
                        dbc.Label(
                            children="Follow links",
                            id='follow_links_label',
                            html_for="follow_links_checklist",
                        ),
                        dbc.Tooltip(
                            children='Should I follow links on the above pages until I finish the whole website?',
                            target='follow_links_label',
                        ),
                        html.Br(),
                        # --- Project name (used to name the output file) ---
                        dbc.Input(
                            id='crawl_output_file',
                            value='',
                            placeholder='Name your crawl project',
                        ),
                        html.Br(),
                        html.Br(),
                        # --- Trigger button --------------------------------
                        dbc.Button(
                            children='Crawl!',
                            id='start_crawling',
                            color='primary',
                        ),
                        html.Br(),
                        html.Br(),
                        # --- Status message, wrapped in a loading indicator
                        dcc.Loading(
                            children=html.Div(id='crawl_status_div'),
                        ),
                        html.Hr(),
                        html.Br(),
                        # --- Placeholder filled with the results table -----
                        html.Div(id='crawl_df_table'),
                    ],
                ),
            ],
        ),
    ],
)


@app.callback(Output('start_crawling', 'disabled'),
              Input('url_list', 'value'), Input('crawl_output_file', 'value'))
def crawl_button_activation(url_list, crawl_output_file):
    """Return whether the "Crawl!" button should be disabled.

    The button stays disabled until both the URL text area and the project
    name input contain something other than whitespace.

    Args:
        url_list: Current text of the ``url_list`` text area.
        crawl_output_file: Current text of the ``crawl_output_file`` input.

    Returns:
        ``True`` to disable the button, ``False`` to enable it.
    """
    # Whitespace-only text would produce no URLs once split, so it is
    # treated as empty. The ``x and ...`` form also tolerates ``None``.
    has_urls = bool(url_list and url_list.strip())
    has_project_name = bool(crawl_output_file and crawl_output_file.strip())
    return not (has_urls and has_project_name)


@app.callback(Output('crawl_status_div', 'children'),
              Output('crawl_df_table', 'children'),
              Input('start_crawling', 'n_clicks'),
              State('url_list', 'value'),
              State('crawl_output_file', 'value'),
              State('follow_links_checklist', 'checked'))
def crawl_pages(n_clicks, url_list, crawl_output_file,  follow_links):
    """Crawl the submitted URLs and show the resulting dataset.

    Runs when the "Crawl!" button is clicked. The URLs are crawled with
    ``adv.crawl`` into ``<project name>.jl`` (spaces in the name replaced by
    underscores), the file is read back as line-delimited JSON, and the
    result is rendered as a table.

    Args:
        n_clicks: Click count of the ``start_crawling`` button.
        url_list: Whitespace-separated start URLs from the text area.
        crawl_output_file: Project name used to build the output file name.
        follow_links: State of the "Follow links" checkbox, passed through
            to ``adv.crawl``.

    Returns:
        A ``(status message, table container)`` pair for the two outputs.

    Raises:
        PreventUpdate: If the button has not been clicked yet or no URL
            was provided.
    """
    if not n_clicks:
        raise PreventUpdate

    # str.split() with no separator already drops surrounding whitespace and
    # empty entries, so no per-item strip is needed.
    start_urls = (url_list or '').split()
    if not start_urls:
        # Nothing to crawl; leave the page untouched instead of crawling an
        # empty list and then failing to read the output file.
        raise PreventUpdate

    output_path = crawl_output_file.replace(' ', '_') + '.jl'
    adv.crawl(url_list=start_urls,
              output_file=output_path,
              follow_links=follow_links)
    crawl_df = pd.read_json(output_path, lines=True)

    # shape is (rows, columns); the previous message reported the column
    # count for both numbers.
    n_rows, n_cols = crawl_df.shape
    success_msg = f"Your crawl finished. Your dataset contains {n_rows} rows and {n_cols} columns."

    crawl_table = html.Div([
        DataTable(columns=[{"name": i, "id": i} for i in crawl_df.columns],
                  data=crawl_df.to_dict('records'),
                  style_table={'overflowX': 'auto', 'height': '400px'},
                  virtualization=True,
                  fixed_rows={'headers': True},
                  export_format='csv')
    ])
    return success_msg, crawl_table

# Start serving the app with debug mode turned off.
app.run_server(debug=False)