In [1]:
import io
import json
import spacy
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier



In [2]:
def load_vectors(fname):
    """Load word vectors from a fastText ``.vec`` text file.

    The first line of the file is a header holding two integers (vocabulary
    size and dimension); every following line is ``<word> <v1> <v2> ...``
    separated by single spaces.

    Args:
        fname: Path to the ``.vec`` file.

    Returns:
        dict mapping each word to its vector as a list of floats.

    Raises:
        ValueError: if the header line does not consist of two integers or
            a vector component cannot be parsed as a float.
    """
    data = {}
    # The context manager guarantees the (large) file is closed, even when
    # parsing fails half-way through.
    with io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
        # Header is parsed only to validate the file format; the sizes
        # themselves are not needed afterwards.
        _n, _d = map(int, fin.readline().split())
        for cnt, line in enumerate(fin, start=1):
            # Progress indicator for the ~1M line pretrained files.
            if cnt % 100000 == 0:
                print(cnt)
            stripped = line.rstrip()
            if not stripped:
                # Skip blank lines instead of storing an empty-string key.
                continue
            tokens = stripped.split(' ')
            data[tokens[0]] = list(map(float, tokens[1:]))
    return data



def load_questions(fname):
    """Load question records from a JSON-lines file.

    Each non-blank line is parsed as one JSON object. The ``question1`` and
    ``question2`` keys are removed from every record when present; records
    that do not carry them are kept unchanged.

    Args:
        fname: Path to the ``.jsonl`` file.

    Returns:
        list of dicts, one per record, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    filtered_questions_list = []
    with open(fname) as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            question_json = json.loads(line)
            # A default avoids a KeyError on records without these keys.
            question_json.pop('question1', None)
            question_json.pop('question2', None)
            filtered_questions_list.append(question_json)
    return filtered_questions_list



def load_tables(part_file_name):
    """Read ``./data/<part_file_name>.tables.jsonl`` and index the tables by id.

    Args:
        part_file_name: Prefix of the tables file inside ``./data/``.

    Returns:
        dict mapping each table's ``'id'`` value to the parsed table dict.
        When an id occurs more than once the last occurrence is kept.
    """
    jsonl_path = './data/' + part_file_name + '.tables.jsonl'
    with open(jsonl_path) as handle:
        raw_lines = handle.readlines()

    # One JSON object per line; key each parsed object by its own id.
    parsed_tables = (json.loads(raw) for raw in raw_lines)
    return {entry['id']: entry for entry in parsed_tables}




def sentence_embedding(sentence, spacy_model, fastText_model, embed_dim=300, noun_amplify=1):
    """Embed a sentence as a weighted average of its tokens' word vectors.

    Every token is looked up by lemma in ``fastText_model``. Non-stop-word
    nouns are weighted by ``noun_amplify``; all other known tokens have
    weight 1. Tokens without a vector add nothing to the sum but still count
    toward the denominator.

    Args:
        sentence: Text to embed.
        spacy_model: Callable returning an iterable, sized document whose
            tokens expose ``lemma_``, ``pos_`` and ``is_stop``.
        fastText_model: Mapping from word to a vector of length ``embed_dim``.
        embed_dim: Dimensionality of the word vectors (default 300).
        noun_amplify: Weight applied to non-stop-word nouns (default 1,
            i.e. no extra emphasis).

    Returns:
        numpy array of shape ``(embed_dim,)``. An all-zero vector is returned
        when the document has no tokens.
    """
    # Use the model that was passed in rather than a module-level global, so
    # the function also works when no global `nlp` exists.
    doc = spacy_model(sentence)
    embed_res = np.zeros((embed_dim,))
    doc_len = len(doc)
    if doc_len == 0:
        # Avoid 0/0 -> NaN for empty input.
        return embed_res

    noun_cnt = 0
    for token in doc:
        lemma = token.lemma_
        if lemma not in fastText_model:
            # Out-of-vocabulary tokens add nothing to the sum.
            continue
        vector = np.asarray(fastText_model[lemma], dtype=float)
        if token.pos_ == 'NOUN' and not token.is_stop:
            embed_res += noun_amplify * vector
            noun_cnt += 1
        else:
            embed_res += vector

    # Each amplified noun counts `noun_amplify` times in the normaliser.
    return embed_res / (doc_len + (noun_amplify - 1) * noun_cnt)




def headers_embedding(headers, spacy_model, fastText_model):
    """Embed every column name and stack the results row-wise.

    Args:
        headers: Iterable of column-name strings.
        spacy_model: Passed through to ``sentence_embedding``.
        fastText_model: Passed through to ``sentence_embedding``.

    Returns:
        numpy array of shape ``(len(headers), 300)``; ``(0, 300)`` when
        ``headers`` is empty.
    """
    embed_dim = 300
    # The leading empty block fixes the column count and yields the right
    # shape when there are no headers at all.
    stacked_parts = [np.empty((0, embed_dim))]
    stacked_parts.extend(
        sentence_embedding(col_name, spacy_model, fastText_model)
        for col_name in headers
    )
    return np.vstack(stacked_parts)



def question_Xy(question_embedding, header_embedding, sel_ind, conds_ind):
    """Build one (features, label) row per table column for a single question.

    Each feature row is the question embedding concatenated with one column
    embedding (600 values in total). The label is 1 when the column index
    equals ``sel_ind`` or ``conds_ind`` and 0 otherwise.

    Args:
        question_embedding: 1-D embedding of the question.
        header_embedding: Sequence of 1-D column embeddings.
        sel_ind: Index of the selected column.
        conds_ind: Index of the condition column.

    Returns:
        Tuple ``(question_X, question_y)`` of float arrays with shapes
        ``(n_columns, 600)`` and ``(n_columns, 1)``.
    """
    question_embed = 600
    wanted = (sel_ind, conds_ind)

    # Leading empty blocks pin the column counts and keep the float dtype.
    feature_rows = [np.empty((0, question_embed))]
    label_rows = [np.empty((0, 1))]
    for idx in range(len(header_embedding)):
        feature_rows.append(np.concatenate((question_embedding, header_embedding[idx])))
        label_rows.append(np.array(1 if idx in wanted else 0))

    return np.vstack(feature_rows), np.vstack(label_rows)



def obtain_Xy(question_list, table_dict, spacy_model, fastText_model):
    """Assemble the column-selection and aggregation training matrices.

    For every question, one feature/label row per table column is produced
    via ``question_Xy`` (label 1 for the ``sel`` column and for the column of
    the first condition). In addition, one aggregation row per question is
    produced: the question embedding, labelled 0 when ``sql['agg'] == 5`` and
    1 otherwise.

    Args:
        question_list: Iterable of question dicts with ``'question'``,
            ``'table_id'`` and ``'sql'`` (``'sel'``, ``'conds'``, ``'agg'``).
        table_dict: Mapping from table id to a table dict with ``'header'``.
        spacy_model: Passed through to the embedding helpers.
        fastText_model: Passed through to the embedding helpers.

    Returns:
        Tuple ``(question_Xs, question_ys, aggreation_Xs, aggregation_ys)``
        with shapes ``(rows, 600)``, ``(rows, 1)``, ``(n_questions, 300)``
        and ``(n_questions, 1)``.
    """
    question_embed = 600
    aggreation_embed = 300

    # Collect the pieces and stack once at the end; the leading empty blocks
    # fix the column counts and cover the empty-input case.
    column_feature_parts = [np.empty((0, question_embed))]
    column_label_parts = [np.empty((0, 1))]
    agg_feature_parts = [np.empty((0, aggreation_embed))]
    agg_label_parts = [np.empty((0, 1))]

    for question in question_list:
        question_embedding = sentence_embedding(question['question'], spacy_model, fastText_model)
        table_header = table_dict[question['table_id']]['header']
        header_embedding = headers_embedding(table_header, spacy_model, fastText_model)

        sql = question['sql']
        # Only the first condition's column is used as a target; this
        # indexing assumes every question has at least one condition.
        question_X, question_y = question_Xy(
            question_embedding, header_embedding, sql['sel'], sql['conds'][0][0]
        )
        column_feature_parts.append(question_X)
        column_label_parts.append(question_y)

        agg_feature_parts.append(question_embedding)
        agg_label = 0 if sql['agg'] == 5 else 1
        agg_label_parts.append(np.array(agg_label))

    return (
        np.vstack(column_feature_parts),
        np.vstack(column_label_parts),
        np.vstack(agg_feature_parts),
        np.vstack(agg_label_parts),
    )

# 1. Load NLP Models for Embedding

### Please download the pretrained word-embedding model from https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M-subword.vec.zip, unzip it, and save `wiki-news-300d-1M-subword.vec` to the current folder

In [3]:
# Load the pretrained word vectors into a dict (word -> list of floats).
# load_vectors prints a running line count every 100,000 lines.
fastText = load_vectors('wiki-news-300d-1M-subword.vec')

100000
200000
300000
400000
500000
600000
700000
800000
900000


In [4]:
# spaCy English pipeline; the embedding code reads each token's lemma_, pos_
# and is_stop attributes from the documents it produces.
nlp = spacy.load("en_core_web_sm")

# 2. Load data and Train Prediction Models for the Demo System

In [5]:
# Load the precomputed feature/label arrays. train.npy and test.npy each
# unpack into four items: column features, column labels, aggregation
# features and aggregation labels.
# NOTE: allow_pickle=True lets np.load unpickle arbitrary Python objects;
# only load .npy files that come from a trusted source.
column_train_X, column_train_y, agg_train_X, agg_train_y = np.load('train.npy', allow_pickle=True)
column_test_X, column_test_y, agg_test_X, agg_test_y = np.load('test.npy', allow_pickle=True)
# NOTE(review): the *_lstm_* and *_np arrays below are not referenced by any
# other cell visible in this notebook — confirm whether they are still needed.
train_lstm_X, train_lstm_y = np.load('train_lstm.npy', allow_pickle=True)
test_lstm_X, test_lstm_y = np.load('test_lstm.npy', allow_pickle=True)
test_np = np.load('test_individual.npy', allow_pickle=True)
train_np = np.load('train_individual.npy', allow_pickle=True)

In [6]:
# Aggregation classifier: fit on the aggregation features/labels and report
# accuracy on the held-out test split (last expression is the cell output).
# NOTE(review): no random_state is passed, so the score may differ between
# runs — confirm whether a fixed seed is wanted for reproducibility.
agg_clf = RandomForestClassifier()
agg_clf.fit(agg_train_X,agg_train_y)
agg_y_pred = agg_clf.predict(agg_test_X)
accuracy_score(agg_test_y, agg_y_pred)

  


0.9333333333333333

In [7]:
# Column classifier: fit on the (question, column) features/labels and report
# accuracy on the held-out test split (last expression is the cell output).
# NOTE(review): no random_state is passed, so the score may differ between
# runs — confirm whether a fixed seed is wanted for reproducibility.
col_clf = RandomForestClassifier()
col_clf.fit(column_train_X, column_train_y)

column_y_pred = col_clf.predict(column_test_X)
accuracy_score(column_test_y, column_y_pred)

  


0.839572192513369

# 3. Demo System

In [8]:
def table_html(table_js):
    """Render a table dict as an HTML fragment (title plus header and rows).

    Only the first 10 rows are rendered. All text taken from the table is
    HTML-escaped because the table is uploaded by the user.

    Args:
        table_js: dict with ``'header'`` (list of column names) and
            ``'rows'`` (list of rows, each a list of cells). ``'page_title'``
            and ``'section_title'`` are used for the heading and default to
            an empty string when missing.

    Returns:
        bytes: UTF-8 encoded fragment consisting of ``<h3>``/``<h4>``
        headings followed by ``<tr>`` elements (no surrounding ``<table>``).
    """
    # Local import keeps this block self-contained; `html` is standard library.
    import html

    def _cell(value):
        # str() allows numeric cells; escape() neutralises markup in the data.
        return html.escape(str(value))

    title = '{}: {}'.format(table_js.get('page_title', ''),
                            table_js.get('section_title', ''))
    # The heading tags are now properly closed (they were opened twice).
    title_html = '<h3>' + _cell(title) + '</h3>' + '<h4>only show top 10 rows</h4>'
    head_html = '<tr><th>' + '</th><th>'.join(_cell(h) for h in table_js['header']) + '</th></tr>'
    rows_html = ''.join(
        '<tr><td>' + '</td><td>'.join(_cell(c) for c in row) + '</td></tr>'
        for row in table_js['rows'][:10]
    )
    return (title_html + head_html + rows_html).encode()

In [9]:
def data_html(question, table_js):
    """Build the JavaScript source declaring two Vega-Lite bar-chart specs.

    The column classifier picks two columns of the table for the question
    and the aggregation classifier picks the aggregate (``'sum'`` when it
    predicts 1, ``'mean'`` otherwise). Two specs are emitted: ``vlSpec``
    aggregates the first chosen column over the second, ``vlSpec2`` does the
    reverse.

    Relies on the module-level ``nlp``, ``fastText``, ``col_clf`` and
    ``agg_clf`` objects.

    Args:
        question: Natural-language question about the table.
        table_js: Table dict with ``'header'``, ``'types'`` and ``'rows'``.

    Returns:
        str: ``"var vlSpec = {...}\\n\\nvar vlSpec2 = {...}"``.
    """
    def _as_number(value, col_type):
        # Columns typed 'real' are converted to float; a cell that cannot be
        # parsed is passed through unchanged rather than aborting the plot.
        if col_type != 'real':
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return value

    def _js_literal(obj):
        # json.dumps yields a valid JavaScript literal (unlike Python repr).
        # "</" is escaped so table text cannot terminate an enclosing
        # <script> element if the snippet is embedded in one.
        return json.dumps(obj).replace('</', '<\\/')

    header = table_js['header']
    question_embed = sentence_embedding(question, nlp, fastText)
    header_embed = headers_embedding(header, nlp, fastText)
    # The label arguments are irrelevant at prediction time; only X is used.
    X, _ = question_Xy(question_embed, header_embed, 0, 0)
    col = col_clf.predict(X)
    agg = agg_clf.predict([question_embed])

    if np.sum(col) != 2.0:
        # The classifier did not flag exactly two columns: fall back to a
        # random pair. Sampling without replacement avoids plotting a column
        # against itself (only possible with at least two columns).
        n_cols = col.shape[0]
        col_ind = np.random.choice(n_cols, size=2, replace=(n_cols < 2))
    else:
        col_ind = np.nonzero(col)[0]

    ind_a = col_ind[0]
    ind_b = col_ind[1]
    type_a = table_js['types'][ind_a]
    type_b = table_js['types'][ind_b]
    field_a = header[ind_a]
    field_b = header[ind_b]

    data_values = [
        {field_a: _as_number(row[ind_a], type_a), field_b: _as_number(row[ind_b], type_b)}
        for row in table_js['rows']
    ]
    data_ = _js_literal(data_values)

    # predict() returns an array with one label for the single question.
    agg_method = 'sum' if np.ravel(agg)[0] == 1 else 'mean'

    def _spec(var_name, measure, dimension):
        # y: aggregated measure, x: nominal dimension.
        y_enc = _js_literal({"aggregate": agg_method, "field": measure, "axis": {"title": measure}})
        x_enc = _js_literal({"field": dimension, "type": 'nominal'})
        return ("var " + var_name
                + " = { $schema: 'https://vega.github.io/schema/vega-lite/v4.json', data: { values:"
                + data_ + "},mark: 'bar', encoding: { y:" + y_enc + ", x: " + x_enc + " } }")

    return _spec('vlSpec', field_a, field_b) + "\n\n" + _spec('vlSpec2', field_b, field_a)

In [10]:
# Static HTML fragments (read as bytes) that are concatenated around the
# rendered table and plot data to form each response page. Context managers
# make sure the three file handles are closed after reading.
with open("before_table.html", "rb") as infile1:
    html_before_table = infile1.read()
with open("before_plot.html", "rb") as infile2:
    html_before_plot = infile2.read()
with open("after_plot.html", "rb") as infile3:
    html_after_plot = infile3.read()
# Placeholder table shown on the index page before any table is uploaded.
index_table_byte = "<h3>Example</h3><tr><th>Firstname</th><th>Lastname</th><th>Age</th></tr>".encode()

### Example 1 for the demo system: the first cell is the table in JSON format, and the second cell is the question about that table

In [11]:
{"header": ["Tournament", "Wins", "Top-5", "Top-10", "Top-25", "Events", "Cuts made"], "page_title": "Peter Thomson (golfer)", "types": ["text", "real", "real", "real", "real", "real", "real"], "page_id": 1510351, "id": "2-1510351-5", "section_title": "Summary", "rows": [["Masters Tournament", "0", "1", "1", "5", "8", "6"], ["U.S. Open", "0", "1", "1", "2", "5", "3"], ["The Open Championship", "5", "10", "18", "23", "30", "26"], ["PGA Championship", "0", "0", "0", "0", "0", "0"], ["Totals", "5", "12", "20", "30", "43", "35"]], "caption": "Summary"}


{'header': ['Tournament',
  'Wins',
  'Top-5',
  'Top-10',
  'Top-25',
  'Events',
  'Cuts made'],
 'page_title': 'Peter Thomson (golfer)',
 'types': ['text', 'real', 'real', 'real', 'real', 'real', 'real'],
 'page_id': 1510351,
 'id': '2-1510351-5',
 'section_title': 'Summary',
 'rows': [['Masters Tournament', '0', '1', '1', '5', '8', '6'],
  ['U.S. Open', '0', '1', '1', '2', '5', '3'],
  ['The Open Championship', '5', '10', '18', '23', '30', '26'],
  ['PGA Championship', '0', '0', '0', '0', '0', '0'],
  ['Totals', '5', '12', '20', '30', '43', '35']],
 'caption': 'Summary'}

In [12]:
Name the average top 25 of different events

SyntaxError: invalid syntax (<ipython-input-12-40969e6b38c0>, line 1)

### Example 2 for the demo system: the first cell is the table in JSON format, and the second cell is the question about that table

In [13]:
{"header": ["Rider", "Bike", "Laps", "Time", "Grid"], "page_title": "2008 Misano Superbike World Championship round", "types": ["text", "text", "real", "text", "real"], "page_id": 18145978, "id": "2-18145978-2", "section_title": "Superbike race 2 classification", "rows": [["Ruben Xaus", "Ducati 1098 RS 08", "24", "39:19.710", "3"], ["Max Biaggi", "Ducati 1098 RS 08", "24", "+1.035", "5"], ["Troy Bayliss", "Ducati 1098 F08", "24", "+4.158", "2"], ["Noriyuki Haga", "Yamaha YZF-R1", "24", "+5.466", "14"], ["Troy Corser", "Yamaha YZF-R1", "24", "+6.759", "1"], ["Lorenzo Lanzi", "Ducati 1098 RS 08", "24", "+13.468", "6"], ["Max Neukirchner", "Suzuki GSX-R1000", "24", "+15.221", "10"], ["Carlos Checa", "Honda CBR1000RR", "24", "+16.687", "11"], ["Jakub Smrz", "Ducati 1098 RS 08", "24", "+17.030", "7"], ["Fonsi Nieto", "Suzuki GSX-R1000", "24", "+17.681", "9"], ["Michel Fabrizio", "Ducati 1098 F08", "24", "+21.356", "4"], ["Yukio Kagayama", "Suzuki GSX-R1000", "24", "+28.676", "13"], ["Ryuichi Kiyonari", "Honda CBR1000RR", "24", "+31.304", "22"], ["Gregorio Lavilla", "Honda CBR1000RR", "24", "+32.339", "19"], ["Shinichi Nakatomi", "Yamaha YZF-R1", "24", "+33.716", "20"], ["David Checa", "Yamaha YZF-R1", "24", "+34.171", "18"], ["Ayrton Badovini", "Kawasaki ZX-10R", "24", "+40.638", "16"], ["Roberto Rolfo", "Honda CBR1000RR", "24", "+41.136", "17"], ["Shuhei Aoyama", "Honda CBR1000RR", "24", "+49.699", "23"], ["Jason Pridmore", "Honda CBR1000RR", "23", "Retirement", "27"], ["S\u00e9bastien Gimbert", "Yamaha YZF-R1", "14", "Retirement", "21"], ["Vittorio Iannuzzo", "Kawasaki ZX-10R", "8", "Retirement", "25"], ["R\u00e9gis Laconi", "Kawasaki ZX-10R", "5", "Retirement", "8"], ["Makoto Tamada", "Kawasaki ZX-10R", "2", "Accident", "12"], ["Karl Muggeridge", "Honda CBR1000RR", "2", "Retirement", "15"], ["Kenan Sofuo\u011flu", "Honda CBR1000RR", "0", "Accident", "24"]], "caption": "Superbike race 2 classification"}


{'header': ['Rider', 'Bike', 'Laps', 'Time', 'Grid'],
 'page_title': '2008 Misano Superbike World Championship round',
 'types': ['text', 'text', 'real', 'text', 'real'],
 'page_id': 18145978,
 'id': '2-18145978-2',
 'section_title': 'Superbike race 2 classification',
 'rows': [['Ruben Xaus', 'Ducati 1098 RS 08', '24', '39:19.710', '3'],
  ['Max Biaggi', 'Ducati 1098 RS 08', '24', '+1.035', '5'],
  ['Troy Bayliss', 'Ducati 1098 F08', '24', '+4.158', '2'],
  ['Noriyuki Haga', 'Yamaha YZF-R1', '24', '+5.466', '14'],
  ['Troy Corser', 'Yamaha YZF-R1', '24', '+6.759', '1'],
  ['Lorenzo Lanzi', 'Ducati 1098 RS 08', '24', '+13.468', '6'],
  ['Max Neukirchner', 'Suzuki GSX-R1000', '24', '+15.221', '10'],
  ['Carlos Checa', 'Honda CBR1000RR', '24', '+16.687', '11'],
  ['Jakub Smrz', 'Ducati 1098 RS 08', '24', '+17.030', '7'],
  ['Fonsi Nieto', 'Suzuki GSX-R1000', '24', '+17.681', '9'],
  ['Michel Fabrizio', 'Ducati 1098 F08', '24', '+21.356', '4'],
  ['Yukio Kagayama', 'Suzuki GSX-R1000', '24'

In [14]:
What is the total number of Grid, when Laps has different values?

Object `values` not found.


## Start the server and visit http://127.0.0.1:8080 !

In [15]:
import os, os.path
import random
import string

import cherrypy


class StringGenerator(object):
    """CherryPy application serving the table-to-chart demo pages.

    Every page is the concatenation of the module-level byte fragments
    ``html_before_table``, a table fragment, ``html_before_plot``, optional
    plot data and ``html_after_plot``. The most recently uploaded table is
    kept on the instance and is therefore shared by all clients.
    """

    def __init__(self, model_col, model_agg):
        # The two model arguments are stored but not read by any handler;
        # prediction goes through the module-level data_html helper.
        self.model_col_ = model_col
        self.model_agg_ = model_agg
        self.table_ = None       # parsed table dict of the last upload
        self.table_byte_ = None  # rendered HTML (bytes) of that table
        self.data_byte_ = None   # last generated plot snippet (bytes)

    @cherrypy.expose
    def index(self):
        """Return the landing page with the placeholder example table."""
        return html_before_table + index_table_byte + html_before_plot + html_after_plot

    @cherrypy.expose
    def update_table(self, uploaded_table='a'):
        """Parse the uploaded JSON table, remember it and return the page.

        Raises:
            cherrypy.HTTPError: 400 when ``uploaded_table`` is not valid JSON.
        """
        try:
            table = json.loads(uploaded_table)
        except ValueError:
            # json.JSONDecodeError is a ValueError; report it as a client
            # error instead of an unhandled 500.
            raise cherrypy.HTTPError(400, "uploaded_table is not valid JSON")
        table_byte = table_html(table)
        # Commit the state only after rendering succeeded, so table_ and
        # table_byte_ always describe the same table.
        self.table_ = table
        self.table_byte_ = table_byte
        return html_before_table + table_byte + html_before_plot + html_after_plot

    @cherrypy.expose
    def update_plot(self, question):
        """Generate the plot snippet for ``question`` on the stored table.

        Raises:
            cherrypy.HTTPError: 400 when no table has been uploaded yet.
        """
        if self.table_ is None or self.table_byte_ is None:
            raise cherrypy.HTTPError(400, "Upload a table before asking a question.")
        self.data_byte_ = data_html(question, self.table_).encode()
        return (html_before_table + self.table_byte_ + html_before_plot
                + self.data_byte_ + html_after_plot)


if __name__ == '__main__':
    # CherryPy configuration: sessions are enabled for the whole app and
    # static files are served from ./public, resolved against the current
    # working directory.
    conf = {
        '/': {
            'tools.sessions.on': True,
            'tools.staticdir.root': os.path.abspath(os.getcwd())
        },
        '/static': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': './public'
        }
    }
    # NOTE(review): 1 and 2 are placeholder values for model_col / model_agg;
    # the constructor only stores them — confirm whether the trained
    # classifiers were meant to be passed here.
    # quickstart blocks this cell until the server is interrupted.
    cherrypy.quickstart(StringGenerator(1,2), '/', conf)

[15/May/2020:00:17:56] ENGINE Listening for SIGTERM.
[15/May/2020:00:17:56] ENGINE Listening for SIGHUP.
[15/May/2020:00:17:56] ENGINE Listening for SIGUSR1.
[15/May/2020:00:17:56] ENGINE Bus STARTING
CherryPy Checker:
'/Users/haox/Folder/Study/2020Spring/VisML/VisProject/WikiVis/./public' (root + dir) is not an existing filesystem path.
section: [/static]
root: '/Users/haox/Folder/Study/2020Spring/VisML/VisProject/WikiVis'
dir: './public'

[15/May/2020:00:17:56] ENGINE Started monitor thread 'Autoreloader'.
[15/May/2020:00:17:56] ENGINE Serving on http://127.0.0.1:8080
[15/May/2020:00:17:56] ENGINE Bus STARTED


127.0.0.1 - - [15/May/2020:00:17:56] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:17:57] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:17:58] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:17:59] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/

127.0.0.1 - - [15/May/2020:00:18:19] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:18:20] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:18:21] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:18:22] "GET /update_table?uploaded_table=%7B%22header%22%3A+%5B%22Rider%22%2C+%22Bike%22%2C+%22Laps%22%2C+%22Time%22%2C+%22Grid%22%5D%2C+%22page_title%22%3A+%222008+Misano+Superbike+World+Championship+round%22%2C+%

127.0.0.1 - - [15/May/2020:00:18:30] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:18:31] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:18:32] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"
127.0.0.1 - - [15/May/2020:00:18:33] "GET /crawls/default/metrics HTTP/1.1" 404 1386 "http://localhost:8080/monitoring/default" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/

[15/May/2020:00:18:35] ENGINE Keyboard Interrupt: shutting down bus
[15/May/2020:00:18:35] ENGINE Bus STOPPING
[15/May/2020:00:18:40] ENGINE HTTP Server cherrypy._cpwsgi_server.CPWSGIServer(('127.0.0.1', 8080)) shut down
[15/May/2020:00:18:40] ENGINE Stopped thread 'Autoreloader'.
[15/May/2020:00:18:40] ENGINE Bus STOPPED
[15/May/2020:00:18:40] ENGINE Bus EXITING
[15/May/2020:00:18:40] ENGINE Bus EXITED
[15/May/2020:00:18:40] ENGINE Waiting for child threads to terminate...


In [11]:
# Example 1 fixtures: q_c is the question record (including its 'sql' labels)
# and js_c is the table it refers to (same id as q_c['table_id']).
q_c = {"phase": 2, "table_id": "2-1510351-5", "question": "Name the average top 25 of different events", "sql": {"sel": 4, "conds": [[5, 2, 0]], "agg": 5}, "question1": "What is the average value of Top-25 over different Events", "question2": "How does the average Top-25 distribute over different Events"}
js_c = {"header": ["Tournament", "Wins", "Top-5", "Top-10", "Top-25", "Events", "Cuts made"], "page_title": "Peter Thomson (golfer)", "types": ["text", "real", "real", "real", "real", "real", "real"], "page_id": 1510351, "id": "2-1510351-5", "section_title": "Summary", "rows": [["Masters Tournament", "0", "1", "1", "5", "8", "6"], ["U.S. Open", "0", "1", "1", "2", "5", "3"], ["The Open Championship", "5", "10", "18", "23", "30", "26"], ["PGA Championship", "0", "0", "0", "0", "0", "0"], ["Totals", "5", "12", "20", "30", "43", "35"]], "caption": "Summary"}



In [141]:
# Example 2 fixtures: q_d is the question record (including its 'sql' labels)
# and js_d is the table it refers to (same id as q_d['table_id']).
q_d = {"phase": 2, "table_id": "2-18145978-2", "question": "What is the total number of Grid, when Laps has different values?", "sql": {"sel": 4, "conds": [[2, 1, 24]], "agg": 3}, "question1": "What is the Grid over different Laps", "question2": "How many Grid over different Laps"}
js_d = {"header": ["Rider", "Bike", "Laps", "Time", "Grid"], "page_title": "2008 Misano Superbike World Championship round", "types": ["text", "text", "real", "text", "real"], "page_id": 18145978, "id": "2-18145978-2", "section_title": "Superbike race 2 classification", "rows": [["Ruben Xaus", "Ducati 1098 RS 08", "24", "39:19.710", "3"], ["Max Biaggi", "Ducati 1098 RS 08", "24", "+1.035", "5"], ["Troy Bayliss", "Ducati 1098 F08", "24", "+4.158", "2"], ["Noriyuki Haga", "Yamaha YZF-R1", "24", "+5.466", "14"], ["Troy Corser", "Yamaha YZF-R1", "24", "+6.759", "1"], ["Lorenzo Lanzi", "Ducati 1098 RS 08", "24", "+13.468", "6"], ["Max Neukirchner", "Suzuki GSX-R1000", "24", "+15.221", "10"], ["Carlos Checa", "Honda CBR1000RR", "24", "+16.687", "11"], ["Jakub Smrz", "Ducati 1098 RS 08", "24", "+17.030", "7"], ["Fonsi Nieto", "Suzuki GSX-R1000", "24", "+17.681", "9"], ["Michel Fabrizio", "Ducati 1098 F08", "24", "+21.356", "4"], ["Yukio Kagayama", "Suzuki GSX-R1000", "24", "+28.676", "13"], ["Ryuichi Kiyonari", "Honda CBR1000RR", "24", "+31.304", "22"], ["Gregorio Lavilla", "Honda CBR1000RR", "24", "+32.339", "19"], ["Shinichi Nakatomi", "Yamaha YZF-R1", "24", "+33.716", "20"], ["David Checa", "Yamaha YZF-R1", "24", "+34.171", "18"], ["Ayrton Badovini", "Kawasaki ZX-10R", "24", "+40.638", "16"], ["Roberto Rolfo", "Honda CBR1000RR", "24", "+41.136", "17"], ["Shuhei Aoyama", "Honda CBR1000RR", "24", "+49.699", "23"], ["Jason Pridmore", "Honda CBR1000RR", "23", "Retirement", "27"], ["S\u00e9bastien Gimbert", "Yamaha YZF-R1", "14", "Retirement", "21"], ["Vittorio Iannuzzo", "Kawasaki ZX-10R", "8", "Retirement", "25"], ["R\u00e9gis Laconi", "Kawasaki ZX-10R", "5", "Retirement", "8"], ["Makoto Tamada", "Kawasaki ZX-10R", "2", "Accident", "12"], ["Karl Muggeridge", "Honda CBR1000RR", "2", "Retirement", "15"], ["Kenan Sofuo\u011flu", "Honda CBR1000RR", "0", "Accident", "24"]], "caption": "Superbike race 2 classification"}

