Skip to content

Commit

Permalink
add unicode data to tests (#432)
Browse files Browse the repository at this point in the history
* add unicode data to tests

* make tests pass on 2.7

* clean up data loading

- remove duplicate keys in slice_data
- reduce line length

* change manager option flag to -t, --load-test-data

* test --> load_test_data
  • Loading branch information
andrewhn authored and mistercrunch committed May 5, 2016
1 parent a3f549b commit 6941f1d
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 6 deletions.
10 changes: 7 additions & 3 deletions caravel/bin/caravel
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ def version():
print(s)

@manager.option(
'-s', '--sample', action='store_true',
help="Only load 1000 rows (faster, used for testing)")
def load_examples(sample):
'-t', '--load-test-data', action='store_true',
help="Load additional test data")
def load_examples(load_test_data):
"""Loads a set of Slices and Dashboards and a supporting dataset """
print("Loading examples into {}".format(db))

Expand All @@ -85,6 +85,10 @@ def load_examples(sample):
print("Loading [Birth names]")
data.load_birth_names()

if load_test_data:
print("Loading [Unicode test data]")
data.load_unicode_test_data()

@manager.command
def refresh_druid():
"""Refresh all druid datasources"""
Expand Down
89 changes: 88 additions & 1 deletion caravel/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
import json
import os
import textwrap
import datetime
import random

import pandas as pd
from sqlalchemy import String, DateTime, Float
from sqlalchemy import String, DateTime, Date, Float

from caravel import app, db, models, utils

Expand Down Expand Up @@ -808,3 +810,88 @@ def load_birth_names():
dash.slices = slices[:-1]
db.session.merge(dash)
db.session.commit()


def load_unicode_test_data():
    """Load the unicode sample CSV and build a table, slice and dashboard.

    Reads ``unicode_utf8_unixnl_test.csv`` from ``DATA_FOLDER``, adds a
    ``date`` column (the current date) and a ``value`` column of random
    integers between 1 and 100, and writes the result to the
    ``unicode_test`` SQL table, replacing it if it already exists.  It then
    registers the table reference, a "Unicode Cloud" word-cloud slice and a
    "Unicode Test" dashboard containing that slice.
    """
    csv_path = os.path.join(DATA_FOLDER, 'unicode_utf8_unixnl_test.csv')
    frame = pd.read_csv(csv_path, encoding="utf-8")

    # The CSV only carries text columns; synthesize date/numeric data.
    frame['date'] = datetime.datetime.now().date()
    frame['value'] = [random.randint(1, 100) for _ in range(len(frame))]

    column_types = {
        'phrase': String(500),
        'short_phrase': String(10),
        'with_missing': String(100),
        'date': Date(),
        'value': Float(),
    }
    frame.to_sql(
        'unicode_test',
        db.engine,
        if_exists='replace',
        chunksize=500,
        dtype=column_types,
        index=False)
    print("Done loading table!")
    print("-" * 80)

    print("Creating table reference")
    # Reuse the existing table reference when there is one.
    tbl = (
        db.session.query(TBL).filter_by(table_name='unicode_test').first()
        or TBL(table_name='unicode_test')
    )
    tbl.main_dttm_col = 'date'
    tbl.database = get_or_create_db(db.session)
    tbl.is_featured = False
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()

    # Form parameters for the word cloud: one word per `short_phrase`,
    # sized by the summed `value`.
    cloud_params = {
        "datasource_id": "3",
        "datasource_name": "unicode_test",
        "datasource_type": "table",
        "flt_op_1": "in",
        "granularity": "date",
        "groupby": [],
        "metric": 'sum__value',
        "row_limit": config.get("ROW_LIMIT"),
        "since": "100 years ago",
        "until": "now",
        "where": "",
        "viz_type": "word_cloud",
        "size_from": "10",
        "series": "short_phrase",
        "size_to": "70",
        "rotation": "square",
        "limit": "100",
    }

    print("Creating a slice")
    slc = Slice(
        slice_name="Unicode Cloud",
        viz_type='word_cloud',
        datasource_type='table',
        table=tbl,
        params=get_slice_json(cloud_params),
    )
    merge_slice(slc)

    print("Creating a dashboard")
    # Reuse the existing dashboard when there is one.
    dash = (
        db.session.query(Dash)
        .filter_by(dashboard_title="Unicode Test")
        .first()
        or Dash()
    )
    # Single 4x4 tile in the top-left corner holding the slice.
    layout = [{
        "size_y": 4,
        "size_x": 4,
        "col": 1,
        "row": 1,
        "slice_id": slc.id,
    }]
    dash.dashboard_title = "Unicode Test"
    dash.position_json = json.dumps(layout, indent=4)
    dash.slug = "unicode-test"
    dash.slices = [slc]
    db.session.merge(dash)
    db.session.commit()
42 changes: 42 additions & 0 deletions caravel/data/unicode_utf8_unixnl_test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
phrase,short_phrase,with_missing
"Под южно дърво, цъфтящо в синьо, бягаше малко пухкаво зайче.",Под южно д,Fam hx-cardiovas dis NEC
Příliš žluťoučký kůň úpěl ďábelské ódy.,Příliš žlu,
視野無限廣,窗外有藍天,視野無限廣,窗外有藍,Sparganosis
微風迎客,軟語伴茶,微風迎客,軟語伴茶,Var mgr NEC wo ntc mgr
中国智造,慧及全球,中国智造,慧及全球,Mech prob w internal org
"Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Walther spillede på xylofon.",Quizdeltag,Corneal dystrophy NOS
Pa’s wijze lynx bezag vroom het fikse aquaduct.,Pa’s wijze,Edema in preg-unspec
Eĥoŝanĝo ĉiuĵaŭde.,Eĥoŝanĝo ĉ,
See väike mölder jõuab rongile hüpata,See väike ,Twin NOS-nonhosp
Viekas kettu punaturkki laiskan koiran takaa kurkki.,Viekas ket,Postgastric surgery synd
Voix ambiguë d’un cœur qui au zéphyr préfère les jattes de kiwis.,Voix ambig,Loose body-mult joints
Portez ce vieux whisky au juge blond qui fume.,Portez ce ,Late eff acc poisoning
Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich,Zwölf Boxk,Opn brain inj w/o coma
Franz jagt im komplett verwahrlosten Taxi quer durch Bayern.,Franz jagt,TB of ear-unspec
Θέλει αρετή και τόλμη η ελευθερία. (Ανδρέας Κάλβος),Θέλει αρετ,Chr peptic ulcer w perf
Ο καλύμνιος σφουγγαράς ψιθύρισε πως θα βουτήξει χωρίς να διστάζει.,Ο καλύμνιο,Cns TB NEC-cult dx
דג סקרן שט לו בים זך אך לפתע פגש חבורה נחמדה שצצה כך.,דג סקרן שט,Polyhydramnios-delivered
Árvíztűrő tükörfúrógép,Árvíztűrő ,Malign neopl scrotum
"Egy hűtlen vejét fülöncsípő, dühös mexikói úr Wesselényinél mázol Quitóban.",Egy hűtlen,Tubal/broad lig anom NOS
Saya lihat foto Hamengkubuwono XV bersama enam zebra purba cantik yang jatuh dari Al Quranmu.,Saya lihat,Ben carcinoid duodenum
"Ma la volpe, col suo balzo, ha raggiunto il quieto Fido.",Ma la volp,Ch leu un cl wo ach rmsn
いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす,いろはにほへと ちり,Mycotic arthritis-pelvis
다람쥐 헌 쳇바퀴에 타고파,다람쥐 헌 쳇바퀴에,Paral polio NEC-type 1
Sarkanās jūrascūciņas peld pa jūru.,Sarkanās j,Fx larynx/trachea-open
En god stil må først og fremst være klar. Den må være passende. Aristoteles.,En god sti,Dermatophytosis site NOS
Pchnąć w tę łódź jeża lub ośm skrzyń fig,Pchnąć w t,Anxiety disorder oth dis
A rápida raposa castanha salta por cima do cão lento.,A rápida r,Adenoid vegetations
A ligeira raposa marrom ataca o cão preguiçoso.,A ligeira ,Consanguinity
Zebras caolhas de Java querem passar fax para moças gigantes de New York,Zebras cao,"Hypotony NOS, eye"
Agera vulpe maronie sare peste câinele cel leneş.,Agera vulp,Urethral syndrome NOS
Съешь ещё этих мягких французских булок да выпей же чаю,Съешь ещё ,Coccidioidomycosis NOS
Чешће цeђење мрeжастим џаком побољшава фертилизацију генских хибрида.,Чешће цeђе,
Češće ceđenje mrežastim džakom poboljšava fertilizaciju genskih hibrida.,Češće ceđe,Scrn-hemoglobinopath NEC
Kŕdeľ šťastných ďatľov učí pri ústí Váhu mĺkveho koňa obhrýzať kôru a žrať čerstvé mäso.,Kŕdeľ šťas,
V kožuščku hudobnega fanta stopiclja mizar in kliče 0619872345.,V kožuščku,
El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja.,El veloz m,Cervical syndrome NEC
Flygande bäckasiner söka hwila på mjuka tuvor,Flygande b,Letterer-siwe dis abdom
เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะ ๆ จ๋า ๆ น่าฟังเอยฯ,เป็นมนุษย์,Balantidiasis
"Pijamalı hasta, yağız şoföre çabucak güvendi",Pijamalı h,Epilepsy-delivered w p/p
زۆھرەگۈل ئابدۇۋاجىت فرانسىيەنىڭ پارىژدىكى خېلى بىشەم ئوقۇغۇچى.,زۆھرەگۈل ئ,Fit/adj non-vsc cath NEC
ئاۋۇ بىر جۈپ خوراز فرانسىيەنىڭ پارىژ شەھرىگە يېقىن تاغقا كۆچەلمىدى.,ئاۋۇ بىر ج,Sat cerv smr-no trnsfrm
2 changes: 1 addition & 1 deletion caravel/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def data(self):
def get_csv(self):
df = self.get_df()
include_index = not isinstance(df.index, pd.RangeIndex)
return df.to_csv(index=include_index)
return df.to_csv(index=include_index, encoding="utf-8")

def get_data(self):
return []
Expand Down
3 changes: 2 additions & 1 deletion tests/core_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def tearDown(self):
pass

def load_examples(self):
cli.load_examples(sample=True)
cli.load_examples(load_test_data=True)

def test_save_slice(self):
self.login_admin()
Expand Down Expand Up @@ -137,6 +137,7 @@ def test_slices(self):
urls += [
(slc.slice_name, slc.slice_url),
(slc.slice_name, slc.viz.json_endpoint),
(slc.slice_name, slc.viz.csv_endpoint),
]
for name, url in urls:
print("Slice: " + name)
Expand Down

0 comments on commit 6941f1d

Please sign in to comment.