diff --git a/flashgeotext/extractor.py b/flashgeotext/extractor.py
index 3d62d60..55f4a98 100644
--- a/flashgeotext/extractor.py
+++ b/flashgeotext/extractor.py
@@ -1,5 +1,4 @@
 import json
-from typing import Union
 
 from flashtext import KeywordProcessor
 
@@ -12,30 +11,18 @@ class Alphabets(object):
 
 class DemoData(object):
 
-    cities: Union[list, dict] = []
-    countries: Union[list, dict] = []
+    cities: dict = {}
+    countries: dict = {}
 
-    def __init__(self, with_synonyms: bool = True):
-        self.load_demo_data(with_synonyms=with_synonyms)
+    def load(self) -> None:
 
-    def load_demo_data(self, with_synonyms: bool = True) -> None:
+        self.cities = self._load_data_dict(file=DEMODATA_CITIES)
+        self.countries = self._load_data_dict(file=DEMODATA_COUNTRIES)
 
-        if with_synonyms:
-            self.cities = self._load_data_dict(file=DEMODATA_CITIES)
-            self.countries = self._load_data_dict(file=DEMODATA_COUNTRIES)
-
-        else:
-            self.cities = self._load_data_list(file=DEMODATA_CITIES)
-            self.countries = self._load_data_list(file=DEMODATA_COUNTRIES)
-
-    def _load_data_dict(self, file: str = "") -> dict:
+    def _load_data_dict(self, file: str) -> dict:
         with open(file, "r", encoding="utf-8") as f:
             return json.loads(f.read())
 
-    def _load_data_list(self, file: str = "") -> list:
-        with open(file, "r", encoding="utf-8") as f:
-            return list(json.loads(f.read()).keys())
-
 class Extractor:
 
     cities: KeywordProcessor = KeywordProcessor(case_sensitive=True)
diff --git a/scripts/Untitled.ipynb b/scripts/Untitled.ipynb
deleted file mode 100644
index cce25f8..0000000
--- a/scripts/Untitled.ipynb
+++ /dev/null
@@ -1,708 +0,0 @@
[708 deleted lines of scratch-notebook JSON omitted: an %autoreload preamble, a pasted help(pd.read_csv) dump, a flashtext KeywordProcessor experiment over the German entries of geonames' cities15000.txt, a CLDR alphabet table (https://www.unicode.org/cldr/charts/latest/summary/root.html) filtered to Germany, and a short flashgeotext Extractor smoke test.]
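Note: the {clean name: [synonyms, ...]} dict shape that DemoData.load() now always produces is the shape flashtext consumes, which is what the deleted notebook was experimenting with. A minimal sketch of that hand-off (the two-entry dict here is made up for illustration; the real data comes from the DEMODATA_CITIES JSON):

    from flashtext import KeywordProcessor

    # clean name -> synonym list, the same shape DemoData.load() returns
    german_cities = {"Berlin": ["Berlin", "Berlins"], "Erlangen": ["Erlangen"]}

    processor = KeywordProcessor(case_sensitive=True)
    processor.add_keywords_from_dict(german_cities)

    # flashtext reports the clean name plus the matched span
    print(processor.extract_keywords(
        "Berlins ist die schoenste Stadt Deutschlands.", span_info=True
    ))
    # expected: [('Berlin', 0, 7)]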
" - ], - "text/plain": [ - " name silcode source region countries script \\\n", - "dsb Lower Sorbian NaN cldr_dsb eur Germany latn \n", - "hsb Upper Sorbian NaN cldr_hsb,udhr_hsb eur Germany latn \n", - "ksh Kölsch (Colognian) NaN cldr_ksh eur Germany latn \n", - "\n", - " speakers letter punctuation \\\n", - "dsb 6900 óÓčćěłńŕšśžźČĆĚŁŃŔŠŚŽŹ «»§‐–—…‘’‚“„ \n", - "hsb 13000 čćźěłńřšžČĆŹĚŁŃŘŠŽóÓ «»§‐–—…‘’‚“„ \n", - "ksh 250000 ėœůĖŒŮåäæëößüÅÄÆËÖÜ ‐–—…‘‚“„†‡§⸗ \n", - "\n", - " local ... related separator \\\n", - "dsb dolnoserbšćina, dolnoserbski [ˈdɔlnɔˌsɛrskʲi] ... NaN NaN \n", - "hsb Hornjoserbski, Hornjoserbšćina ... NaN NaN \n", - "ksh Kölsch ... NaN NaN \n", - "\n", - " symbol number redirect other sildcode deprecated more symbols \n", - "dsb NaN NaN NaN NaN NaN NaN NaN NaN \n", - "hsb NaN NaN NaN NaN NaN NaN NaN NaN \n", - "ksh ° NaN NaN NaN NaN NaN NaN NaN \n", - "\n", - "[3 rows x 26 columns]" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2.T[df2.T.countries == \"Germany\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# flashgeotext" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from flashgeotext.extractor import Extractor" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "ext = Extractor()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "hasattr(ext, \"cities\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.5" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/tests/unit/test_extractor.py b/tests/unit/test_extractor.py index 722d226..f39b1c2 100644 --- a/tests/unit/test_extractor.py +++ b/tests/unit/test_extractor.py @@ -14,3 +14,11 @@ def test_demodata_content(): assert hasattr(demodata, "cities") assert hasattr(demodata, "countries") + + +def test_demodata_load_data(): + demodata = DemoData() + demodata.load() + + assert demodata.cities + assert demodata.countries