diff --git a/Makefile b/Makefile index 854270d..14f3b48 100644 --- a/Makefile +++ b/Makefile @@ -15,4 +15,13 @@ run_pytest: test: venv run_pytest clean -.PHONY: install test venv clean run_pytest +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = docs/source +BUILDDIR = docs/build + +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: install test venv clean run_pytest help Makefile diff --git a/README.md b/README.md index e2346b1..583a393 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ ## fabric8-analytics-rudra -This library is the collection of the common utils and tools required by various fabric8-analytics machine learning projects. +This library is the collection of the common utils and tools required by various fabric8-analytics machine learning projects. Documentation is available at [fabric8-analytics-rudra Docs](https://fabric8-analytics-rudra.readthedocs.io). Please follow this article [Docs Examples](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for documenting the code. ### Installation: -#### - ```bash $ pip install git+https://github.com/fabric8-analytics/fabric8-analytics-rudra ``` @@ -18,6 +16,14 @@ $ make install $ make test ``` +### Documentation: +```bash +$ export PYTHONPATH=/path/to/fabric8-analytics-rudra +``` +```bash +$ make html +``` + ### Footnotes #### Coding standards @@ -80,3 +86,4 @@ The script named `check-bashscripts.sh` can be used to check all BASH scripts (i Please see [the following link](https://github.com/koalaman/shellcheck) for further explanation, how the ShellCheck works and which issues can be detected. + diff --git a/docs/source/conf.py b/docs/source/conf.py index bee24df..16e1471 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -39,10 +39,14 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.mathjax', + 'sphinx.ext.ifconfig', + 'sphinx.ext.viewcode', ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['rudratemplates'] +templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: @@ -74,7 +78,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -85,7 +89,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['rudrastatic'] +html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -171,3 +175,6 @@ # A list of files that should not be packed into the epub file. epub_exclude_files = ['search.html'] + + +# -- Extension configuration ------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index 77a6d4e..57f07b2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,5 @@ .. fabric8-analytics-rudra documentation master file, created by - sphinx-quickstart on Wed Mar 20 21:38:11 2019. + sphinx-quickstart on Wed Mar 20 23:00:30 2019. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. @@ -18,3 +18,106 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` + +.. toctree:: + :maxdepth: 3 + +.. automodule:: rudra.data_store + :members: + :undoc-members: + :show-inheritance: + +.. 
automodule:: rudra.data_store.aws + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.data_store.local_data_store + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.data_store.bigquery + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.data_store.bigquery.base + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.data_store.bigquery.maven_bigquery + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.data_store.bigquery.pypi_bigquery + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.data_store.bigquery.npm_bigquery + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments.emr_scripts + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments.emr_scripts.emr_config + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments.emr_scripts.emr_script_builder + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments.emr_scripts.maven_emr + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments.emr_scripts.npm_emr + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.deployments.emr_scripts.pypi_emr + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.utils + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.utils.validation + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.utils.helper + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: rudra.utils.mercator + :members: + :undoc-members: + :show-inheritance: + +.. 
automodule:: rudra.utils.pypi_parser + :members: + :undoc-members: + :show-inheritance: diff --git a/requirements.txt b/requirements.txt index ba9bab5..7ee48a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,9 @@ +alabaster==0.7.12 asn1crypto==0.24.0 atomicwrites==1.3.0 attrs==18.2.0 aws-xray-sdk==0.95 +Babel==2.6.0 beautifulsoup4==4.7.1 boto==2.49.0 boto3==1.7.84 @@ -26,6 +28,7 @@ google-cloud-core==0.29.1 google-resumable-media==0.3.2 googleapis-common-protos==1.5.8 idna==2.8 +imagesize==1.1.0 Jinja2==2.10 jmespath==0.9.3 jsondiff==1.1.1 @@ -36,6 +39,7 @@ mock==2.0.0 more-itertools==6.0.0 moto==1.3.6 numpy==1.16.1 +packaging==19.0 pbr==5.1.2 pluggy==0.8.1 protobuf==3.7.0 @@ -46,6 +50,8 @@ pyasn1-modules==0.2.4 pycodestyle==2.5.0 pycparser==2.19 pycryptodome==3.7.3 +Pygments==2.3.1 +pyparsing==2.3.1 pytest==4.3.0 python-dateutil==2.8.0 python-jose==2.0.2 @@ -59,7 +65,16 @@ ruamel.yaml==0.15.88 s3transfer==0.1.13 scipy==1.2.1 six==1.12.0 +snowballstemmer==1.2.1 soupsieve==1.8 +Sphinx==2.0.0 +sphinx-rtd-theme==0.4.3 +sphinxcontrib-applehelp==1.0.1 +sphinxcontrib-devhelp==1.0.1 +sphinxcontrib-htmlhelp==1.0.1 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.2 +sphinxcontrib-serializinghtml==1.1.1 urllib3==1.24.1 websocket-client==0.54.0 Werkzeug==0.14.1 diff --git a/rudra/data_store/bigquery/base.py b/rudra/data_store/bigquery/base.py index 2b4ec7e..868df2c 100644 --- a/rudra/data_store/bigquery/base.py +++ b/rudra/data_store/bigquery/base.py @@ -1,14 +1,10 @@ """Implementation Bigquery builder base.""" import os import time -from collections import Counter from google.cloud import bigquery -from requests import Session -from requests_futures.sessions import FuturesSession from rudra import logger -from rudra.utils.helper import CacheDict from rudra.data_store.aws import AmazonS3 @@ -88,40 +84,7 @@ class DataProcessing: def __init__(self, s3_client=None): """Initialize DataProcessing object.""" - self.data = None - self.cache = 
CacheDict(max_len=50000) - self.pkg_counter = Counter() self.s3_client = s3_client - self.req_session = FuturesSession(session=Session()) - - def async_fetch(self, url, - method='GET', - others=None): - """Fetch urls asynchronously.""" - if url in self.cache: - self.responses.append(self.cache[url]) - else: - self.process_queue.append( - (others, url, self.req_session.request(method, url))) - - def is_fetch_done(self, callback=lambda x: x): - """Check whether all the requests are processed or not.""" - _flag = True - for resp in self.process_queue: - _flag = False - others, url, req_obj = resp - logger.info("other:{}, url:{}, req_obj:{}".format(others, url, req_obj)) - - if url in self.cache: - req_obj.cancel() - self.process_queue.remove(resp) - self.responses.append(self.cache[url]) - elif req_obj.done(): - req_obj.cancel() - self.process_queue.remove(resp) - self.cache[url] = (others, callback(req_obj)) - self.responses.append((others, callback(req_obj))) - return _flag def update_s3_bucket(self, data, bucket_name, diff --git a/rudra/data_store/bigquery/pypi_bigquery.py b/rudra/data_store/bigquery/pypi_bigquery.py index ab6c88c..69cefd1 100644 --- a/rudra/data_store/bigquery/pypi_bigquery.py +++ b/rudra/data_store/bigquery/pypi_bigquery.py @@ -6,6 +6,7 @@ from rudra.data_store.bigquery.base import BigqueryBuilder from rudra.utils.pypi_parser import pip_req from rudra.data_store.bigquery.base import DataProcessing +from rudra.utils.validation import BQValidation from rudra import logger @@ -46,52 +47,31 @@ def __init__(self, big_query_instance=None, s3_client=None): self.filename = '{}/big-query-data/collated.json'.format( os.getenv('DEPLOYMENT_PREFIX', 'dev')) - def process(self): + def process(self, validate=False): """Process Pypi Bigquery response data.""" - start = time.monotonic() + bq_validation = BQValidation() logger.info("Running Bigquery for pypi synchronously") self.big_query_instance.run_query_sync() - - logger.info("fetching bigquery result.") - for 
content in self.big_query_instance.get_result(): - self.big_query_content.append(content) - logger.info("collected manifests: {}".format(len(self.big_query_content))) - logger.info("Succefully retrieved data from Bigquery, time:{}".format( - time.monotonic() - start)) - base_url_pypi = 'https://pypi.org/pypi/{pkg}/json' - logger.info("Starting package cleaning") start_process_time = time.monotonic() - for idx, obj in enumerate(self.big_query_content): + for idx, obj in enumerate(self.big_query_instance.get_result()): start = time.monotonic() content = obj.get('content') - self.process_queue = list() - self.responses = list() + packages = [] if content: try: - for name in pip_req.parse_requirements(content): - logger.info("searching pkg:`{}` in Python Package Index \ - Repository" .format(name)) - self.async_fetch(base_url_pypi.format(pkg=name), others=name) + packages = sorted({p for p in pip_req.parse_requirements(content)}) + if validate: + packages = sorted(bq_validation.validate_pypi(packages)) except Exception as _exc: logger.error("IGNORE: {}".format(_exc)) logger.error("Failed to parse content data {}".format(content)) - try: - while not self.is_fetch_done(lambda x: x.result().status_code): - # hold the process until all request finishes. 
- time.sleep(0.001) - except Exception as _exc: - logger.error("IGNORE: {}".format(_exc)) - # discard process_queue - self.process_queue = [] - self.responses = [] - packages = sorted(set(self.handle_response())) if packages: pkg_string = ', '.join(packages) logger.info("PACKAGES: {}".format(pkg_string)) self.counter.update([pkg_string]) - logger.info("Processed content in time: {} process:{}/{}".format( - (time.monotonic() - start), idx, len(self.big_query_content))) + logger.info("Processed content in time: {} counter:{}".format( + (time.monotonic() - start), idx)) logger.info("Processed All the manifests in time: {}".format( time.monotonic() - start_process_time)) @@ -101,18 +81,3 @@ def process(self): filename=self.filename) logger.info("Succefully Processed the PyPiBigQuery") - - def handle_response(self): - """Process and get the response of async requests.""" - results = list() - for resp in self.responses: - pkg_name, req_obj = resp - if isinstance(req_obj, int): - if req_obj == 200: - results.append(pkg_name) - elif req_obj.status_code == 200: - results.append(pkg_name) - logger.info("Received status:{} for pkg:{}".format(req_obj.status_code, pkg_name)) - else: - logger.info("Received status:{} for pkg:{}".format(req_obj.status_code, pkg_name)) - return results diff --git a/rudra/deployments/emr_scripts/__init__.py b/rudra/deployments/emr_scripts/__init__.py index 1078053..4467be4 100644 --- a/rudra/deployments/emr_scripts/__init__.py +++ b/rudra/deployments/emr_scripts/__init__.py @@ -4,4 +4,4 @@ from rudra.deployments.emr_scripts.npm_emr import NpmEMR from rudra.deployments.emr_scripts.pypi_emr import PyPiEMR -__all__ = [MavenEMR, NpmEMR, PyPiEMR] +__all__ = ['MavenEMR', 'NpmEMR', 'PyPiEMR'] diff --git a/rudra/utils/validation.py b/rudra/utils/validation.py index a5905ea..23dc23a 100644 --- a/rudra/utils/validation.py +++ b/rudra/utils/validation.py @@ -1,7 +1,9 @@ """Validation Utility module.""" import urllib.request as request +import xmlrpc.client as 
xmlrpclib from rudra import logger +from pip._vendor.distlib.util import normalize_name as nn def check_field_exists(input_data, fields): @@ -27,3 +29,33 @@ def check_url_alive(url, accept_codes=[401]): except Exception as exc: logger.debug("Unable to reach url", extra={"exception": str(exc)}) return False + + +class BQValidation: + """Add validation for ecosystems.""" + + def __init__(self): + """Initialize the BQValidation object.""" + pypi_org = xmlrpclib.ServerProxy('https://pypi.python.org/pypi') + self.pypi_org_packages = {nn(p) for p in pypi_org.list_packages()} + + def validate_pypi(self, content): + """Validate python packages. + + Attributes: + content (:obj:`str` or [:obj:`str`] or {:obj:`str`}): + list/set of packages or package str + + Returns: + [:obj:`str`]: list of valid packages. + + Raises: + ValueError: if content is not a type of :obj:`str` or :obj:`list` + + """ + if not isinstance(content, (str, list, set, frozenset)): + raise ValueError("content type should be string or set/list of string") + + content = [content] if isinstance(content, str) else content + + return list(self.pypi_org_packages.intersection(content)) diff --git a/tests/data_store/bigquery/test_base.py b/tests/data_store/bigquery/test_base.py index 13aeb62..72d6c57 100644 --- a/tests/data_store/bigquery/test_base.py +++ b/tests/data_store/bigquery/test_base.py @@ -7,8 +7,6 @@ import mock import pytest -from rudra.data_store.bigquery.base import DataProcessing - os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '/path-to-credentials' @@ -150,59 +148,3 @@ def test_iter_(self, _builder_client): assert job_id is not None for d in _builder_client: assert not set(['id', 'name', 'content']).difference(d) - - -class TestDataProcessing: - - def test_async_fetch(self): - dpro = DataProcessing() - pkg = 'flask' - url = 'https://pypi.org/pypi/{p}/json'.format(p=pkg) - dpro.process_queue = list() - dpro.responses = list() - for _ in range(10): - dpro.async_fetch(url, others='flask') - assert 
len(dpro.process_queue) == 10 - assert len(dpro.responses) == 0 - while dpro.process_queue: - _pkg, _url, _obj = dpro.process_queue[-1] - assert _pkg == pkg - assert _url == url - if _obj.done(): - code = _obj.result().status_code - assert code == 200 - dpro.process_queue.pop() - - def test_is_fetch_done(self): - dpro = DataProcessing() - pkg = 'flask' - url = 'https://pypi.org/pypi/{p}/json'.format(p=pkg) - dpro.process_queue = list() - dpro.responses = list() - num = 10 - for _ in range(num): - dpro.async_fetch(url, others='flask') - assert len(dpro.process_queue) == num - assert len(dpro.responses) == 0 - while not dpro.is_fetch_done(lambda x: x.result().status_code): - pq_len = len(dpro.process_queue) - if pq_len < num: - assert len(dpro.responses) == num - pq_len - - def test_caching(self): - dpro = DataProcessing() - pkg = 'flask' - url = 'https://pypi.org/pypi/{p}/json'.format(p=pkg) - dpro.process_queue = list() - dpro.responses = list() - num = 10000 - for _ in range(num): - dpro.async_fetch(url, others='flask') - assert len(dpro.process_queue) == num - assert len(dpro.responses) == 0 - while not dpro.is_fetch_done(lambda x: x.result().status_code): - pq_len = len(dpro.process_queue) - if pq_len < num: - assert len(dpro.responses) == num - pq_len - assert url in dpro.cache - assert dpro.cache[url] == (pkg, 200) diff --git a/tests/data_store/bigquery/test_pypi_bigquery.py b/tests/data_store/bigquery/test_pypi_bigquery.py index b0fecd7..57a4a18 100644 --- a/tests/data_store/bigquery/test_pypi_bigquery.py +++ b/tests/data_store/bigquery/test_pypi_bigquery.py @@ -91,9 +91,9 @@ def test_get_result_async(self, _pypi_bigquery_client): class TestPyPiDataProcessing: - def test_process(self, _data_process_client): + def test_process_with_validation(self, _data_process_client): dp_client, s3_client = _data_process_client - dp_client.process() + dp_client.process(validate=True) data = s3_client.read_json_file(dp_client.filename) assert 'pypi' in data assert 
len(data['pypi']) > 0 @@ -101,12 +101,18 @@ assert 'boto' in k assert 'chardet' in k assert 'flask' in k + assert 'unknown1' not in k assert v == 2 - def test_handle_response(self, _data_process_client): + def test_process_without_validation(self, _data_process_client): dp_client, s3_client = _data_process_client - dp_client.responses = [ - ('flask', 200), ('django', type('Request', (), {"status_code": 200}))] - result = dp_client.handle_response() - assert len(result) == 2 - assert 'flask' in result and 'django' in result + dp_client.process(validate=False) + data = s3_client.read_json_file(dp_client.filename) + assert 'pypi' in data + assert len(data['pypi']) > 0 + for k, v in data['pypi'].items(): + assert 'boto' in k + assert 'chardet' in k + assert 'flask' in k + assert 'unknown1' in k + assert v == 2 diff --git a/tests/utils/test_validation.py index 948215e..729df37 100644 --- a/tests/utils/test_validation.py +++ b/tests/utils/test_validation.py @@ -1,4 +1,4 @@ -from rudra.utils.validation import check_field_exists, check_url_alive +from rudra.utils.validation import check_field_exists, check_url_alive, BQValidation import pytest @@ -20,3 +20,20 @@ def test_check_url_alive(): assert check_url_alive(url) url = 'https://234j23ksadasca.com' assert not check_url_alive(url) + + +class TestBQValidation: + + @staticmethod + def test_validate_pypi_content(): + bq_validation = BQValidation() + content = 'flask' + assert not set(bq_validation.validate_pypi(content)).difference([content]) + content = ['flask', 'django', 'unknownpkg'] + assert not set(['flask', 'django']).difference(bq_validation.validate_pypi(content)) + content = {'flask', 'django'} + assert not content.difference(bq_validation.validate_pypi(content)) + content = frozenset(['flask', 'django']) + assert not content.difference(bq_validation.validate_pypi(content)) + with pytest.raises(ValueError): + 
bq_validation.validate_pypi({"name": "flask"})