Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Marco fix geo ip test #155

Merged
Merged 8 commits on Feb 4, 2020.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 14 additions & 14 deletions .circleci/config.yml
Expand Up @@ -47,9 +47,9 @@ jobs:

- restore_cache:
keys:
- source-v1-{{ .Branch }}-{{ .Revision }}
- source-v1-{{ .Branch }}-
- source-v1-
- source-v2-{{ .Branch }}-{{ .Revision }}
- source-v2-{{ .Branch }}-
- source-v2-

- checkout

Expand All @@ -63,14 +63,14 @@ jobs:

- restore_cache:
keys:
- pyenv-2-v1-{{ .Branch }}-{{ .Revision }}
- pyenv-2-v1-{{ .Branch }}-
- pyenv-2-v1-
- pyenv-2-v2-{{ .Branch }}-{{ .Revision }}
- pyenv-2-v2-{{ .Branch }}-
- pyenv-2-v2-

- run: lore test -s tests.unit.__init__

- save_cache:
key: pyenv-2-v1-{{ .Branch }}-{{ .Revision }}
key: pyenv-2-v2-{{ .Branch }}-{{ .Revision }}
paths:
- /home/circleci/.pyenv
- /home/circleci/python
Expand Down Expand Up @@ -101,9 +101,9 @@ jobs:

- restore_cache:
keys:
- source-v1-{{ .Branch }}-{{ .Revision }}
- source-v1-{{ .Branch }}-
- source-v1-
- source-v2-{{ .Branch }}-{{ .Revision }}
- source-v2-{{ .Branch }}-
- source-v2-

- checkout

Expand All @@ -117,14 +117,14 @@ jobs:

- restore_cache:
keys:
- pyenv-3-v1-{{ .Branch }}-{{ .Revision }}
- pyenv-3-v1-{{ .Branch }}-
- pyenv-3-v1-
- pyenv-3-v2-{{ .Branch }}-{{ .Revision }}
- pyenv-3-v2-{{ .Branch }}-
- pyenv-3-v2-

- run: lore test -s tests.unit.__init__

- save_cache:
key: pyenv-3-v1-{{ .Branch }}-{{ .Revision }}
key: pyenv-3-v2-{{ .Branch }}-{{ .Revision }}
paths:
- /home/circleci/.pyenv
- /home/circleci/python
Expand Down
2 changes: 1 addition & 1 deletion lore/dependencies.py
Expand Up @@ -14,7 +14,7 @@

SQL = ['sqlalchemy>=1.2.0b3, <1.2.99', 'sqlalchemy-migrate>=0.11, <0.11.99']
SNOWFLAKE = [
'snowflake-connector-python>=1.5.5, <1.6.0',
'snowflake-connector-python>=2.0.2, <3.0.0',
'snowflake-sqlalchemy>=1.1.0, <1.2.0',
]
POSTGRES = ['psycopg2>=2.7, <2.7.99'] + SQL
Expand Down
17 changes: 14 additions & 3 deletions lore/io/__init__.py
Expand Up @@ -5,7 +5,10 @@
import shutil
import tarfile
import tempfile

try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse

import lore
from lore.env import require, configparser
Expand Down Expand Up @@ -70,12 +73,20 @@
def download(remote_url, local_path=None, cache=True, extract=False):
if re.match(r'^https?://', remote_url):
protocol = 'http'
elif re.match(r'^s3?://', remote_url):
require(lore.dependencies.S3)
import boto3
from botocore.exceptions import ClientError
protocol = 's3'
url_parts = urlparse(remote_url)
remote_url = url_parts.path[1:]
_bucket = boto3.resource('s3').Bucket(url_parts.netloc)
else:
if s3 is None:
raise NotImplementedError("Cannot download from s3 without config/aws.cfg")
protocol = 's3'
remote_url = prefix_remote_root(remote_url)

_bucket = None
if cache:
if local_path is None:
if protocol == 'http':
Expand All @@ -98,7 +109,7 @@ def download(remote_url, local_path=None, cache=True, extract=False):
if protocol == 'http':
lore.env.retrieve_url(remote_url, temp_path)
else:
bucket.download_file(remote_url, temp_path)
_bucket.download_file(remote_url, temp_path)
except ClientError as e:
logger.error("Error downloading file: %s" % e)
raise
Expand Down
12 changes: 7 additions & 5 deletions lore/transformers.py
Expand Up @@ -278,7 +278,7 @@ def transform(self, data):
class GeoIP(Base):
reader = None

def __init__(self, column, operator):
def __init__(self, column, operator, geoip2_loc = 's3://instacart-buildpacks/downloads/geoip2/latest.tar.gz'):
import lore # This is crazy, why is this statement necessary?
require(lore.dependencies.GEOIP)
import geoip2.database
Expand All @@ -287,13 +287,15 @@ def __init__(self, column, operator):
import lore.io
import glob
file = lore.io.download(
'http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz',
geoip2_loc,
cache=True,
extract=True
)

path = [file for file in glob.glob(file.split('.')[0] + '*') if os.path.isdir(file)][0]
GeoIP.reader = geoip2.database.Reader(os.path.join(path, 'GeoLite2-City.mmdb'))
path = [file for file in glob.glob(os.path.join(*file.split('/')[0:-1]) + '/*') if os.path.isdir(file)][0]
try:
GeoIP.reader = geoip2.database.Reader(os.path.join(path, 'GeoLite2-City.mmdb'))
except FileNotFoundError:
GeoIP.reader = geoip2.database.Reader(os.path.join(path, 'GeoIP2-City.mmdb'))

super(GeoIP, self).__init__(column)
self.operator = operator
Expand Down
16 changes: 8 additions & 8 deletions tests/unit/test_transformers.py
Expand Up @@ -136,23 +136,23 @@ def test_transform_name(self):
class TestGeoIP(unittest.TestCase):
def test_transform_latitude(self):
transformer = lore.transformers.GeoIP('test', 'latitude')

data = pandas.DataFrame({'test': ['124.0.0.1', '124.0.0.2']})
transformed = transformer.transform(data)
self.assertEqual(transformed.iloc[0], 37.5112)
self.assertEqual(transformed.iloc[1], 37.5112)
self.assertAlmostEqual(transformed.iloc[0], 37.5112)
self.assertAlmostEqual(transformed.iloc[1], 37.5112)

def test_transform_longitude(self):
transformer = lore.transformers.GeoIP('test', 'longitude')

data = pandas.DataFrame({'test': ['124.0.0.1', '124.0.0.2']})
transformed = transformer.transform(data)
self.assertEqual(transformed.iloc[0], 126.9741)
self.assertEqual(transformed.iloc[1], 126.9741)
self.assertAlmostEqual(transformed.iloc[0], 126.9741)
self.assertAlmostEqual(transformed.iloc[1], 126.9741)

def test_transform_accuracy(self):
transformer = lore.transformers.GeoIP('test', 'accuracy')

data = pandas.DataFrame({'test': ['124.0.0.1', '124.0.0.2']})
transformed = transformer.transform(data)
self.assertEqual(transformed.iloc[0], 200)
Expand All @@ -164,7 +164,7 @@ def test_missing_ip(self):
transformed = transformer.transform(data)
self.assertTrue(math.isnan(transformed.iloc[0]))


class TestDistance(unittest.TestCase):
def test_distance(self):
data = pandas.DataFrame({
Expand Down