diff --git a/adventurous_departures.csv b/adventurous_departures.csv
new file mode 100644
index 0000000..3083df7
--- /dev/null
+++ b/adventurous_departures.csv
@@ -0,0 +1,25 @@
+Category,Finish Date,Name,Start Date
+Adventurous,2018-09-10,New Zealand Encompassed,2018-08-31
+Adventurous,2018-09-10,Cambodia Overland,2018-08-31
+Adventurous,2018-09-10,Peru Overland,2018-08-31
+Adventurous,2018-09-10,Vietnam Safari,2018-08-31
+Adventurous,2018-09-10,Brazil Trek,2018-08-31
+Adventurous,2018-09-10,Cambodia Multisport,2018-08-31
+Adventurous,2018-09-10,Morocco Safari,2018-08-31
+Adventurous,2018-09-10,Galapagos Encompassed,2018-08-31
+Adventurous,2018-09-10,Galapagos Discovery,2018-08-31
+Adventurous,2018-09-10,New Zealand Adventure,2018-08-31
+Adventurous,2018-09-10,Galapagos Trek,2018-08-31
+Adventurous,2018-09-10,Brazil Encompassed,2018-08-31
+Adventurous,2018-09-10,Kenya Express,2018-08-31
+Adventurous,2018-09-10,Galapagos Discovery,2018-08-31
+Adventurous,2018-09-10,Sri Lanka Adventure,2018-08-31
+Adventurous,2018-09-10,Brazil Adventure,2018-08-31
+Adventurous,2018-09-10,Brazil Overland,2018-08-31
+Adventurous,2018-09-10,Kenya Encompassed,2018-08-31
+Adventurous,2018-09-10,Morocco Discovery,2018-08-31
+Adventurous,2018-09-10,Vietnam Trek,2018-08-31
+Adventurous,2018-09-10,New Zealand Safari,2018-08-31
+Adventurous,2018-09-10,Vietnam Encompassed,2018-08-31
+Adventurous,2018-09-10,Ethopia Express,2018-08-31
+Adventurous,2018-09-10,Sri Lanka Safari,2018-08-31
diff --git a/data_collection_script.py b/data_collection_script.py
new file mode 100644
index 0000000..41fe301
--- /dev/null
+++ b/data_collection_script.py
@@ -0,0 +1,97 @@
+import pandas as pd
+import re
+import requests
+import json
+import datetime
+
+API_URL = 'http://127.0.0.1:8000/departures/'
+
+CSV_FILENAME = "adventurous_departures.csv"
+
+# Seconds to wait on the API before giving up instead of hanging forever.
+REQUEST_TIMEOUT = 10
+
+
+def get_api_content(url, timeout=REQUEST_TIMEOUT):
+    """
+    Returns the content of the given URL.
+
+    Raises requests.HTTPError on a 4xx/5xx answer, so an error page is
+    never handed on to the JSON parser.
+    """
+    response = requests.get(url, timeout=timeout)
+    response.raise_for_status()
+    return response.content
+
+
+def get_departures():
+    """
+    Returns the 'departures' data from the URL, following the 'next'
+    link of every page until the API reports no further page.
+    """
+    results = []
+    url = API_URL
+
+    while url:
+        apidata = json.loads(get_api_content(url))
+        results.extend(apidata['results'])
+        url = apidata.get('next')
+
+    return results
+
+
+def convert_to_title_case(input_string):
+    """
+    Returns the given string in title case formatting, with underscores
+    and hyphens replaced by spaces.
+    """
+    temp = re.sub(r'_|-', " ", input_string)
+    return temp.title()
+
+
+def filter_departures(in_data, start_after=datetime.datetime(2018, 6, 1),
+                      category="Adventurous"):
+    """
+    Filters the given data with the desired filters using Pandas.
+
+    Keeps the rows whose start_date is strictly later than start_after
+    and whose category equals the given category.
+    Returns a dataframe for writing to CSV
+    """
+    df = pd.DataFrame(in_data)
+    if df.empty:
+        # No records, hence no columns to filter on.
+        return df
+
+    df['start_date'] = pd.to_datetime(df['start_date'])
+    filtered_df = df[(df.start_date > start_after) &
+                     (df.category == category)]
+
+    return filtered_df
+
+
+def write_csv(data_frame, csv_name=CSV_FILENAME):
+    """
+    Writes the given data frame into a CSV file with the given name.
+
+    Column names are title-cased in the written file only; the frame
+    passed in is left unmodified.
+    """
+    renamed = data_frame.rename(columns=convert_to_title_case)
+    renamed.to_csv(csv_name, index=False)
+
+
+def main():
+    """
+    The main function of the script.
+    Calls all required functions to complete the task
+    of getting API data, filtering it and writing it to
+    a CSV file.
+    """
+    apidata = get_departures()
+    filtered_data = filter_departures(apidata)
+    write_csv(filtered_data)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/db.sqlite3 b/db.sqlite3
index d3ec30e..7923437 100644
Binary files a/db.sqlite3 and b/db.sqlite3 differ
diff --git a/departures/migrations/0002_auto_20181227_1125.py b/departures/migrations/0002_auto_20181227_1125.py
new file mode 100644
index 0000000..887dd77
--- /dev/null
+++ b/departures/migrations/0002_auto_20181227_1125.py
@@ -0,0 +1,43 @@
+# Generated by Django 2.1.4 on 2018-12-27 09:25
+import json
+
+from django.db import migrations
+
+
+def load_data_from_file(apps, schema_editor):
+    """
+    This function loads the departures data from the provided JSON file
+    into the DB during the migration process.
+
+    The file name is relative, so it is looked up in the current working
+    directory of the process running the migration.
+    """
+    Departure = apps.get_model('departures', 'Departure')
+
+    json_filename = "departures.json"
+
+    with open(json_filename, 'r') as json_file:
+        json_data = json.load(json_file)
+
+    # One bulk insert rather than a separate write per departure.
+    Departure.objects.bulk_create([
+        Departure(name=item['name'],
+                  start_date=item['start_date'],
+                  finish_date=item['finish_date'],
+                  category=item['category'])
+        for item in json_data
+    ])
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('departures', '0001_initial'),
+    ]
+
+    operations = [
+        # The no-op reverse lets this migration be unapplied; the loaded
+        # rows are left in place.
+        migrations.RunPython(load_data_from_file,
+                             migrations.RunPython.noop),
+    ]
diff --git a/requirements.txt b/requirements.txt
index 506c7d5..9749344 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
 Django==2.1.4
 djangorestframework==3.7.7
+requests
+pandas
+requests_testing
diff --git a/tests_data_collection_script.py b/tests_data_collection_script.py
new file mode 100644
index 0000000..6156dec
--- /dev/null
+++ b/tests_data_collection_script.py
@@ -0,0 +1,162 @@
+import unittest
+import data_collection_script as dc
+from datetime import datetime
+from unittest.mock import Mock
+from unittest.mock import patch
+import json
+import requests_testing
+
+
+class TestCollectionScript(unittest.TestCase):
+
+    def test_title_case_single_word(self):
+        self.assertEqual(dc.convert_to_title_case("single"), "Single")
+
+    def test_title_case_two_words(self):
+        self.assertEqual(dc.convert_to_title_case("double_word"),
+                         "Double Word")
+
+    def test_title_case_multiple_words(self):
+        self.assertEqual(
+            dc.convert_to_title_case(
+                "multiple_word-with-different_punctuation"),
+            "Multiple Word With Different Punctuation")
+
+    def test_filter_valid_date_and_category(self):
+        input_data = [{
+            "name": "New Zealand Safari",
+            "start_date": "2018-04-03",
+            "finish_date": "2018-04-13",
+            "category": "Marine"
+        }, {
+            "name": "New Zealand Encompassed",
+            "start_date": "2018-06-02",
+            "finish_date": "2018-06-10",
+            "category": "Adventurous"
+        }, {
+            "name": "Australia Encompassed",
+            "start_date": "2018-06-02",
+            "finish_date": "2018-06-10",
+            "category": "Marine"
+        }]
+
+        out_df = dc.filter_departures(input_data)
+        self.assertEqual(len(out_df), 1)
+
+        # Look cells up by column name: positional order of the columns
+        # differs between pandas versions.
+        row = out_df.iloc[0]
+        expected = input_data[1]
+        self.assertEqual(row['category'], expected['category'])
+        self.assertEqual(row['finish_date'], expected['finish_date'])
+        self.assertEqual(row['name'], expected['name'])
+        self.assertEqual(
+            row['start_date'],
+            datetime.strptime(expected['start_date'], '%Y-%m-%d'))
+
+    def test_filter_invalid_category(self):
+        input_data = [{
+            "name": "New Zealand Safari",
+            "start_date": "2018-04-03",
+            "finish_date": "2018-04-13",
+            "category": "Marine"
+        }, {
+            "name": "New Zealand Encompassed",
+            "start_date": "2018-06-02",
+            "finish_date": "2018-06-10",
+            "category": "Random"
+        }, {
+            "name": "Australia Encompassed",
+            "start_date": "2018-06-02",
+            "finish_date": "2018-06-10",
+            "category": "Marine"
+        }]
+
+        out_df = dc.filter_departures(input_data)
+        self.assertEqual(len(out_df), 0)
+
+    @requests_testing.activate
+    def tests_get_departures_single_get(self):
+        mock_data = {
+            "count": 150,
+            "next": None,
+            "previous": None,
+            "results": [
+                {
+                    "name": "New Zealand Safari",
+                    "start_date": "2018-04-03",
+                    "finish_date": "2018-04-13",
+                    "category": "Marine"
+                },
+                {
+                    "name": "New Zealand Encompassed",
+                    "start_date": "2018-08-31",
+                    "finish_date": "2018-09-10",
+                    "category": "Adventurous"
+                }]
+        }
+
+        requests_testing.add(
+            request={'url': 'http://127.0.0.1:8000/departures/'},
+            response={'body': json.dumps(mock_data)}
+        )
+
+        response = dc.get_departures()
+        self.assertEqual(response, mock_data['results'])
+
+    @requests_testing.activate
+    def tests_get_departures_get_with_next_data(self):
+        mock_data = {
+            "count": 150,
+            "next": "http://127.0.0.1:8000/departures/?limit=50&offset=50",
+            "previous": None,
+            "results": [
+                {
+                    "name": "New Zealand Safari",
+                    "start_date": "2018-04-03",
+                    "finish_date": "2018-04-13",
+                    "category": "Marine"
+                },
+                {
+                    "name": "New Zealand Encompassed",
+                    "start_date": "2018-08-31",
+                    "finish_date": "2018-09-10",
+                    "category": "Adventurous"
+                }]
+        }
+
+        mock_data_next = {
+            "count": 150,
+            "next": None,
+            "previous": None,
+            "results": [{
+                "name": "Brazil Adventure",
+                "start_date": "2018-04-03",
+                "finish_date": "2018-04-13",
+                "category": "Classic"
+            },
+            {
+                "name": "Vietnam Encompassed",
+                "start_date": "2018-04-03",
+                "finish_date": "2018-04-13",
+                "category": "Marine"
+            }]
+        }
+
+        requests_testing.add(
+            request={'url': 'http://127.0.0.1:8000/departures/'},
+            response={'body': json.dumps(mock_data)}
+        )
+        requests_testing.add(
+            request={'url':
+                     'http://127.0.0.1:8000/departures/?limit=50&offset=50'},
+            response={'body': json.dumps(mock_data_next)}
+        )
+
+        response = dc.get_departures()
+        self.assertEqual(response,
+                         mock_data['results'] + mock_data_next['results'])
+
+
+if __name__ == '__main__':
+    unittest.main()