In [1]:
import numpy as np
import pandas as pd
import bz2


In [4]:
def data_preprocessing(df, year=2013):
    '''
    Takes a dataframe and a year, filters it by year, drops the year,
    and drops null values.
    Returns a dataframe.
    
    PARAMETERS
    df: Dataframe
    year: Year to filter by
    '''
    df[df['year'] == year]
    df.drop('year', axis=1, inplace=True)
    df.dropna(inplace=True)
    return df

In [5]:
data_iterator = pd.read_csv('challenge/bookings.csv.bz2', chunksize=10**6, 
                            sep='^', usecols=['arr_port', 'pax', 'year'], 
                            compression='bz2')

chunk_list = []  
# Each chunk is in dataframe format
for data_chunk in data_iterator: 
    data_chunk = data_preprocessing(data_chunk)
    chunk_list.append(data_chunk)

bookings = pd.concat(chunk_list)

In [6]:
bookings.shape

(10000009, 2)

In [7]:
def top_n(n):
    top_n = bookings.pivot_table(values='pax', index='arr_port', 
                                  aggfunc=np.sum)\
                                  .sort_values(by='pax', ascending=False).head(n)
    return top_n

In [17]:
def top_n(n):
    top_n = bookings.pivot_table(values='pax', index='arr_port', 
                                  aggfunc=np.sum)\
                                  .sort_values(by='pax', ascending=False).head(n)
    top_n_dict = {}
    for i in range(n):
        top_n_dict[top_n.index[i]] = top_n.values[i,0]
  
    return top_n_dict

In [36]:
top_n(10)

{'LHR     ': 88809.0,
 'MCO     ': 70930.0,
 'LAX     ': 70530.0,
 'LAS     ': 69630.0,
 'JFK     ': 66270.0,
 'CDG     ': 64490.0,
 'BKK     ': 59460.0,
 'MIA     ': 58150.0,
 'SFO     ': 58000.0,
 'DXB     ': 55590.0}

In [12]:
range(10)

range(0, 10)

In [33]:
def top_n_airports(n):
    
    number = int(n)
    try:
        if number > 0:
            result = top_n(number)
            return jsonify(result)
        else:
            return jsonify({'message':'Check input format'})
    except:
        return jsonify({'message':'Something failed. Check input format'})

### API

In [None]:
import numpy as np
import pandas as pd
import bz2

from flask import Flask, jsonify, request
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)

#############################
'''
def data_preprocessing(df, year=2013):
    ''' '''
    Takes a dataframe and a year, filters it by year, drops the year,
    and drops null values.
    Returns a dataframe.
    
    PARAMETERS
    df: Dataframe
    year: Year to filter by
    ''' '''
    df[df['year'] == year]
    df.drop('year', axis=1, inplace=True)
    df.dropna(inplace=True)
    return df

data_iterator = pd.read_csv('challenge/bookings.csv.bz2', chunksize=10**6, 
                            sep='^', usecols=['arr_port', 'pax', 'year'], 
                            compression='bz2')

chunk_list = []  
# Each chunk is in dataframe format
for data_chunk in data_iterator: 
    data_chunk = data_preprocessing(data_chunk)
    chunk_list.append(data_chunk)

bookings = pd.concat(chunk_list)
'''

def top_n(n):
    top_n = bookings.pivot_table(values='pax', index='arr_port', 
                                  aggfunc=np.sum)\
                                  .sort_values(by='pax', ascending=False).head(n)
    top_n_dict = {}
    for i in range(n):
        top_n_dict[top_n.index[i]] = top_n.values[i,0]
  
    return top_n_dict
###################################

@app.route('/top_n_airports/<string:n>', methods=['GET'])
def top_n_airports(n):
    
    number = int(n)
    try:
        if number > 0:
            result = top_n(number)
            return jsonify(result)
        else:
            return jsonify({'message':'Check input format'})
    except:
        return jsonify({'message':'Something failed. Check input format'})
    

# Option 2 Ventaja -> No errores fatales, NONE
@app.route('/top_n_airports', methods=['GET'])
def top_n_airports_opt2():
    
    number = request.args.get('n', type=int)
    try:
        if number > 0:
            result = top_n(number)
            return jsonify(result)
        else:
            return jsonify({'message':'Check input format'})
    except:
        return jsonify({'message':'Something failed. Check input format'})
    
# With POST
@app.route('/top_n_airports_multiple', methods=['POST'])
def top_n_airports_mult():
    try:
        numbers = request.json
        result = []
        for number in numbers:
            top_result = [number['top_n'], top_n_airports(number['n'])]
            result.append(top_result)
            return jsonify(result)
    except: 
        return jsonify({'message':'Something failed. Check input format'})
    
##############################

if __name__ == '__main__':
    app.run(host='10.0.2.15', debug=False, port=5000) #host='10.0.2.15'

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://10.0.2.15:5000/ (Press CTRL+C to quit)
10.0.2.15 - - [15/Jun/2021 18:56:42] "[37mPOST /top_n_airports_multiple HTTP/1.1[0m" 200 -
