In [1]:
## Enable matplotlib inline
%matplotlib inline
import matplotlib.pyplot as plt

## Imports
import pandas as pd
# Silence pandas' chained-assignment warning: later cells assign columns on sliced frames.
pd.set_option('mode.chained_assignment',None)
# NOTE(review): 'display.mpl_style' was deprecated and later removed from pandas —
# confirm the option still exists in the installed version.
pd.set_option('display.mpl_style', 'default') 
# Widen the text output and raise the row/column display caps for frame inspection.
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

import numpy as np

In [2]:
# Load the gift table from the working directory; later cells read its
# GiftId, Latitude, Longitude and Weight columns.
gifts_df = pd.read_csv('gifts.csv')

In [3]:
import math

## --------------------------------------------------
def haversine(lon1, lat1, lon2, lat2, radius=6371.0):
    """
    Calculate the great circle distance between two points
    on a sphere (coordinates specified in decimal degrees).

    Haversine formula:
        d = 2 * r * arcsin(sqrt(sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)))

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in degrees.
    radius : float, optional
        Sphere radius; the result is expressed in the same unit.
        Defaults to 6371 (Earth radius in kilometers). Use 3956 for miles.

    Returns
    -------
    float
        Great circle distance between the two points.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])

    half_dlat = (lat2 - lat1) / 2
    half_dlon = (lon2 - lon1) / 2
    a = math.sin(half_dlat)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(half_dlon)**2

    # Floating-point rounding can leave `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))

    central_angle = 2 * math.asin(math.sqrt(a))
    return central_angle * radius

In [4]:
## --------------------------------------------------
def trip_work(trip):
    """
    Calculates the score for a trip.

    The trip starts at the pole (longitude 0, latitude 90) carrying the
    summed 'Weight' of all rows plus a base weight of 10.0, visits the rows
    in frame order, sheds each row's 'Weight' at its stop, and finally
    returns to the pole carrying only the base weight. Each leg contributes
    (leg distance) * (weight carried on that leg).

    Parameters
    ----------
    trip : DataFrame
        Rows in visiting order with 'Longitude', 'Latitude' and 'Weight'
        columns.

    Returns
    -------
    float
        Total weighted distance of the trip.
    """
    base_weight = 10.0
    pole_lon, pole_lat = 0, 90

    work = 0.0
    load = base_weight + trip['Weight'].sum()
    lon, lat = pole_lon, pole_lat

    # Walk the column arrays directly instead of iterrows(): same values in
    # the same order, without building a Series object for every row.
    stops = zip(trip['Longitude'].values,
                trip['Latitude'].values,
                trip['Weight'].values)
    for stop_lon, stop_lat, stop_weight in stops:
        work += haversine(lon, lat, stop_lon, stop_lat) * load
        load -= stop_weight
        lon, lat = stop_lon, stop_lat

    # Return leg: only the base weight is left.
    work += haversine(lon, lat, pole_lon, pole_lat) * base_weight

    return work

In [5]:
## --------------------------------------------------
def score(df):
    """
    Calculates the total score on the entire dataset.

    Parameters
    ----------
    df : DataFrame
        Must contain a 'TripId' column plus the columns trip_work() reads.
        Rows of one trip are scored in the order they appear in df.

    Returns
    -------
    tuple
        (total, n): the summed trip_work() over all trips with TripId >= 1,
        and n, the largest TripId present. An empty frame yields (0, 0).
    """
    # max() of an empty column is NaN, which cannot drive a trip count.
    if len(df) == 0:
        return 0, 0

    n = df['TripId'].max()
    total = 0
    # One grouping pass instead of re-filtering the whole frame per trip id.
    # Groups come out in ascending TripId order with their row order intact.
    for trip_id, trip in df.groupby('TripId'):
        if trip_id >= 1:  # ids below 1 were never visited by the 1..n scan
            total += trip_work(trip)
    return total, n

In [6]:
## --------------------------------------------------
def dlon(lon1, lon2):
    """
    Absolute angular separation between two longitudes.

    Both arguments are in degrees and may be scalars or numpy arrays.
    The difference is wrapped around the circle, so the result is the
    unsigned shorter way round, in degrees.
    """

    delta = np.radians(lon2) - np.radians(lon1)

    # atan2(sin, cos) folds the raw difference back into (-pi, pi].
    wrapped = np.arctan2(np.sin(delta), np.cos(delta))

    return np.degrees(np.fabs(wrapped))

In [7]:
# Helper columns holding the pole's coordinates (longitude 0, latitude 90) for every gift.
gifts_df['NPlon'] = 0
gifts_df['NPlat'] = 90

# Cost of a gift = its weight times its great-circle distance from the pole.
gifts_df['Cost'] = gifts_df['Weight'] * list(
    haversine(*coords)
    for coords in zip(gifts_df['NPlon'], gifts_df['NPlat'],
                      gifts_df['Longitude'], gifts_df['Latitude'])
)

In [None]:
# Order gifts from highest to lowest Cost and discard the pole helper columns.
gifts_df = (
    gifts_df
    .sort_values(by='Cost', ascending=False)
    .drop(['NPlon', 'NPlat'], axis=1)
)

In [None]:
import time

# Greedy trip builder: seed each trip with the first remaining gift, then keep
# adding the (gift, position) pair from the nearby-longitude candidates that
# yields the lowest extra work, until the weight check or the candidates run out.
trip_number = 1
trips = []  # one DataFrame per finished trip

gifts = gifts_df[:]  # working pool of gifts not yet assigned to a trip

while len(gifts):

    t0 = time.time()
    
    print 'Working on trip #', trip_number
    
    # Take the first remaining gift as the seed and remove it from the pool.
    trip  = gifts[:1]
    gifts = gifts[1:]
    
    central_longitude = trip['Longitude'].values[0]
    
    # Candidates: remaining gifts whose longitude lies within 1 degree of the seed's.
    search_zone = gifts[dlon(central_longitude, gifts['Longitude'].values) < 1.0]
    
    # NOTE(review): the 990.0 check runs before each insertion, so the gift added
    # in the last pass can push the trip weight beyond 990.0 — confirm this
    # matches the intended capacity limit.
    while trip['Weight'].sum() < 990.0 and len(search_zone):
        
        n = len(trip)
        # Renumber the current stops 1..n; candidate slots sit at 0.5, 1.5, ..., n+0.5,
        # i.e. before the first stop, between neighbours, and after the last stop.
        trip['order'] = np.arange(1,n+1)
        inserts = [i+0.5 for i in xrange(n+1)]
        
        best_work = float('inf')
        best_trip = None
        best_gift = None
        
        # Exhaustive search: every candidate gift in every slot.
        for index, row in search_zone.iterrows():
            
            # Single-row frame for the candidate so it can be concatenated onto the trip.
            row_df = row.to_frame().transpose()
    
            for k in inserts:
                row_df['order'] = k
                test_trip = pd.concat([trip, row_df])
                test_trip = test_trip.sort_values(by='order')
                # Work of the combined trip minus the summed per-gift 'Cost' values.
                work = trip_work(test_trip) - test_trip['Cost'].sum()
                if work < best_work:
                    best_work = work
                    best_trip = test_trip
                    best_gift = int(row['GiftId'])
                    
        # Commit the best insertion and drop that gift from both pools.
        trip = best_trip
        gifts = gifts[gifts['GiftId'] != best_gift]
        search_zone = search_zone[search_zone['GiftId'] != best_gift]
        
        print len(gifts)
        
    # NOTE(review): a trip that never enters the inner loop has no 'order' column,
    # so frames collected in `trips` may differ in columns.
    trip['TripId'] = trip_number
    trips.append(trip)
    trip_number += 1
    
    t1 = time.time()
    print 'Time to complete trip:', t1-t0
    
    
    
        

Working on trip # 1
99998
99997
99996
99995
99994
99993
99992
99991
99990