In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

### Import and process the data

In [178]:
# Load the two prepared CSV files.
mp = pd.read_csv('Data/Outputs/mushed.csv')
flights = (
    pd.read_csv('Data/Outputs/flights.csv')
    .drop(columns=['Unnamed: 0', 'ItinID', 'MktID',
                   'Quarter', 'OriginWac', 'DestWac',
                   'ContiguousUSA', 'NumTicketsOrdered'])
)
# pd.to_datetime rather than astype(np.datetime64): casting to a unit-less
# datetime64 dtype is rejected by recent pandas releases.
flights[['departure', 'arrival']] = flights[['departure', 'arrival']].apply(pd.to_datetime)

# Keep only the rows of `mp` whose IATA code occurs as a flight origin.
# drop() returns a new frame, so the filtered slice of `mp` is never mutated
# in place (avoids SettingWithCopyWarning).
mpa = (
    mp[mp['iata'].isin(flights['Origin'].unique())]
    .drop(columns=['country', 'Unnamed: 0'])
)
ports = mpa[mpa['category'] == 'Airport']

### Search function pseudocode:

   **search_func**(_from, to, dates_range_):<br>
        get direct flight if available<br>
        limit the search by _dates_range_<br>
        get label of from<br>
        get label of to<br>
        get all in label of to<br>
        get all in label of from<br>
        get all flights from _all_from_ to _all_to_ from the flights data set<br>
        return **n** cheapest combinations
    

### Search function

In [308]:
def search(fr, to, start, stop, n):
    '''Find flights between two airports and cheap alternatives between their clusters.

    Reads the module-level ``ports`` and ``flights`` DataFrames.

    Parameters
    ----------
    fr, to : str
        Airport abbreviations of the origin and destination, e.g. 'LAX'.
    start, stop : str
        Bounds of the departure window in 'YYYY-MM-DD' format. Both bounds are
        exclusive: only departures strictly after ``start`` and strictly
        before ``stop`` are kept.
    n : int
        Maximum number of cluster-to-cluster flights to return.

    Returns
    -------
    dict
        'direct'   -- every flight from ``fr`` to ``to`` inside the window.
        'indirect' -- the ``n`` flights with the lowest 'PricePerTicket' going
                      from any airport that shares ``fr``'s 'Labels' value to
                      any airport that shares ``to``'s 'Labels' value. The
                      ``fr`` -> ``to`` pair itself is part of that search.

    Raises
    ------
    IndexError
        If ``fr`` or ``to`` does not occur in ``ports['iata']``.
    '''
    def cluster_codes(iata):
        # 'iata' codes of all airports carrying the same 'Labels' value as `iata`.
        label = ports.loc[ports['iata'] == iata, 'Labels']
        if label.empty:
            raise IndexError(f"airport {iata!r} not found in ports['iata']")
        return ports.loc[ports['Labels'] == label.iloc[0], 'iata']

    # Search criteria
    all_from = cluster_codes(fr)
    all_to = cluster_codes(to)
    start = np.datetime64(start)
    stop = np.datetime64(stop)

    # The departure window is common to both lookups, so it is applied once.
    in_window = flights[(flights['departure'] > start)
                        & (flights['departure'] < stop)]

    # Get direct flights
    dir_fl = in_window[(in_window['Origin'] == fr)
                       & (in_window['Dest'] == to)]
    # Get flights between the two clusters
    all_options = in_window[in_window['Origin'].isin(all_from)
                            & in_window['Dest'].isin(all_to)]

    return {'direct': dir_fl,
            'indirect': all_options.nsmallest(n, 'PricePerTicket')}

### Test the function once with arbitrary parameters.

In [309]:
# One sample query: Burbank -> San Antonio over a two-week window, top 100 alternatives.
res = search(fr='BUR', to='SAT',
             start='2020-01-30', stop='2020-02-12',
             n=100)

In [310]:
# Show the flights stored under the 'direct' key of the search() result.
res['direct']

Unnamed: 0,AIRPORT,CITY,STATE,LATITUDE,LONGITUDE,Origin,Dest,Miles,AirlineCompany,PricePerTicket,departure,arrival
1199104,Burbank-Glendale-Pasadena,Burbank,CA,34.200619,-118.358497,BUR,SAT,1211.0,WN,348.35,2020-02-04 16:00:00,2020-02-04 18:01:00


In [312]:
# First five rows of the flights stored under the 'indirect' key.
res['indirect'].head(5)

Unnamed: 0,AIRPORT,CITY,STATE,LATITUDE,LONGITUDE,Origin,Dest,Miles,AirlineCompany,PricePerTicket,departure,arrival
4665325,Los Angeles International,Los Angeles,CA,33.942536,-118.408074,LAX,IAH,1379.0,UA,50.0,2020-02-10 08:52:00,2020-02-10 11:09:00
4591927,Los Angeles International,Los Angeles,CA,33.942536,-118.408074,LAX,DFW,1235.0,AA,53.0,2020-02-08 09:59:00,2020-02-08 12:02:00
4665326,Los Angeles International,Los Angeles,CA,33.942536,-118.408074,LAX,IAH,1379.0,UA,54.0,2020-02-10 09:53:00,2020-02-10 12:10:00
4564188,Los Angeles International,Los Angeles,CA,33.942536,-118.408074,LAX,DAL,1246.0,WN,54.5,2020-02-03 03:12:00,2020-02-03 05:16:00
4591928,Los Angeles International,Los Angeles,CA,33.942536,-118.408074,LAX,DFW,1235.0,AA,55.0,2020-02-08 10:46:00,2020-02-08 12:49:00


#### Test whether I can find a cheaper alternative for more than 70% of the flights that cost more than 200 USD

Slice the flights that cost more than 200 USD, assign them to the `Search` DataFrame, and add two columns: `alt_price` and `AirBnB`.

Find alternatives for each row:
- Store the average price of the alternatives as the row's `alt_price`.
- If there is no alternative, set `alt_price = 0`.
- Search for the room price in the area and fill it into the `AirBnB` column; otherwise add q_price; otherwise add 0.

In [346]:
# Flights priced above 200, most expensive first, capped at one million rows.
Search = (
    flights
    .loc[flights['PricePerTicket'].gt(200.0)]
    .nlargest(1_000_000, 'PricePerTicket')
)

In [347]:
# Initialise the two result columns to 0.
Search['alt_price'] = 0
Search['AirBnB'] = 0

In [351]:
# Renumber the rows from 0; the previous row labels are kept as an 'index' column.
Search = Search.reset_index()
Search.head()

Unnamed: 0,index,AIRPORT,CITY,STATE,LATITUDE,LONGITUDE,Origin,Dest,Miles,AirlineCompany,PricePerTicket,departure,arrival,alt_price,AirBnB
0,68438,Ted Stevens Anchorage International,Anchorage,AK,61.17432,-149.996186,ANC,DEN,2405.0,UA,1000.0,2020-10-26 15:36:00,2020-10-26 19:36:00,0,0
1,72807,Ted Stevens Anchorage International,Anchorage,AK,61.17432,-149.996186,ANC,IAH,3266.0,UA,1000.0,2020-11-24 18:34:00,2020-11-25 00:00:00,0,0
2,174269,William B Hartsfield-Atlanta Intl,Atlanta,GA,33.640444,-84.426944,ATL,DEN,1199.0,DL,1000.0,2020-01-27 03:35:00,2020-01-27 05:34:00,0,0
3,185288,William B Hartsfield-Atlanta Intl,Atlanta,GA,33.640444,-84.426944,ATL,DFW,731.0,AA,1000.0,2020-03-01 10:11:00,2020-03-01 11:24:00,0,0
4,404505,William B Hartsfield-Atlanta Intl,Atlanta,GA,33.640444,-84.426944,ATL,PHL,666.0,AA,1000.0,2020-10-08 08:39:00,2020-10-08 09:45:00,0,0


In [None]:
# Origin and destination columns of the expensive flights.
frl = Search['Origin']
tol = Search['Dest']

In [354]:
# Two days before each departure, computed with vectorised datetime
# arithmetic instead of a Python-level call per row.
froml = Search['departure'] - np.timedelta64(2, 'D')

In [354]:
# Two days AFTER each departure. Subtracting here, as `froml` does, would make
# the window's start and end identical, so no departure could fall inside it.
# NOTE(review): this rebinds `tol`, which an earlier cell set to Search.Dest --
# confirm which meaning later cells expect.
tol = Search['departure'] + np.timedelta64(2, 'D')

In [None]:
# NumPy's duration scalar type is `timedelta64`; `np.timedelta` does not exist.
np.timedelta64(2, 'D')