In [None]:
import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

import datetime as dt
import matplotlib.pyplot as plt # for plotting

In [None]:
# Load the four input tables from CSV files in the working directory.
Train, Test, Riders, variables = (
    pd.read_csv(csv_name)
    for csv_name in ('Train.csv', 'Test.csv', 'Riders.csv', 'VariableDefinitions.csv')
)

In [None]:
# Report (rows, columns) of each loaded table, one per line.
for loaded_frame in (Train, Test, Riders):
    print(loaded_frame.shape)

In [None]:
# The arrival-at-destination columns and the target are split off into Y and
# removed from the training features.
target_columns = [
    'Arrival at Destination - Day of Month',
    'Arrival at Destination - Weekday (Mo = 1)',
    'Arrival at Destination - Time',
    'Time from Pickup to Arrival',
]
Y = Train[target_columns]
Train = Train.drop(columns=target_columns)

# Stack Train on top of Test so feature engineering is applied to both at once.
# ignore_index=True gives the stacked frame a fresh 0..n-1 index; without it the
# row labels of Train and Test (both default RangeIndex from read_csv) would be
# duplicated, which breaks any later index-aligned assignment.
Combine_Train_Test = pd.concat([Train, Test], ignore_index=True)

# Identifier-like and descriptive columns that are not used as features.
unused_columns = ['Vehicle Type', 'Platform Type', 'Personal or Business', 'User Id', 'Rider Id', 'Order No']
Combine_Train_Test = Combine_Train_Test.drop(columns=unused_columns)
print(Combine_Train_Test.shape)

In [None]:
# Per-column count of missing values in the combined Train + Test frame.
Missval_Train_Test = Combine_Train_Test.isna().sum()
print('The missing values is show below :', Missval_Train_Test)

In [None]:
# Times

# Placement - Time
# Confirmation - Time
# Arrival at Pickup - Time
# Pickup - Time

In [None]:
#### Convert Time to DateTime

In [None]:
# Parse the four clock-time columns into datetime values.
# pd.to_datetime is used instead of .astype('datetime64'): the unit-less
# 'datetime64' dtype string is rejected with a TypeError by pandas >= 2.0.
for time_col in ['Placement - Time', 'Confirmation - Time', 'Arrival at Pickup - Time', 'Pickup - Time']:
    Combine_Train_Test[time_col] = pd.to_datetime(Combine_Train_Test[time_col])

In [None]:
#### Get Hour and Minute from Times

In [None]:
# Split each parsed timestamp into separate hour and minute feature columns.
# Each pair is (prefix of the new columns, source timestamp column).
_time_sources = (
    ('Placement', 'Placement - Time'),
    ('Confirmation', 'Confirmation - Time'),
    ('Arrival', 'Arrival at Pickup - Time'),
    ('Pickup', 'Pickup - Time'),
)
for prefix, source_col in _time_sources:
    stamps = Combine_Train_Test[source_col].dt
    Combine_Train_Test[prefix + '_Hour'] = stamps.hour
    Combine_Train_Test[prefix + '_Minute'] = stamps.minute

In [None]:
### Additions and Subtractions of time

In [None]:
# Differences between pairs of order events, kept as two separate features per
# pair: (later hour - earlier hour) and (later minute - earlier minute).
# The two parts are subtracted independently, so a minute difference can be
# negative even when the later event really is later.
placement = Combine_Train_Test['Placement - Time'].dt
confirmation = Combine_Train_Test['Confirmation - Time'].dt
arrival = Combine_Train_Test['Arrival at Pickup - Time'].dt
pickup = Combine_Train_Test['Pickup - Time'].dt

# (hour column, minute column, later event, earlier event)
_difference_specs = (
    ('process_time_hour', 'process_time_minute', confirmation, placement),
    ('cust_wait1_time_hour', 'cust_wait1_time_minute', arrival, placement),
    ('cust_wait2_time_hour', 'cust_wait2_time_minute', arrival, confirmation),
    ('standby_hoours', 'standby_minutes', pickup, arrival),
    ('cust_wait3_time_hour', 'cust_wait3_time_minute', pickup, confirmation),
    ('cust_wait4_time_hour', 'cust_wait4_time_minute', pickup, placement),
)
for hour_col, minute_col, later, earlier in _difference_specs:
    Combine_Train_Test[hour_col] = later.hour - earlier.hour
    Combine_Train_Test[minute_col] = later.minute - earlier.minute

# The raw timestamp columns are no longer needed once the features are derived.
Combine_Train_Test = Combine_Train_Test.drop(
    columns=['Placement - Time', 'Confirmation - Time', 'Arrival at Pickup - Time', 'Pickup - Time']
)

In [None]:
#### Get Time of Day from Hours

In [None]:
def get_part_hr(hour):
    """Map an hour of the day to a named part of the day.

    Buckets (bounds as compared by the code):
        (4, 8]   -> 'Early Morning'
        (8, 12)  -> 'Morning'
        [12, 16] -> 'Noon'
        (16, 20] -> 'Evening'
        (20, 22] -> 'Night'
        anything else (including NaN) -> 'Late Night'
    """
    # Anything that is not strictly after 4 (including NaN, for which every
    # comparison is False) belongs to the late-night bucket.
    if not hour > 4:
        return 'Late Night'
    # From here on the hour is > 4, so each check only needs an upper bound.
    if hour <= 8:
        return 'Early Morning'
    if hour < 12:
        return 'Morning'
    if hour <= 16:
        return 'Noon'
    if hour <= 20:
        return 'Evening'
    if hour <= 22:
        return 'Night'
    return 'Late Night'

In [None]:
# Label the hour of every order event with its part of the day via get_part_hr.
for event_name in ('Placement', 'Confirmation', 'Arrival', 'Pickup'):
    hour_values = Combine_Train_Test[event_name + '_Hour']
    Combine_Train_Test[event_name + '_Time_Day'] = hour_values.apply(get_part_hr)

In [None]:
#### Get Weekend or Weekday

In [None]:
# Flag each event as 'Weekday' (weekday number below 6, Monday = 1) or 'Weekend'.
# Each pair is (new column, source weekday column).
_weekday_sources = (
    ('Actual Day of Placement', 'Placement - Weekday (Mo = 1)'),
    ('Actual Day of Confirmation', 'Confirmation - Weekday (Mo = 1)'),
    ('Actual Day of Arrival', 'Arrival at Pickup - Weekday (Mo = 1)'),
    ('Actual Day of Pickup', 'Pickup - Weekday (Mo = 1)'),
)
for flag_col, weekday_col in _weekday_sources:
    is_weekday = Combine_Train_Test[weekday_col] < 6
    Combine_Train_Test[flag_col] = np.where(is_weekday, 'Weekday', 'Weekend')

In [None]:
#### Get Times of Month

In [None]:
# Bucket the placement day of month into four "weeks":
# days 1-7, 8-15, 16-23, and everything else.
Place_Day_Month = [
    '1st Week' if 0 < month_day <= 7 else
    '2nd Week' if 7 < month_day <= 15 else
    '3rd Week' if 15 < month_day <= 23 else
    '4th Week'
    for month_day in Combine_Train_Test['Placement - Day of Month']
]

Combine_Train_Test['Placement_Day into Weeks'] = Place_Day_Month

In [None]:
# Bucket the confirmation day of month into four "weeks":
# days 1-7, 8-15, 16-23, and everything else.
Confirm_Day_Month = [
    '1st Week' if 0 < month_day <= 7 else
    '2nd Week' if 7 < month_day <= 15 else
    '3rd Week' if 15 < month_day <= 23 else
    '4th Week'
    for month_day in Combine_Train_Test['Confirmation - Day of Month']
]

Combine_Train_Test['Confirmation_Day into Weeks'] = Confirm_Day_Month

In [None]:
# Bucket the arrival-at-pickup day of month into four "weeks":
# days 1-7, 8-15, 16-23, and everything else.
Arrival_Pickup_Day_Month = [
    '1st Week' if 0 < month_day <= 7 else
    '2nd Week' if 7 < month_day <= 15 else
    '3rd Week' if 15 < month_day <= 23 else
    '4th Week'
    for month_day in Combine_Train_Test['Arrival at Pickup - Day of Month']
]

Combine_Train_Test['Arrival_Pickup_Day into Weeks'] = Arrival_Pickup_Day_Month

In [None]:
# Bucket the pickup day of month into four "weeks":
# days 1-7, 8-15, 16-23, and everything else.
Pickup_Day_Month = [
    '1st Week' if 0 < month_day <= 7 else
    '2nd Week' if 7 < month_day <= 15 else
    '3rd Week' if 15 < month_day <= 23 else
    '4th Week'
    for month_day in Combine_Train_Test['Pickup - Day of Month']
]

Combine_Train_Test['Pickup_Day into Weeks'] = Pickup_Day_Month

In [None]:
# Lat and Long Coords

In [None]:
## Clustering: group the pickup coordinates into 50 spatial clusters.
pickup_coords = Combine_Train_Test[['Pickup Lat', 'Pickup Long']]

# random_state pins the centroid initialisation so the cluster ids (and every
# feature derived from them) are reproducible across kernel restarts.
kmeans = KMeans(n_clusters=50, init='k-means++', random_state=42)

# Fit exactly once: fit_predict both trains the model and returns the label of
# each row, so a separate fit() before it and predict() after it are redundant.
labels1 = kmeans.fit_predict(pickup_coords)  # cluster id of each point
centers1 = kmeans.cluster_centers_  # coordinates of the cluster centres (lat, long)

# Stored as a categorical so the ids are treated as classes, not magnitudes.
Combine_Train_Test['pickup_cluster_label'] = labels1
Combine_Train_Test['pickup_cluster_label'] = Combine_Train_Test['pickup_cluster_label'].astype('category')

In [None]:
# Scatter the pickup coordinates coloured by cluster label, then overlay the
# cluster centres as large translucent black markers on the same axes.
ax = Combine_Train_Test.plot.scatter(x='Pickup Lat', y='Pickup Long', c=labels1, s=50, cmap='viridis')
center_lat, center_long = centers1[:, 0], centers1[:, 1]
ax.scatter(center_lat, center_long, c='black', s=200, alpha=0.5)

plt.show()

In [None]:
## Clustering: group the destination coordinates into 50 spatial clusters.
destination_coords = Combine_Train_Test[['Destination Lat', 'Destination Long']]

# random_state pins the centroid initialisation so the cluster ids (and every
# feature derived from them) are reproducible across kernel restarts.
kmeans = KMeans(n_clusters=50, init='k-means++', random_state=42)

# Fit exactly once: fit_predict both trains the model and returns the label of
# each row, so a separate fit() before it and predict() after it are redundant.
labels1 = kmeans.fit_predict(destination_coords)  # cluster id of each point
centers1 = kmeans.cluster_centers_  # coordinates of the cluster centres (lat, long)

# Stored as a categorical so the ids are treated as classes, not magnitudes.
Combine_Train_Test['Destination_cluster_label'] = labels1
Combine_Train_Test['Destination_cluster_label'] = Combine_Train_Test['Destination_cluster_label'].astype('category')

In [None]:
# Scatter the destination coordinates coloured by cluster label, then overlay
# the cluster centres as large translucent black markers on the same axes.
ax = Combine_Train_Test.plot.scatter(x='Destination Lat', y='Destination Long', c=labels1, s=50, cmap='viridis')
center_lat, center_long = centers1[:, 0], centers1[:, 1]
ax.scatter(center_lat, center_long, c='black', s=200, alpha=0.5)

plt.show()

In [None]:
# Remove the raw latitude/longitude columns from the feature frame.
coordinate_columns = ['Pickup Lat', 'Pickup Long', 'Destination Lat', 'Destination Long']
Combine_Train_Test = Combine_Train_Test.drop(columns=coordinate_columns)

In [None]:
# Temperature
# Precipitation in millimeters

In [None]:
# Fill missing weather readings with the column mean (computed over the combined
# Train + Test rows). The result is assigned back to the frame instead of calling
# fillna(..., inplace=True) on the selected column: that chained in-place form
# operates on a temporary, warns on pandas 2.x, and leaves the frame unchanged
# once Copy-on-Write is enabled.
for weather_col in ['Temperature', 'Precipitation in millimeters']:
    column_mean = Combine_Train_Test[weather_col].mean()
    Combine_Train_Test[weather_col] = Combine_Train_Test[weather_col].fillna(column_mean)

In [None]:
# New feature: label each temperature reading with a named condition.
# The branches are evaluated in order and their ranges overlap: readings in
# [15, 27] take the first label, so only [12, 15) ever reaches the second one;
# everything else falls through to 'Normal Temperature'.
Temperature_Deg = [
    'Average High Temperature' if 15 <= reading <= 27 else
    'Average Low Temperature' if 12 <= reading <= 22 else
    'Normal Temperature'
    for reading in Combine_Train_Test['Temperature']
]
Combine_Train_Test['Temperature Condition'] = Temperature_Deg

In [None]:
# Display the column labels of the feature frame built so far.
Combine_Train_Test.columns

In [None]:
# One-hot encode the string and categorical columns; numeric columns pass
# through unchanged.
Combine_Train_Test = pd.get_dummies(data=Combine_Train_Test)

In [None]:
# Standardise every feature column. The scaler statistics are learned from the
# combined Train + Test rows, and the name is rebound to the scaler's output.
sc = StandardScaler()
sc.fit(Combine_Train_Test)
Combine_Train_Test = sc.transform(Combine_Train_Test)

In [None]:
# Ridge regression on the scaled features.
# (A Lasso with alpha=0.01 was an earlier alternative.)
rr = Ridge(alpha=0.01)

# The combined matrix holds the Train rows first, followed by the Test rows, so
# the boundary is the number of target rows. Deriving it from Y replaces the
# hard-coded 21201 and keeps the split correct if the input files change.
n_train_rows = len(Y)
feature_frame = pd.DataFrame(Combine_Train_Test)

rr.fit(feature_frame.iloc[:n_train_rows], Y['Time from Pickup to Arrival'].values)

# Despite the name, these are the predictions for the Test rows.
y_pred_train_rr = rr.predict(feature_frame.iloc[n_train_rows:])

In [None]:
# Pair every test order number with its predicted delivery time and index the
# table by order number.
results = pd.DataFrame({
    'Order_No': Test['Order No'],
    'Time from Pickup to Arrival': y_pred_train_rr,
}).set_index('Order_No')
results.head()

In [None]:
# Write the predictions (index included) to submission.csv in the working directory.
results.to_csv('submission.csv')