# Importing Libraries

In [1]:
import os
from math import sqrt
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.metrics import explained_variance_score, r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# Data Preprocessing

In [2]:
# Directory that holds the two input CSV files. It defaults to the location
# originally hard-coded here, so existing runs are unaffected; set the
# FLIGHT_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = Path(os.environ.get('FLIGHT_DATA_DIR', '/home/cupgreek/Documents'))

# Raw inputs: one table of flights and one table of weather observations.
flight_data = pd.read_csv(DATA_DIR / 'flight_details(without NaN).csv')
weather_data = pd.read_csv(DATA_DIR / 'weather_data.csv')

In [3]:
# Round every arrival time down to a multiple of 100 by subtracting its
# remainder modulo 100. ArrTime is later used as a join key against the
# weather table's `time` column.
arr_time_remainder = flight_data['ArrTime'] % 100
flight_data['ArrTime'] = flight_data['ArrTime'] - arr_time_remainder

In [4]:
dataset = flight_data.merge(right = weather_data, left_on = ['FlightDate','Dest','ArrTime'], right_on = ['FlightDate','airport','time'], validate = 'm:m', how = 'inner')

In [5]:
# Drop the join keys and the other date/airport/time columns listed below,
# then keep only the rows whose ArrDel15 value is 1.
dataset = (
    dataset
    .drop(columns=['FlightDate', 'Origin', 'ArrTime', 'Dest', 'DepTime', 'time', 'airport'])
    .loc[lambda frame: frame['ArrDel15'].isin([1])]
)

In [6]:
# Positional split of the remaining columns into NumPy arrays:
#   column 1            -> regression target (y)
#   columns 2 and after -> feature matrix (X)
# Column 0 is used by neither.
# NOTE(review): this depends on the column order left by the merge and drop
# above; confirm which named columns sit at positions 0 and 1.
y = dataset.iloc[:, 1].values
X = dataset.iloc[:, 2:].values

In [7]:
# Hold out 25% of the rows for evaluation. The seed is fixed so that the
# split, and therefore every metric computed from it, is the same after a
# kernel restart instead of changing on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Regressors

In [18]:
# RandomForest Regressor
# random_state is pinned because the forest's bootstrap and feature sampling
# is otherwise different on every run, which makes the reported scores
# impossible to reproduce.
rf_regressor = RandomForestRegressor(n_estimators=10, random_state=42)
rf_regressor.fit(X_train, y_train)

# Predictions on the held-out rows, used in the report.
y_pred_rf = rf_regressor.predict(X_test)

In [9]:
# XGBoost Regressor (gradient-boosted trees; all other hyperparameters default)
# The seed is set explicitly so this model is configured consistently with
# the other regressors in this notebook.
xgb_regressor = XGBRegressor(random_state=42)
xgb_regressor.fit(X_train, y_train)

# Predictions on the held-out rows, used in the report.
y_pred_xgb = xgb_regressor.predict(X_test)



In [19]:
# ExtraTrees Regressor
# random_state is pinned because the randomized split selection is otherwise
# different on every run, which makes the reported scores impossible to
# reproduce.
et_regressor = ExtraTreesRegressor(n_estimators=10, random_state=42)
et_regressor.fit(X_train, y_train)

# Predictions on the held-out rows, used in the report.
y_pred_et = et_regressor.predict(X_test)

# Report

In [20]:
def print_regression_report(title, y_true, y_pred):
    """Print the four evaluation metrics for one model's predictions.

    Parameters
    ----------
    title : str
        Heading printed before the metrics. A leading newline in the title
        produces a blank line between consecutive reports.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Prints explained variance, R-squared, mean squared error and RMSE.
    The MSE is computed once and reused for the RMSE.
    """
    mse = mean_squared_error(y_true, y_pred)
    print(title,
          "\n\tExplained Varience: ", explained_variance_score(y_true, y_pred),
          '\n\tR-squared:\t    ', r2_score(y_true, y_pred),
          '\n\tMean Squared Error: ', mse,
          '\n\tRMSE:\t\t    ', sqrt(mse))


# One report per fitted model, in the same order and with the same headings
# as before; every model is scored against the same held-out targets.
for report_title, predictions in [
    ("XGB Regressor", y_pred_xgb),
    ("\nRandomForest Regressor", y_pred_rf),
    ("\nExtraTrees Regressor", y_pred_et),
]:
    print_regression_report(report_title, y_test, predictions)

XGB Regressor 
	Explained Varience:  0.9466090887201216 
	R-squared:	     0.9466090766348108 
	Mean Squared Error:  288.88783399879617 
	RMSE:		     16.996700679802423

RandomForest Regressor 
	Explained Varience:  0.9391779402911382 
	R-squared:	     0.9390006919040605 
	Mean Squared Error:  330.0553142849492 
	RMSE:		     18.167424536376895

ExtraTrees Regressor 
	Explained Varience:  0.9380376123372746 
	R-squared:	     0.9378924161218823 
	Mean Squared Error:  336.05197757539014 
	RMSE:		     18.331720529600872
