diff --git a/FLIGHT_FARE_PREDICTION/.gitignore b/FLIGHT_FARE_PREDICTION/.gitignore new file mode 100644 index 0000000..58461f2 --- /dev/null +++ b/FLIGHT_FARE_PREDICTION/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints \ No newline at end of file diff --git a/FLIGHT_FARE_PREDICTION/README.md b/FLIGHT_FARE_PREDICTION/README.md new file mode 100644 index 0000000..8843de0 --- /dev/null +++ b/FLIGHT_FARE_PREDICTION/README.md @@ -0,0 +1 @@ +# Flight-Fare-Prediction diff --git a/FLIGHT_FARE_PREDICTION/data/Data_Train.xlsx b/FLIGHT_FARE_PREDICTION/data/Data_Train.xlsx new file mode 100644 index 0000000..a997892 Binary files /dev/null and b/FLIGHT_FARE_PREDICTION/data/Data_Train.xlsx differ diff --git a/FLIGHT_FARE_PREDICTION/fligth_fare_prediction_.ipynb b/FLIGHT_FARE_PREDICTION/fligth_fare_prediction_.ipynb new file mode 100644 index 0000000..75238b2 --- /dev/null +++ b/FLIGHT_FARE_PREDICTION/fligth_fare_prediction_.ipynb @@ -0,0 +1,2242 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "krtRyhJ3TrC_" + }, + "source": [ + "## Importing Packages\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "id": "B6FuNK65fTHP" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0q_lkwWwT6m7" + }, + "source": [ + "## Importing Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 347 + }, + "id": "eyl5eh33fd3e", + "outputId": "6fb4c9fb-3412-4d96-e552-4e8beb578c14" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineDate_of_JourneySourceDestinationRouteDep_TimeArrival_TimeDurationTotal_StopsAdditional_InfoPrice
0IndiGo24/03/2019BangloreNew DelhiBLR → DEL22:2001:10 22 Mar2h 50mnon-stopNo info3897
1Air India1/05/2019KolkataBangloreCCU → IXR → BBI → BLR05:5013:157h 25m2 stopsNo info7662
2Jet Airways9/06/2019DelhiCochinDEL → LKO → BOM → COK09:2504:25 10 Jun19h2 stopsNo info13882
3IndiGo12/05/2019KolkataBangloreCCU → NAG → BLR18:0523:305h 25m1 stopNo info6218
4IndiGo01/03/2019BangloreNew DelhiBLR → NAG → DEL16:5021:354h 45m1 stopNo info13302
5SpiceJet24/06/2019KolkataBangloreCCU → BLR09:0011:252h 25mnon-stopNo info3873
6Jet Airways12/03/2019BangloreNew DelhiBLR → BOM → DEL18:5510:25 13 Mar15h 30m1 stopIn-flight meal not included11087
7Jet Airways01/03/2019BangloreNew DelhiBLR → BOM → DEL08:0005:05 02 Mar21h 5m1 stopNo info22270
8Jet Airways12/03/2019BangloreNew DelhiBLR → BOM → DEL08:5510:25 13 Mar25h 30m1 stopIn-flight meal not included11087
9Multiple carriers27/05/2019DelhiCochinDEL → BOM → COK11:2519:157h 50m1 stopNo info8625
\n", + "
# Load the training set that is committed next to this notebook.
# The file on disk is `data/Data_Train.xlsx`; the path must match that exact
# casing, otherwise the read fails on case-sensitive filesystems (Linux, CI).
df = pd.read_excel("data/Data_Train.xlsx")

# Preview the first ten rows of the raw frame.
df.head(10)
# Column names, non-null counts and dtypes of the freshly loaded frame.
df.info()

# (rows, columns) of the freshly loaded frame.
df.shape
# Per-column count of missing values before cleaning.
df.isna().sum()

# Discard every row that has at least one missing field (mutates `df`;
# the original row labels are kept, they are not renumbered).
df.dropna(axis=0, how="any", inplace=True)

# Re-check the per-column missing counts after the drop.
df.isna().sum()

# Distinct stop categories, then how often each one occurs.
df["Total_Stops"].unique()
df["Total_Stops"].value_counts()
# Distinct values of each categorical column, inspected before encoding.
df["Source"].unique()
df["Destination"].unique()
df["Airline"].unique()
df["Additional_Info"].unique()
# Frequency of every Additional_Info label.
df["Additional_Info"].value_counts()

# The notebook judges this column's distribution unsuitable for training,
# so the column is removed from `df` (mutates the frame).
df.drop(columns=["Additional_Info"], inplace=True)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineDate_of_JourneySourceDestinationRouteDep_TimeArrival_TimeDurationTotal_StopsPrice
0IndiGo24/03/2019BangloreNew DelhiBLR → DEL22:2001:10 22 Mar2h 50mnon-stop3897
1Air India1/05/2019KolkataBangloreCCU → IXR → BBI → BLR05:5013:157h 25m2 stops7662
2Jet Airways9/06/2019DelhiCochinDEL → LKO → BOM → COK09:2504:25 10 Jun19h2 stops13882
3IndiGo12/05/2019KolkataBangloreCCU → NAG → BLR18:0523:305h 25m1 stop6218
4IndiGo01/03/2019BangloreNew DelhiBLR → NAG → DEL16:5021:354h 45m1 stop13302
\n", + "
# Preview the frame after Additional_Info has been removed.
df.head()

from sklearn.preprocessing import LabelEncoder

# Replace airline names by integer codes. `fit` returns the encoder itself,
# so fitting and transforming can be chained on the same column.
encoder = LabelEncoder()
df["Airline"] = encoder.fit(df["Airline"]).transform(df["Airline"])
# Labels learned for Airline and the integer codes now stored in the column.
encoder.classes_
df.Airline.unique()

# NOTE(review): the same encoder object is re-fitted for every column, so after
# each fit it only remembers the most recent column's labels. If the Airline or
# Source mappings are needed later (inverse transform, encoding new rows),
# separate encoders would be required - confirm against the rest of the notebook.
df["Source"] = encoder.fit(df["Source"]).transform(df["Source"])
encoder.classes_

df["Destination"] = encoder.fit(df["Destination"]).transform(df["Destination"])
encoder.classes_

# Remaining columns after encoding.
df.columns
# Stop labels still present as text at this point.
df["Total_Stops"].unique()

# Turn the labels into a stop count: "non-stop" is first rewritten to "0 stop",
# then the leading number of every label ("2 stops" -> 2) is kept as an integer.
stop_labels = df["Total_Stops"].replace("non-stop", "0 stop")
df["Total_Stops"] = stop_labels.str.split().str[0].astype("int64")
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineDate_of_JourneySourceDestinationRouteDep_TimeArrival_TimeDurationTotal_StopsPrice
0324/03/201905BLR → DEL22:2001:10 22 Mar2h 50m03897
111/05/201930CCU → IXR → BBI → BLR05:5013:157h 25m27662
249/06/201921DEL → LKO → BOM → COK09:2504:25 10 Jun19h213882
3312/05/201930CCU → NAG → BLR18:0523:305h 25m16218
4301/03/201905BLR → NAG → DEL16:5021:354h 45m113302
\n", + "
# Preview the frame after the categorical columns have been encoded.
df.head()

# Date_of_Journey is written day-first ("24/03/2019", "1/05/2019"). Without an
# explicit format pandas reads ambiguous values month-first - "1/05/2019" was
# parsed as 2019-01-05 instead of 2019-05-01 - which silently corrupts every
# feature derived from the date (e.g. the month of journey). Pinning the format
# makes the parse unambiguous and fails loudly on any value that does not match.
df["Date_of_Journey"] = pd.to_datetime(df["Date_of_Journey"], format="%d/%m/%Y")

# Confirm that the column is now datetime64.
df.info()
# Keep only the departure hour ("22:20" -> 22).
df["Dep_Time"] = df["Dep_Time"].str.split(":").str[0].astype("int64")

# Same for arrival; an optional date suffix ("01:10 22 Mar") sits after the
# colon and is therefore discarded together with the minutes.
df["Arrival_Time"] = df["Arrival_Time"].str.split(":").str[0].astype("int64")

# An arrival hour smaller than the departure hour is shifted by 24 so that
# it stays larger than the departure hour.
# NOTE(review): flights whose arrival hour is not smaller than the departure
# hour are left untouched even if they land on a later day (the sample shows a
# 25h 30m itinerary departing 08:55 and arriving 10:25) - confirm this is intended.
wraps_midnight = df["Dep_Time"] > df["Arrival_Time"]
df.loc[wraps_midnight, "Arrival_Time"] += 24
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineDate_of_JourneySourceDestinationRouteDep_TimeArrival_TimeDurationTotal_StopsPrice
032019-03-2405BLR → DEL22252h 50m03897
112019-01-0530CCU → IXR → BBI → BLR5137h 25m27662
242019-09-0621DEL → LKO → BOM → COK92819h213882
332019-12-0530CCU → NAG → BLR18235h 25m16218
432019-01-0305BLR → NAG → DEL16214h 45m113302
\n", + "
def duration_time(x):
    """Convert a duration label such as ``"2h 50m"`` into total minutes.

    The label is split on whitespace; every token is an integer followed by a
    one-letter unit, ``h`` for hours or ``m`` for minutes. Tokens are summed, so
    ``"2h 50m"`` -> 170, ``"19h"`` -> 1140 and ``"5m"`` -> 5.

    The previous implementation assumed that a single token was always hours,
    which turned a minutes-only label like ``"5m"`` into 300 minutes.

    Parameters
    ----------
    x : str
        Duration label.

    Returns
    -------
    int
        Duration in minutes.

    Raises
    ------
    ValueError
        If the label is empty, a token has no integer part, or a unit other
        than ``h``/``m`` is used.
    """
    tokens = x.split()
    if not tokens:
        raise ValueError("empty duration label")

    minutes_per_unit = {"h": 60, "m": 1}
    total_minutes = 0
    for token in tokens:
        # Last character is the unit, everything before it is the amount.
        unit = token[-1].lower()
        if unit not in minutes_per_unit:
            raise ValueError(f"unknown duration unit in {token!r}")
        total_minutes += int(token[:-1]) * minutes_per_unit[unit]
    return total_minutes
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineDate_of_JourneySourceDestinationRouteDep_TimeArrival_TimeDurationTotal_StopsPriceMonth_of_Journey
032019-03-2405BLR → DEL2225170038973
112019-01-0530CCU → IXR → BBI → BLR513445276621
242019-09-0621DEL → LKO → BOM → COK92811402138829
332019-12-0530CCU → NAG → BLR18233251621812
432019-01-0305BLR → NAG → DEL16212851133021
\n", + "
" + ], + "text/plain": [ + " Airline Date_of_Journey Source Destination Route \\\n", + "0 3 2019-03-24 0 5 BLR → DEL \n", + "1 1 2019-01-05 3 0 CCU → IXR → BBI → BLR \n", + "2 4 2019-09-06 2 1 DEL → LKO → BOM → COK \n", + "3 3 2019-12-05 3 0 CCU → NAG → BLR \n", + "4 3 2019-01-03 0 5 BLR → NAG → DEL \n", + "\n", + " Dep_Time Arrival_Time Duration Total_Stops Price Month_of_Journey \n", + "0 22 25 170 0 3897 3 \n", + "1 5 13 445 2 7662 1 \n", + "2 9 28 1140 2 13882 9 \n", + "3 18 23 325 1 6218 12 \n", + "4 16 21 285 1 13302 1 " + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Target data visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAEGCAYAAABsLkJ6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAU/klEQVR4nO3df7DldX3f8ecrixJQiRCuzroL3cWudoBJFtkBotWhwYQfkxHs1HYxCaShs2JxJtZ2Eqkz1XRmZ2yqsWVSsasQtRUQgxQmIwZCTExbBC+4wvJjwyIbuOyGvQkTJdGh2fXdP873Zg+Xc/funnP3nrN+no+ZM+d73uf7431h73nd7+f746SqkCS158fG3YAkaTwMAElqlAEgSY0yACSpUQaAJDXqqHE3sJgTTzyx1qxZM+42JOmIcv/99/9lVU0daJ6JD4A1a9YwPT097jYk6YiS5M8Xm8chIElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJatTEXwl8ONxw71MD6+8+++Rl7kSSxsc9AElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVGLBkCS65PsSbKtr/bFJFu7x84kW7v6miQ/6HvvU33LnJnkoSQ7klyTJIfnR5IkHYyDuRncZ4HfAT4/V6iqfzE3neTjwHf75n+iqtYPWM+1wCbgG8BXgAuAOw69ZUnSUlh0D6Cqvg48N+i97q/4fw7ceKB1JFkJHFdV91RV0QuTSw69XUnSUhn1GMBbgWer6vG+2tok30ryJ0ne2tVWATN988x0tYGSbEoynWR6dnZ2xBYlSYOMGgCX8uK//ncDJ1fVGcAHgBuSHAcMGu+vhVZaVVuqakNVbZiamhqxRUnSIEN/IUySo4B/Cpw5V6uqF4AXuun7kzwBvIHeX/yr+xZfDewadtuSpNGNsgfwduCxqvr7oZ0kU0lWdNOnAOuA71TVbuD5JOd0xw0uA24bYduSpBEdzGmgNwL3AG9MMpPkiu6tjbz04O/bgAeTfBv4PeDKqpo7gPxe4DPADuAJPANIksZq0SGgqrp0gfqvDKjdAtyywPzTwOmH2J8k6TDxSmBJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY06mO8Evj7JniTb+mofSfJMkq3d46K+965OsiPJ9iTn99XPTPJQ99413ZfDS5LG5GD2AD4LXDCg/omqWt89vgKQ5FR6XxZ/WrfMJ5Os6Oa/FtgErOseg9YpSVomiwZAVX0deO4g13cxcFNVvVBVTwI7gLOSrASOq6p7qqqAzwOXDNu0JGl0oxwDeF+SB7shouO72irg6b55Zrraqm56fl2SNCbDBsC1wOuB9cBu4ONdfdC4fh2gPlCSTUmmk0zPzs4O2aIk6UCGCoCqeraq9lXVD4FPA2d1b80AJ/XNuhrY1dVXD6gvtP4tVbWhqjZMTU0N06IkaRFDBUA3pj/nncDcGUK3AxuTHJ1kLb2DvfdV1W7g+STndGf/XAbcNkLfkqQRHbXYDEluBM4FTkwyA3wYODfJenrDODuB9wBU1cNJbgYeAfYCV1XVvm5V76V3RtExwB3dQ5I0JosGQFVdOqB83QHm3wxsHlCfBk4/pO4kSYeNVwJLUqMMAElqlAEgSY1a9BiA4IZ7nxpYf/fZJy9zJ5K0dNwDkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUV4HcBh43YCkI4EB0GehD25J+lHkEJAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElq1KIBkOT6JHuSbOur/eckjyV5MMmtSV7d1dck+UGSrd3jU33LnJnkoSQ7klyTJIfnR5IkHYyD2QP4LHDBvNpdwOlV9VPAnwFX973
3RFWt7x5X9tWvBTYB67rH/HVKkpbRogFQVV8HnptXu7Oq9nYvvwGsPtA6kqwEjquqe6qqgM8DlwzXsiRpKSzFMYBfBe7oe702ybeS/EmSt3a1VcBM3zwzXW2gJJuSTCeZnp2dXYIWJUnzjRQAST4E7AW+0JV2AydX1RnAB4AbkhwHDBrvr4XWW1VbqmpDVW2YmpoapUVJ0gKGvhdQksuBXwDO64Z1qKoXgBe66fuTPAG8gd5f/P3DRKuBXcNuW5I0uqH2AJJcAPwG8I6q+n5ffSrJim76FHoHe79TVbuB55Oc0539cxlw28jdS5KGtugeQJIbgXOBE5PMAB+md9bP0cBd3dmc3+jO+Hkb8B+T7AX2AVdW1dwB5PfSO6PoGHrHDPqPG0iSltmiAVBVlw4oX7fAvLcAtyzw3jRw+iF1J0k6bLwSWJIaZQBIUqMMAElqlF8JOSH8HmFJy809AElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUqEUDIMn1SfYk2dZXOyHJXUke756P73vv6iQ7kmxPcn5f/cwkD3XvXdN9ObwkaUwOZg/gs8AF82ofBO6uqnXA3d1rkpwKbARO65b5ZJIV3TLXApuAdd1j/jolScvoYL4U/utJ1swrXwyc201/Dvhj4De6+k1V9QLwZJIdwFlJdgLHVdU9AEk+D1wC3DHyTzBGC32JiyQdCYY9BvDaqtoN0D2/pquvAp7um2+mq63qpufXB0qyKcl0kunZ2dkhW5QkHchSHwQeNK5fB6gPVFVbqmpDVW2YmppasuYkSfsNGwDPJlkJ0D3v6eozwEl9860GdnX11QPqkqQxGTYAbgcu76YvB27rq29McnSStfQO9t7XDRM9n+Sc7uyfy/qWkSSNwaIHgZPcSO+A74lJZoAPAx8Fbk5yBfAU8C6Aqno4yc3AI8Be4Kqq2tet6r30zig6ht7B3yP6ALAkHekO5iygSxd467wF5t8MbB5QnwZOP6TuJEmHjVcCS1KjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1KhFvw9A43XDvU8NrL/77JOXuRNJP2rcA5CkRhkAktQoh4CW0ULDOZI0DkPvASR5Y5KtfY/vJXl/ko8keaavflHfMlcn2ZFke5Lzl+ZHkCQNY+g9gKraDqwHSLICeAa4FfiXwCeq6mP98yc5FdgInAa8DvjDJG+oqn3D9iBJGt5SHQM4D3iiqv78APNcDNxUVS9U1ZPADuCsJdq+JOkQLVUAbARu7Hv9viQPJrk+yfFdbRXwdN88M13tJZJsSjKdZHp2dnaJWpQk9Rs5AJK8HHgH8KWudC3wenrDQ7uBj8/NOmDxGrTOqtpSVRuqasPU1NSoLUqSBliKPYALgQeq6lmAqnq2qvZV1Q+BT7N/mGcGOKlvudXAriXYviRpCEsRAJfSN/yTZGXfe+8EtnXTtwMbkxydZC2wDrhvCbYvSRrCSNcBJDkW+DngPX3l30qynt7wzs6596rq4SQ3A48Ae4GrPANIksZnpACoqu8DPzmv9ssHmH8zsHmUbUqSloa3gpCkRhkAktQoA0CSGmUASFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkhplAEhSowwASWqUASBJjTIAJKlRBoAkNcoAkKRGGQCS1KiRAiDJziQPJdmaZLqrnZDkriSPd8/H981/dZIdSbYnOX/U5iVJw1uKPYB/UlXrq2pD9/qDwN1VtQ64u3tNklOBjcBpwAXAJ5OsWILtS5KGcDiGgC4GPtdNfw64pK9+U1W9UFVPAjuAsw7D9iVJB2HUACjgziT3J9nU1V5bVbsBuufXdPVVwNN9y850tZdIsinJdJLp2dnZEVuUJA1y1IjLv6WqdiV5DXBXkscOMG8G1GrQjFW
1BdgCsGHDhoHzSJJGM9IeQFXt6p73ALfSG9J5NslKgO55Tzf7DHBS3+KrgV2jbF+SNLyhAyDJK5K8am4a+HlgG3A7cHk32+XAbd307cDGJEcnWQusA+4bdvuSpNGMMgT0WuDWJHPruaGqvprkm8DNSa4AngLeBVBVDye5GXgE2AtcVVX7RupekjS0oQOgqr4D/PSA+l8B5y2wzGZg87DblCQtHa8ElqRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRo16N9CJdsO9T427BUmaWO4BSFKjDABJapQBIEmNMgAkqVEGgCQ1ygCQpEYZAJLUKANAkho1ypfCn5Tka0keTfJwkl/r6h9J8kySrd3jor5lrk6yI8n2JOcvxQ8gSRrOKFcC7wX+bVU9kORVwP1J7ure+0RVfax/5iSnAhuB04DXAX+Y5A1+MfxwFrrK+d1nn7zMnUg6Ug29B1BVu6vqgW76eeBRYNUBFrkYuKmqXqiqJ4EdwFnDbl+SNJolOQaQZA1wBnBvV3pfkgeTXJ/k+K62Cni6b7EZFgiMJJuSTCeZnp2dXYoWJUnzjBwASV4J3AK8v6q+B1wLvB5YD+wGPj4364DFa9A6q2pLVW2oqg1TU1OjtihJGmCkAEjyMnof/l+oqi8DVNWzVbWvqn4IfJr9wzwzwEl9i68Gdo2yfUnS8EY5CyjAdcCjVfXbffWVfbO9E9jWTd8ObExydJK1wDrgvmG3L0kazShnAb0F+GXgoSRbu9q/By5Nsp7e8M5O4D0AVfVwkpuBR+idQXSVZwBJ0vgMHQBV9b8ZPK7/lQMssxnYPOw2JUlLxyuBJalRBoAkNcoAkKRGGQCS1CgDQJIaZQBIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktQoA0CSGjXK7aA1gfyyeEkHyz0ASWqUASBJjTIAJKlRHgNohMcGJM1nAOiwMnikybXsAZDkAuC/AiuAz1TVR5e7B+230Af0Qvzgln50LGsAJFkB/Dfg54AZ4JtJbq+qR5azDy29Qw0SSeO33HsAZwE7quo7AEluAi4GDIAjxDg/6A91OMm9m+E5dNeG5Q6AVcDTfa9ngLPnz5RkE7Cpe/k3SbYfYJ0nAn+5ZB0uLXtbwC8e+O1D6m2RdR20Q1hPs/9fR/xv3ex/txEN29s/WGyG5Q6ADKjVSwpVW4AtB7XCZLqqNoza2OFgb8OZ5N5gsvuzt+G02ttyXwcwA5zU93o1sGuZe5AksfwB8E1gXZK1SV4ObARuX+YeJEks8xBQVe1N8j7gD+idBnp9VT084moPaqhoTOxtOJPcG0x2f/Y2nCZ7S9VLhuAlSQ3wXkCS1CgDQJJaVVVH5AO4ANgO7AA+eBi3cz2wB9jWVzsBuAt4vHs+vu+9q7uetgPn99XPBB7q3ruG/cNvRwNf7Or3AmsOobeTgK8BjwIPA782Kf0BPw7cB3y76+03J6W3vvWuAL4F/P4E9razW+9WYHqS+gNeDfwe8Fj3b+9nJqE34I3df6+5x/eA909Cb92y/4be78I24EZ6vyNj7e2wfGge7ge9X9wngFOAl9P7kDn1MG3rbcCbeHEA/BZd6AAfBP5TN31q18vRwNquxxXde/d1vygB7gAu7Or/GvhUN70R+OIh9LYSeFM3/Srgz7oext5ft55XdtMv6/5BnjMJvfX1+AHgBvYHwCT1thM4cV5tIvoDPgf8q2765fQCYSJ6m/cZ8Rf0LoYae2/0LoJ9Ejime30z8Cvj7m3sH+bDPLof/g/6Xl8NXH0Yt7eGFwfAdmBlN70S2D6oD3pnO/1MN89jffVLgf/eP083fRS9K/4yZJ+30bvP0kT1BxwLPEDvqu+J6I3eNSh3Az/L/gCYiN66ZXby0gAYe3/AcfQ+yDJpvc3r5+eB/zMpvbH/LggndMv9ftfjWHs7Uo8BDLqlxKpl3P5rq2o3QPf8mkX6WtVNz6+/aJmq2gt8F/jJQ20oyRr
gDHp/aU9Ef0lWJNlKbwjtrqqamN6A/wL8OvDDvtqk9Aa9K+TvTHJ/d2uUSenvFGAW+N0k30rymSSvmJDe+m2kN8zCJPRWVc8AHwOeAnYD362qO8fd25EaAAd1S4kxWKivA/U78s+S5JXALcD7q+p7k9JfVe2rqvX0/to+K8npk9Bbkl8A9lTV/YvNu9y99XlLVb0JuBC4KsnbJqS/o+gNiV5bVWcAf0tv6GISeust3LvI9B3Alxabdbl6S3I8vRtfrgVeB7wiyS+Nu7cjNQDGfUuJZ5OsBOie9yzS10w3Pb/+omWSHAX8BPDcwTaS5GX0Pvy/UFVfnrT+AKrqr4E/pnfgfhJ6ewvwjiQ7gZuAn03yPyekNwCqalf3vAe4ld6ddCehvxlgptubg97B4DdNSG9zLgQeqKpnu9eT0NvbgSeraraq/g74MvDmcfd2pAbAuG8pcTtweTd9Ob2x97n6xiRHJ1kLrAPu63btnk9yTpIAl81bZm5d/wz4o+oG8RbTres64NGq+u1J6i/JVJJXd9PH0PsFeGwSequqq6tqdVWtofdv54+q6pcmoTeAJK9I8qq5aXpjxdsmob+q+gvg6SRv7Ern0bud+9h763Mp+4d/5q9vXL09BZyT5NhunefRO4NqvL0dyoGVSXoAF9E76+UJ4EOHcTs30huz+zt6CXsFvXG1u+mdunU3cELf/B/qetpOd3S+q2+g90v8BPA77D9168fp7aruoHd0/5RD6O0f09vFe5D9p75dNAn9AT9F7xTLB7v1/oeuPvbe5vV5LvsPAk9Eb/TG2b/N/lNoPzRh/a0Hprv/t/8LOH6CejsW+CvgJ/pqk9Lbb9L7I2gb8D/oneEz1t68FYQkNepIHQKSJI3IAJCkRhkAktQoA0CSGmUASFKjDACpT5J9SbYm2ZbkS0mOXWC+/7vcvUlLzQCQXuwHVbW+qk4H/h9wZf+bSVYAVNWbx9GctJQMAGlhfwr8wyTnJvlakhvo3YedJH8zN1OSX0/yUJJvJ/loV3t9kq92N3P70yT/aDw/grSwZf1SeOlI0d1L5ULgq13pLOD0qnpy3nwXApcAZ1fV95Oc0L21Bbiyqh5PcjbwSXq3npYmhgEgvdgx3S2sobcHcB29m3bdN//Dv/N24Her6vsAVfVcd3fWNwNf6t2uBehd9i9NFANAerEfVO8W1n+v+xD/2wXmDy+95e6PAX89fz3SpPEYgDSaO4FfnTtbKMkJ1ftOhieTvKurJclPj7NJaRADQBpBVX2V3m14p7uho3/XvfWLwBVJ5u7oefGYWpQW5N1AJalR7gFIUqMMAElqlAEgSY0yACSpUQaAJDXKAJCkRhkAktSo/w+Egxgn2m8/rAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df['Price'],kde=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, the output data (Price) is not distributed uniformly: a few fares are far higher than the rest.\n", + "To make the distribution more uniform, we will remove the rows with Price > 40000." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineDate_of_JourneySourceDestinationRouteDep_TimeArrival_TimeDurationTotal_StopsPriceMonth_of_Journey
65752019-01-0305BLR → BOM → DEL5103001522291
147842019-03-1805BLR → BOM → DEL18243651548263
261842019-03-1805BLR → BOM → DEL22293751548263
292452019-01-0305BLR → BOM → DEL5113401795121
537252019-01-0305BLR → BOM → DEL5124001624271
543942019-01-0305BLR → BOM → DEL16233651548261
735152019-03-0321DEL → ATQ → BOM → COK20285002464903
971552019-06-0321DEL → ATQ → BOM → COK20285002522856
1036452019-01-0305BLR → MAA → DEL9142801572091
\n", + "
" + ], + "text/plain": [ + " Airline Date_of_Journey Source Destination Route \\\n", + "657 5 2019-01-03 0 5 BLR → BOM → DEL \n", + "1478 4 2019-03-18 0 5 BLR → BOM → DEL \n", + "2618 4 2019-03-18 0 5 BLR → BOM → DEL \n", + "2924 5 2019-01-03 0 5 BLR → BOM → DEL \n", + "5372 5 2019-01-03 0 5 BLR → BOM → DEL \n", + "5439 4 2019-01-03 0 5 BLR → BOM → DEL \n", + "7351 5 2019-03-03 2 1 DEL → ATQ → BOM → COK \n", + "9715 5 2019-06-03 2 1 DEL → ATQ → BOM → COK \n", + "10364 5 2019-01-03 0 5 BLR → MAA → DEL \n", + "\n", + " Dep_Time Arrival_Time Duration Total_Stops Price Month_of_Journey \n", + "657 5 10 300 1 52229 1 \n", + "1478 18 24 365 1 54826 3 \n", + "2618 22 29 375 1 54826 3 \n", + "2924 5 11 340 1 79512 1 \n", + "5372 5 12 400 1 62427 1 \n", + "5439 16 23 365 1 54826 1 \n", + "7351 20 28 500 2 46490 3 \n", + "9715 20 28 500 2 52285 6 \n", + "10364 9 14 280 1 57209 1 " + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df['Price']>40000]" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "df.drop(df[df['Price']>40000].index,inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAEGCAYAAACevtWaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAQsElEQVR4nO3df6zdd13H8eeLbWyMMVldt5S2sx0OtSMCs+nGMMQ4cD80diYsaQba6JKGOHQYCXSQCP7RpBolYhRMZZCqjFEGZo2JkzEgYtSVDjZYV2oLxa6srkXCD4EMNt7+cb5dztp77j1tz7nn3E+fj+TmfM/nfL7nvu+n9776Od/z/X5OqgpJUlueM+kCJEmjZ7hLUoMMd0lqkOEuSQ0y3CWpQWdOugCACy+8sFasWDHpMiRpQXnwwQe/UVWLZ3psKsJ9xYoV7Ny5c9JlSNKCkuS/Bz3mYRlJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQVFyhOu3ufODAjO03X3nJPFciScNx5i5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUF+zF6fQR+nJ0kLjTN3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoOGCvckf5BkV5JHknw4yTlJFiW5L8ne7vaCvv63J9mXZE+Sa8dXviRpJnNeoZpkKfD7wKqq+kGSbcA6YBVwf1VtTrIR2Ai8Lcmq7vHLgRcBn0zykqp6emw/heY06Orbm6+8ZJ4rkTQfhj0scybwvCRnAucCjwNrga3d41uBG7vttcBdVfVkVe0H9gFrRleyJGkuc4Z7VX0d+DPgAHAI+HZVfQK4uKoOdX0OARd1uywFHut7ioNd27Mk2ZBkZ5KdR44cObWfQpL0LHOGe3csfS2wkt5hlucnecNsu8zQVsc1VG2pqtVVtXrx4sXD1itJGsIwh2VeA+yvqiNV9SPg48DVwBNJlgB0t4e7/geB5X37L6N3GEeSNE+GCfcDwFVJzk0S4BpgN7AdWN/1WQ/c021vB9YlOTvJSuAyYMdoy5YkzWbOs2Wq6oEkdwOfB54CvgBsAc4DtiW5hd5/ADd1/Xd1Z9Q82vW/1TNlJGl+DfVhHVX1TuCdxzQ/SW8WP1P/TcCmUytNknSyvEJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGDXWeu0bDZXclzRdn7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CCvUB2DQVeiStJ8ceYuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDRoq3JO8MMndSb6cZHeSVyZZlOS+JHu72wv6+t+eZF+SPUmuHV/5kqSZDPsxe+8B7q2q1yV5LnAu8Hbg/qranGQjsBF4W5JVwDrgcuBFwCeTvKSqnh5D/U0Y9LF8N195yTxXIqkVc87ck5wPvBq4A6CqflhV3wLWAlu7bluBG7vttcBdVfVkVe0H9gFrRl24JGmwYWbulwJHgA8meRnwIHAbcHFVHQKoqkNJLur6LwX+s2//g13bsyTZAGwAuOSS+Z2h+gHWklo3zDH3M4ErgPdV1SuA79E7BDNIZmir4xqqtlTV6qpavXjx4qGKlSQNZ5hwPwgcrKoHuvt30wv7J5IsAehuD/f1X963/zLg8dGUK0kaxpyHZarqf5I8luRnqmoPcA3waPe1Htjc3d7T7bIduDPJu+m9oXoZsGMcxWv++eavtDAMe7bM7wEf6s6U+Srw2/Rm/duS3AIcAG4CqKpdSbbRC/+ngFs9U0aS5tdQ4V5VDwGrZ3jomgH9NwG
bTqEuSdIp8ApVSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNOxFTNJIeaWrNF7O3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAa5toxGwrVipOnizF2SGmS4S1KDDHdJapDH3KeYx7ElnSxn7pLUIGfup2DQzFqSJs2ZuyQ1yHCXpAZ5WEZj5aEraTKcuUtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGDR3uSc5I8oUk/9TdX5TkviR7u9sL+vrenmRfkj1Jrh1H4ZKkwU7kPPfbgN3A+d39jcD9VbU5ycbu/tuSrALWAZcDLwI+meQlVfX0COvWAJ5XLgmGnLknWQb8KvD+vua1wNZueytwY1/7XVX1ZFXtB/YBa0ZTriRpGMMelvkL4K3Aj/vaLq6qQwDd7UVd+1Lgsb5+B7u2Z0myIcnOJDuPHDlywoVLkgabM9yT/BpwuKoeHPI5M0NbHddQtaWqVlfV6sWLFw/51JKkYQxzzP1VwK8nuQE4Bzg/yT8ATyRZUlWHkiwBDnf9DwLL+/ZfBjw+yqIlSbObc+ZeVbdX1bKqWkHvjdJPVdUbgO3A+q7beuCebns7sC7J2UlWApcBO0ZeuSRpoFNZFXIzsC3JLcAB4CaAqtqVZBvwKPAUcKtnykjS/DqhcK+qzwCf6bb/F7hmQL9NwKZTrE3zwM9pldrkFaqS1CDDXZIaZLhLUoMMd0lqkJ+hugC5foykuThzl6QGGe6S1CDDXZIaZLhLUoN8Q1Uz8k1baWFz5i5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDZoz3JMsT/LpJLuT7EpyW9e+KMl9SfZ2txf07XN7kn1J9iS5dpw/gCTpeMPM3J8C/rCqfg64Crg1ySpgI3B/VV0G3N/dp3tsHXA5cB3w3iRnjKN4SdLM5gz3qjpUVZ/vtr8L7AaWAmuBrV23rcCN3fZa4K6qerKq9gP7gDWjLlySNNgJHXNPsgJ4BfAAcHFVHYLefwDARV23pcBjfbsd7NqOfa4NSXYm2XnkyJETr1ySNNDQ4Z7kPOBjwJur6juzdZ2hrY5rqNpSVauravXixYuHLUOSNIShwj3JWfSC/UNV9fGu+YkkS7rHlwCHu/aDwPK+3ZcBj4+mXEnSMIY5WybAHcDuqnp330PbgfXd9nrgnr72dUnOTrISuAzYMbqSJUlzOXOIPq8CfhP4UpKHura3A5uBbUluAQ4ANwFU1a4k24BH6Z1pc2tVPT3yyiVJA80Z7lX1b8x8HB3gmgH7bAI2nUJdkqRT4BWqktQgw12SGmS4S1KDhnlDderd+cCBGdtvvvKSea5Ep8p/S2k0nLlLUoMMd0lqUBOHZQYZ9BJfklrnzF2SGmS4S1KDDHdJapDhLkkNMtwlqUFNny2jdnhxk3RiDHctaIa+NDMPy0hSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIVSHVpNk+HN0VI3U6cOYuSQ1y5q7Tzmyz+pk409dC5MxdkhpkuEtSgzwsI+k4fnzhwufMXZIa5MxdmsOJzmLH3V8axtjCPcl1wHuAM4D3V9XmcX0vaSE40bN05sOoavI/qOkzlnBPcgbw18BrgYPA55Jsr6pHx/H9pEkYd1gbmHNzjAYb18x9DbCvqr4KkOQuYC1guEunaJKvAKbx1ce4LdTDbOMK96XAY333DwJX9ndIsgHY0N19MskjY6plHC4EvjHpIoZkreNhrUN4/cntdsr1nuT3PZnvMXStY6rppwY9MK5wzwxt9aw7VVuALQBJdlbV6jHVMnILqV5rHQ9rHZ+FVO8
01zquUyEPAsv77i8DHh/T95IkHWNc4f454LIkK5M8F1gHbB/T95IkHWMsh2Wq6qkkbwL+hd6pkB+oql2z7LJlHHWM0UKq11rHw1rHZyHVO7W1pqrm7iVJWlBcfkCSGmS4S1KDJh7uSa5LsifJviQbJ1jH15J8KclDSXZ2bYuS3Jdkb3d7QV//27ua9yS5tq/9F7rn2ZfkL5PMdFroidb2gSSH+68FGGVtSc5O8pGu/YEkK0Zc67uSfL0b24eS3DAltS5P8ukku5PsSnJb1z51YztLrVM3tknOSbIjycNdrX/ctU/duM5R79SN7Qmpqol90Xuz9SvApcBzgYeBVROq5WvAhce0/SmwsdveCPxJt72qq/VsYGX3M5zRPbYDeCW9c/3/Gbh+BLW9GrgCeGQctQG/C/xNt70O+MiIa30X8JYZ+k661iXAFd32C4D/6mqaurGdpdapG9vuec/rts8CHgCumsZxnaPeqRvbE/ma9Mz9mWUKquqHwNFlCqbFWmBrt70VuLGv/a6qerKq9gP7gDVJlgDnV9V/VO9f8e/69jlpVfWvwDfHWFv/c90NXHN0xjGiWgeZdK2Hqurz3fZ3gd30rq6eurGdpdZBJllrVdX/dXfP6r6KKRzXOeodZKL1DmvS4T7TMgWz/cKOUwGfSPJgeksjAFxcVYeg98cFXNS1D6p7abd9bPs4jLK2Z/apqqeAbwM/OeJ635Tki+kdtjn6cnxqau1eJr+C3qxtqsf2mFphCsc2yRlJHgIOA/dV1VSP64B6YQrHdliTDvc5lymYR6+qqiuA64Fbk7x6lr6D6p6Gn+dkaht33e8DXgy8HDgE/Pkc33dea01yHvAx4M1V9Z3Zug743vNW7wy1TuXYVtXTVfVyelenr0ny0lm6T3xcB9Q7lWM7rEmH+9QsU1BVj3e3h4F/pHfI6InupRbd7eGu+6C6D3bbx7aPwyhre2afJGcCP8Hwh1bmVFVPdH88Pwb+lt7YTkWtSc6iF5YfqqqPd81TObYz1TrNY9vV9y3gM8B1TOm4Dqp32sd2LpMO96lYpiDJ85O84Og28CvAI10t67tu64F7uu3twLruHfCVwGXAju6l5neTXNUdT/utvn1GbZS19T/X64BPdccMR+LoH3TnN+iN7cRr7Z77DmB3Vb2776GpG9tBtU7j2CZZnOSF3fbzgNcAX2YKx3W2eqdxbE/Iyb4TO6ov4AZ67/x/BXjHhGq4lN673w8Du47WQe+Y2P3A3u52Ud8+7+hq3kPfGTHAanq/BF8B/oruKuBTrO/D9F4W/ojeDOCWUdYGnAN8lN4bQzuAS0dc698DXwK+SO+XfMmU1PqL9F4afxF4qPu6YRrHdpZap25sgZ8HvtDV9AjwR6P+exrx78GgeqdubE/ky+UHJKlBkz4sI0kaA8NdkhpkuEtSgwx3SWqQ4S5JDTLcddpJ8nR6q/w9kuSjSc4d0O/f57s2aVQMd52OflBVL6+qlwI/BN7Y/2CSMwCq6upJFCeNguGu091ngZ9O8kvprZd+J70LV0hydKVAkrw1vXW6H06yuWt7cZJ7u8XmPpvkZyfzI0jHG8sHZEsLQbfGx/XAvV3TGuCl1VvGtb/f9fSWbr2yqr6fZFH30BbgjVW1N8mVwHuBX56f6qXZGe46HT2vW94VejP3O4Cr6a0Psn+G/q8BPlhV3weoqm92qzNeDXy0b1nus8dbtjQ8w12nox9Ub3nXZ3QB/b0B/cPxy7M+B/jWsc8jTQuPuUtz+wTwO0fPqkmyqHprqe9PclPXliQvm2SRUj/DXZpDVd1Lb1XAnd3hnLd0D70euCXJ0dVEp+kjInWac1VISWqQM3dJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhr0/54if21ZVFQwAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df['Price'],kde=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Better than Before" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inputs for Model" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AirlineSourceDestinationDep_TimeArrival_TimeDurationTotal_StopsPriceMonth_of_Journey
03052225170038973
1130513445276621
242192811402138829
333018233251621812
430516212851133021
\n", + "
" + ], + "text/plain": [ + " Airline Source Destination Dep_Time Arrival_Time Duration \\\n", + "0 3 0 5 22 25 170 \n", + "1 1 3 0 5 13 445 \n", + "2 4 2 1 9 28 1140 \n", + "3 3 3 0 18 23 325 \n", + "4 3 0 5 16 21 285 \n", + "\n", + " Total_Stops Price Month_of_Journey \n", + "0 0 3897 3 \n", + "1 2 7662 1 \n", + "2 2 13882 9 \n", + "3 1 6218 12 \n", + "4 1 13302 1 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df._get_numeric_data().head()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "X = df._get_numeric_data().drop('Price',axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "y = df['Price']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Splitting of Data in the ratio of 70% , 30% for testing" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Building" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "model = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(X_train,y_train)" 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Model Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "predictions = model.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predictions Visualization" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(y_test,predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEGCAYAAABrQF4qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAR0klEQVR4nO3df6ykVX3H8ffHFdGqbaAsZIWli3abFkxc7M1ipTFUrPz4ZzEpzapREklWU0g00TSgf0jbbIJNxaRpsVkDdduIuFYNG2OpuLVRU8t6MSuybLesQuG6G3YVrVgNyvrtH/NsGZb7Y+6dmZ17z75fyWSeOXOeuefcZ+Zzzz3zzJlUFZKkdj1v0g2QJI2XQS9JjTPoJalxBr0kNc6gl6TGPX/SDQA444wzat26dZNuhiStKPfdd9/3q2r1QvWWRdCvW7eO6enpSTdDklaUJP89SD2nbiSpcQa9JDXOoJekxi0Y9ElemGR3km8l2Zvkz7ry05Pck+Sh7vq0vn1uTHIgyf4kl42zA5Kk+Q0yon8KeH1VvQrYAFye5DXADcCuqloP7Opuk+R8YDNwAXA5cGuSVeNovCRpYQsGffX8pLt5SncpYBOwvSvfDlzVbW8C7qyqp6rqYeAAsHGkrZYkDWygOfokq5LsAQ4D91TVvcBZVXUIoLs+s6t+NvBY3+4zXdnxj7klyXSS6SNHjgzTB0nSPAYK+qo6WlUbgHOAjUleOU/1zPYQszzmtqqaqqqp1asXPN9fkrREizrrpqp+BPwbvbn3x5OsAeiuD3fVZoC1fbudAxwcuqWSpCVZ8JOxSVYDv6iqHyV5EfAG4EPATuAa4Obu+q5ul53AHUluAV4GrAd2j6HtmsUd9z46a/lbLjr3BLdE0nIxyBIIa4Dt3ZkzzwN2VNXnk3wd2JHkWuBR4GqAqtqbZAfwIPA0cF1VHR1P8yVJC1kw6KvqfuDCWcp/AFw6xz5bga1Dt06SNDQ/GStJjTPoJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNG+SrBNUAv0tWOnk5opekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXGeR78CzXVOvCTNZsERfZK1Sb6cZF+SvUne3ZXflOR7SfZ0lyv79rkxyYEk+5NcNs4OSJLmN8iI/mngvVX1zSQvBe5Lck9330eq6q/6Kyc5H9gMXAC8DPhSkt+qqqOjbLgkaTALjuir6lBVfbPbfhLYB5w9zy6bgDur6qmqehg4AGwcRWMlSYu3qDn6JOuAC4F7gYuB65O8HZimN+r/Ib0/Av/Rt9sMs/xhSLIF2AJw7rmut7LcuDaO1I6Bz7pJ8hLgM8B7qurHwEeBVwAbgEPAh49VnWX3ek5B1baqmqqqqdWrVy+64ZKkwQwU9ElOoRfyn6iqzwJU1eNVdbSqfgl8jGemZ2aAtX27nwMcHF2TJUmLMchZNwFuA/ZV1S195Wv6qr0JeKDb3glsTnJqkvOA9cDu0TVZkrQYg8z
RXwy8Dfh2kj1d2fuBNyfZQG9a5hHgnQBVtTfJDuBBemfsXOcZN5I0OQsGfVV9jdnn3b8wzz5bga1DtEuSNCIugSBJjTPoJalxrnVzknPdHKl9juglqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4wx6SWqcQS9JjTPoJalxBr0kNc4lEJYxlyeQNAqO6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUuAWDPsnaJF9Osi/J3iTv7spPT3JPkoe669P69rkxyYEk+5NcNs4OSJLmN8iI/mngvVX1O8BrgOuSnA/cAOyqqvXAru423X2bgQuAy4Fbk6waR+MlSQtbMOir6lBVfbPbfhLYB5wNbAK2d9W2A1d125uAO6vqqap6GDgAbBx1wyVJg1nUHH2SdcCFwL3AWVV1CHp/DIAzu2pnA4/17TbTlR3/WFuSTCeZPnLkyOJbLkkayMBBn+QlwGeA91TVj+erOktZPaegaltVTVXV1OrVqwdthiRpkQYK+iSn0Av5T1TVZ7vix5Os6e5fAxzuymeAtX27nwMcHE1zJUmLNchZNwFuA/ZV1S19d+0Erum2rwHu6ivfnOTUJOcB64Hdo2uyJGkxBvnO2IuBtwHfTrKnK3s/cDOwI8m1wKPA1QBVtTfJDuBBemfsXFdVR0fecknSQBYM+qr6GrPPuwNcOsc+W4GtQ7RLy9RcX1j+lovOPcEtkTQoPxkrSY0z6CWpcQa9JDVukDdjNWZzzXtL0ig4opekxjmi10h4No60fDmil6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4xYM+iS3Jzmc5IG+spuSfC/Jnu5yZd99NyY5kGR/ksvG1XBJ0mAGGdF/HLh8lvKPVNWG7vIFgCTnA5uBC7p9bk2yalSNlSQt3oJBX1VfAZ4Y8PE2AXdW1VNV9TBwANg4RPskSUMaZo7++iT3d1M7p3VlZwOP9dWZ6cqeI8mWJNNJpo8cOTJEMyRJ83n+Evf7KPAXQHXXHwbeAWSWujXbA1TVNmAbwNTU1Kx1tPLdce+js5a/5aJzT3BLpJPXkkb0VfV4VR2tql8CH+OZ6ZkZYG1f1XOAg8M1UZI0jCUFfZI1fTffBBw7I2cnsDnJqUnOA9YDu4droiRpGAtO3ST5JHAJcEaSGeCDwCVJNtCblnkEeCdAVe1NsgN4EHgauK6qjo6n6ZKkQSwY9FX15lmKb5un/lZg6zCNkiSNjp+MlaTGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxhn0ktQ4g16SGmfQS1LjDHpJapxBL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhpn0EtS4xb8cnCNzh33PjrpJiwbc/0u3nLRuSe4JVL7HNFLUuMMeklqnEEvSY0z6CWpcQa9JDVuwaBPcnuSw0ke6Cs7Pck9SR7qrk/ru+/GJAeS7E9y2bgaLkkazCAj+o8Dlx9XdgOwq6rWA7u62yQ5H9gMXNDtc2uSVSNrrSRp0RYM+qr6CvDEccWbgO3d9nbgqr7yO6vqqap6GDgAbBxRWyVJS7DUOfqzquoQQHd9Zld+NvBYX72Zruw5kmxJMp1k+siRI0tshiRpIaN+MzazlNVsFatqW1VNVdXU6tWrR9wMSdIxSw36x5OsAeiuD3flM8DavnrnAAeX3jxJ0rCWGvQ7gWu67WuAu/rKNyc5Ncl5wHpg93BNlCQNY8FFzZJ8ErgEOCPJDPBB4GZgR5JrgUeBqwGqam+SHcCDwNPAdVV1dExtlyQNYMGgr6o3z3HXpXPU3wpsHaZRkqTR8ZOxktQ4g16SGmfQS1Lj/IYprQh+I5W0dAa9lhW/blEaPaduJKlxjui1ojmlIy3MEb0
kNc6gl6TGGfSS1DiDXpIa55uxOun4Bq5ONo7oJalxjujHwA/9SFpOHNFLUuMMeklqnFM3apLTZ9IzHNFLUuMMeklqnEEvSY0z6CWpcQa9JDXOs26kjksjqFWO6CWpcQa9JDXOoJekxg01R5/kEeBJ4CjwdFVNJTkd+BSwDngE+OOq+uFwzZQkLdUoRvR/UFUbqmqqu30DsKuq1gO7utuSpAkZx9TNJmB7t70duGoMP0OSNKBhg76ALya5L8mWruysqjoE0F2fOduOSbYkmU4yfeTIkSGbIUmay7Dn0V9cVQeTnAnck+Q/B92xqrYB2wCmpqZqyHZIkuYw1Ii+qg5214eBzwEbgceTrAHorg8P20hJ0tIteUSf5MXA86rqyW77jcCfAzuBa4Cbu+u7RtFQaVL8xKxWumGmbs4CPpfk2OPcUVV3J/kGsCPJtcCjwNXDN1OStFRLDvqq+i7wqlnKfwBcOkyjJEmj4ydjJalxBr0kNc6gl6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcQa9JDXOoJekxg27Hv1Jba5VDSVpOXFEL0mNM+glqXEGvSQ1zqCXpMYZ9JLUOINekhrn6ZXSCeKXjGtSDHppiQxurRQGfR8/ACWpRSdl0BvoGiefX1pufDNWkhp3Uo7opZXA9wA0Kk0Hvf9Cq0X+AdBiNR30kk5O/jF8trHN0Se5PMn+JAeS3DCunyNJmt9YRvRJVgF/C/whMAN8I8nOqnpwHD/PKRppbpN8fYxqBD2qEfokR/qT/NnjmrrZCByoqu8CJLkT2ASMJeillazlgcpi+zaq4J6k5dimcQX92cBjfbdngIv6KyTZAmzpbv4kyf4xteVEOQP4/qQbMSat9q2pfr312TdXZN/eunAVGEPfBvy5YzHkcfuNQSqNK+gzS1k960bVNmDbmH7+CZdkuqqmJt2OcWi1b632C+zbSjWuvo3rzdgZYG3f7XOAg2P6WZKkeYwr6L8BrE9yXpIXAJuBnWP6WZKkeYxl6qaqnk5yPfAvwCrg9qraO46ftYw0Mw01i1b71mq/wL6tVGPpW6pq4VqSpBXLRc0kqXEGvSQ1zqAfQJKbknwvyZ7ucmXffTd2yzzsT3JZX/nvJvl2d99fJ0lXfmqST3Xl9yZZd+J7NJiVuIxFkke63/ueJNNd2elJ7knyUHd9Wl/9RR2/E9yX25McTvJAX9nI+jLJ5+IcfWvidZZkbZIvJ9mXZG+Sd3flkzt2VeVlgQtwE/C+WcrPB74FnAqcB3wHWNXdtxv4PXqfKfhn4Iqu/E+Av+u2NwOfmnT/5ujzqq4/Lwde0PXz/Em3a4B2PwKccVzZXwI3dNs3AB9a6vE7wX15HfBq4IFx9GWSz8U5+tbE6wxYA7y6234p8F9dHyZ27BzRD2cTcGdVPVVVDwMHgI1J1gC/WlVfr96R+Afgqr59tnfb/wRcOonR4gD+fxmLqvo5cGwZi5Wo/3e+nWcfi8UevxOmqr4CPHFc8Sj7MrHn4hx9m8tK69uhqvpmt/0ksI/eagETO3YG/eCuT3J/9y/nsX+5Zlvq4ezuMjNL+bP2qaqngf8Bfn2cDV+iufq23BXwxST3pbfMBsBZVXUIei9C4MyufCnHb9JG2Zfl+Fxs6nXWTalcCNzLBI+dQd9J8qUkD8xy2QR8FHgFsAE4BHz42G6zPFTNUz7fPsvNSmnn8S6uqlcDVwDXJXndPHWXcvyWqxaei029zpK8BPgM8J6q+vF8VWcpG2n//OKRTlW9YZB6ST4GfL67OddSDzPd9vHl/fvMJHk+8GsM/i/sibQil7GoqoPd9eEkn6M3BfV4kjVVdaj7d/hwV30px2/SRtmXZfVcrKrHj22v9NdZklPohfwnquqzXfHEjp0j+gF0B+WYNwHHzhTYCWzu3gE/D1gP7O7+LXsyyWu6ebO3A3f17XNNt/1HwL9282/LzYpbxiLJi5O89Ng28EZ6x6r/d34Nzz4Wiz1+kzbKviy
r52Irr7OuLbcB+6rqlr67JnfsTtQ70Sv5Avwj8G3g/u4XvKbvvg/Qe5d8P31nZgBT9J6o3wH+hmc+hfxC4NP03nDZDbx80v2bp99X0jtj4DvABybdngHa+3J6Zy98C9h7rM305i53AQ9116cv9fid4P58kt4Uxi/ojeCuHWVfJvlcnKNvTbzOgN+nN41yP7Cnu1w5yWPnEgiS1DinbiSpcQa9JDXOoJekxhn0ktQ4g16SGmfQ66ST5Gi3OuIDST6d5FfmqPfvJ7pt0jgY9DoZ/ayqNlTVK4GfA+/qvzPJKoCqeu0kGieNmkGvk91Xgd9Mckm3hvgd9D60Q5KfHKuU5E+7dcG/leTmruwVSe7uFlD7apLfnkwXpPm51o1OWt0aIVcAd3dFG4FXVm+p2P56V9BbHvaiqvppktO7u7YB76qqh5JcBNwKvP7EtF4anEGvk9GLkuzptr9Kb12S19JbX+ThWeq/Afj7qvopQFU90a1M+Frg033LgJ863mZLS2PQ62T0s6ra0F/QhfX/zlE/PHcJ2OcBPzr+caTlyDl6aWFfBN5x7OycJKdXb33xh5Nc3ZUlyasm2UhpLga9tICqupveaorT3ZTP+7q73gpcm+TYapkr9asW1ThXr5Skxjmil6TGGfSS1DiDXpIaZ9BLUuMMeklqnEEvSY0z6CWpcf8HIpE4oxdLfKUAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot((y_test-predictions),bins=50,kde=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " - Looking at the visualization, we can tell that the scatterplot points fall around a straight line.\n", + " - This means that the model is not predicting randomly.\n", + " - And it works well." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Error Calculation" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import metrics" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MAE: 2417.9705040478357\n", + "MSE: 10202567.004940853\n", + "RMSE: 3194.145739464756\n" + ] + } + ], + "source": [ + "print('MAE:', metrics.mean_absolute_error(y_test, predictions))\n", + "print('MSE:', metrics.mean_squared_error(y_test, predictions))\n", + "print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Output:\n", + "- This model predicts the fare with a mean absolute error (MAE) of about 2400 Rs.\n", + "- The test output also contains prices above 35000, so this is not a bad prediction." + ] + } + ], + "metadata": { + "colab": { + "name": "fligth fare prediction .ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/FLIGHT_FARE_PREDICTION/results/Result.PNG
b/FLIGHT_FARE_PREDICTION/results/Result.PNG new file mode 100644 index 0000000..31c6284 Binary files /dev/null and b/FLIGHT_FARE_PREDICTION/results/Result.PNG differ diff --git a/README.md b/README.md index b2af0e0..0bd5783 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,11 @@ Resources to learn Git: - https://en.wikipedia.org/wiki/Support_vector_machine - https://www.youtube.com/watch?v=Y17Y_8RK6pc +## References for Flight Fare Prediction: +- https://www.kaggle.com/nikhilmittal/flight-fare-prediction-mh +- https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html +- https://medium.com/code-to-express/flight-price-prediction-7c83616a13bb + ## References for Rock Paper Scissor: - https://www.geeksforgeeks.org/python-program-implement-rock-paper-scissor-game - https://www.afiniti.com/corporate/rock-paper-scissors