In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sys, os
import requests
import json
import datetime
import time

In [2]:
#adding scripts path
sys.path.append(os.path.abspath(os.path.join('..')))
#import scripts
from scripts.data_extractor import DataExtractor
from scripts.data_manipulator import DataManipulator
from config.config import open_weather_api_key
from config.config import HOLIDAY_API_KEY

In [3]:
#instantiate objects
extractor = DataExtractor()
manipulator = DataManipulator()

In [4]:
#fetch the data
order_trip_df = extractor.read_csv('../data/generated/order_trip.csv')
order_trip_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25903 entries, 0 to 25902
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        25903 non-null  int64  
 1   trip_id           25871 non-null  float64
 2   trip_origin       25871 non-null  object 
 3   trip_destination  25871 non-null  object 
 4   trip_start_time   25871 non-null  object 
 5   trip_end_time     25871 non-null  object 
 6   order_id          25903 non-null  int64  
 7   driver_id         25903 non-null  int64  
 8   driver_action     25903 non-null  object 
 9   lat               25903 non-null  float64
 10  lng               25903 non-null  float64
dtypes: float64(3), int64(3), object(5)
memory usage: 2.2+ MB


In [5]:
order_trip_df['trip_start_time'] = pd.to_datetime(order_trip_df['trip_start_time'])
order_trip_df['trip_end_time'] = pd.to_datetime(order_trip_df['trip_end_time'])
order_trip_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25903 entries, 0 to 25902
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Unnamed: 0        25903 non-null  int64         
 1   trip_id           25871 non-null  float64       
 2   trip_origin       25871 non-null  object        
 3   trip_destination  25871 non-null  object        
 4   trip_start_time   25871 non-null  datetime64[ns]
 5   trip_end_time     25871 non-null  datetime64[ns]
 6   order_id          25903 non-null  int64         
 7   driver_id         25903 non-null  int64         
 8   driver_action     25903 non-null  object        
 9   lat               25903 non-null  float64       
 10  lng               25903 non-null  float64       
dtypes: datetime64[ns](2), float64(3), int64(3), object(3)
memory usage: 2.2+ MB


In [6]:
start_date = order_trip_df['trip_start_time'].iloc[0]
end_date = order_trip_df['trip_start_time'].iloc[-1]
print(start_date, end_date)

2021-07-01 09:30:59 2021-09-10 11:47:26


### Rain vs No-Rain

In [7]:
with open('../config/city.json') as f:
    data = json.load(f)
data

{'id': 2332459,
 'city': {'id': {'$numberLong': '2332459'},
  'name': 'Lagos',
  'findname': 'LAGOS',
  'country': 'NG',
  'coord': {'lon': 3.39583, 'lat': 6.45306},
  'zoom': {'$numberLong': '1'}}}

In [8]:
#get coordinates and other args ready for the api call
coord = data['city']['coord']
lat = coord['lat']
lon = coord['lon']
start = time.mktime(start_date.timetuple())
end = time.mktime(end_date.timetuple())
print(start, end)
url = f'https://history.openweathermap.org/data/2.5/history/city?lat={lat}&lon={lon}&type=hour&start={start}&end={end}&appid={open_weather_api_key}'

1625121059.0 1631263646.0


In [None]:
#call the api
r = requests.get(url)
j=r.json()
j

unfortunately the open weather api wouldn't allow us to get data between the ranges we want therefore we abandon this feature. 

### Holiday vs Not-Holiday

In [10]:
#define params
year=2021
country="NG"
key=HOLIDAY_API_KEY
url = f'https://holidayapi.com/v1/holidays?key={key}&year={year}&country={country}'

In [11]:
#get holidays
holiday_result = requests.get(url)
holiday_result_json= holiday_result.json()

In [12]:
for h in holiday_result_json['holidays']:
    print(h['date'])

2021-01-01
2021-03-08
2021-03-14
2021-03-20
2021-04-02
2021-04-03
2021-04-04
2021-04-05
2021-05-01
2021-05-13
2021-06-12
2021-06-15
2021-06-20
2021-06-21
2021-07-20
2021-09-22
2021-10-01
2021-10-19
2021-12-21
2021-12-24
2021-12-25
2021-12-26
2021-12-31


In [13]:
def checkHoliday(date:str, holidays:list )-> bool:
    for holiday in holidays:
        if holiday['date'] == date:
            return True
    return False

In [14]:
isHoliday = []
for index, row in order_trip_df.iterrows():
   if checkHoliday(str(row["trip_start_time"].date()), holiday_result_json['holidays']):
      isHoliday.append(1)
   isHoliday.append(0)
order_trip_df["isHoliday"] = isHoliday
    

In [15]:
len(order_trip_df[order_trip_df['isHoliday'] == 1])

0

as we can see there are no accepted orders in holidays

### Weekday vs Weekend

In [16]:
order_trip_df.columns.get_loc
order_trip_df.insert()

Failed to Add WeekDay Column


In [17]:
order_trip_df

Unnamed: 0.1,Unnamed: 0,trip_id,trip_origin,trip_destination,trip_start_time,trip_end_time,order_id,driver_id,driver_action,lat,lng,isHoliday,isWeekDay
0,0,392001.0,"6.6010417,3.2766339","6.4501069,3.3916154",2021-07-01 09:30:59,2021-07-01 09:34:36,392001,243828,accepted,6.602207,3.270465,0,
1,1,392005.0,"6.565087699999999,3.3844415","6.499696300000001,3.3509075",2021-07-01 10:53:36,2021-07-01 11:27:51,392005,245597,accepted,6.549147,3.392184,0,
2,2,392009.0,"6.6636484,3.3082058","6.6185421,3.301634",2021-07-01 06:39:51,2021-07-01 07:41:12,392009,245600,accepted,6.644829,3.289328,0,
3,3,392013.0,"6.4308171,3.4341552","6.435460000000001,3.4846547",2021-07-01 08:44:09,2021-07-01 09:19:11,392013,243892,accepted,6.435331,3.424317,0,
4,4,392014.0,"6.499156300000001,3.3585173","6.4280911,3.5157172",2021-07-01 06:43:07,2021-07-01 07:27:24,392014,243781,accepted,6.498221,3.360042,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25898,25898,489898.0,"6.442073199999999,3.4788716","6.439366,3.5020578",2022-02-04 17:09:00,2022-02-04 17:51:19,489898,246615,accepted,6.443810,3.464208,0,
25899,25899,490008.0,"6.4461993,3.5497557","6.475137000000001,3.5644741",2022-02-27 09:07:59,2022-02-27 09:17:41,490008,247877,accepted,6.448554,3.574380,0,
25900,25900,497721.0,"6.482082176631842,3.3828646009695547","6.4632478,3.6239759",2021-09-04 14:30:57,2021-09-04 15:18:38,497721,243414,accepted,6.498164,3.359964,0,
25901,25901,510268.0,"6.4494186,3.4016453","6.6070099,3.3501947",2022-01-21 11:05:47,2022-01-21 12:12:39,510268,243532,accepted,6.462251,3.391023,0,


### Traffic Condition