In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the carpooling dataset into `df`. The path is absolute and hardcoded, so the
# CSV must already exist at this location for the cell to run.
df = pd.read_csv("/content/modified_carpooling_mumbai_matched_preferences.csv")

In [3]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,User ID,Name,Age,Gender,Area of Residence,Specific Address,Destination Address,Aadhaar Number,Time of Departure,Preferred Mode of Travel,...,Comfort with Carpooling,Peak Demand,Emergency Contact Number,Incentive Preference,Cost Sharing Preference,index,max_ppl,final rating,Index,Max People in Carpool
0,U1000,Suresh Rastogi,35,Male,Navi Mumbai,"39 D Avenue, Navi Mumbai","15 D Avenue, Navi Mumbai",323157265009,19:44,rickshaw,...,Very Comfortable,False,7130985182.0,Loyalty Points,By Time,1.0,3,4,10001.0,2
1,U1001,Mohan Bhattacharya,21,Others,Central Mumbai,"60 C Lane, Central Mumbai","25 B Road, Navi Mumbai",878079171104,7:16,rickshaw,...,Very Comfortable,False,8788315244.0,Cashback,By Distance,2.0,3,3,10002.0,2
2,U1002,Pranav Iyer,53,Female,South Mumbai,"86 D Avenue, South Mumbai","78 A Street, South Mumbai",150465101634,11:20,taxi,...,Somewhat Comfortable,True,9583396659.0,Cashback,Equal Split,3.0,4,5,10003.0,2
3,U1003,Mukesh Gupta,50,Female,South Mumbai,"57 A Street, South Mumbai","15 D Avenue, Navi Mumbai",587551135411,9:32,rickshaw,...,Neutral,True,7576390977.0,Cashback,Equal Split,4.0,3,5,10004.0,2
4,U1004,Swara Agarwal,20,Female,Central Mumbai,"8 B Road, Central Mumbai","92 B Road, Navi Mumbai",644003997364,17:01,cab,...,Neutral,False,9249643854.0,Loyalty Points,By Time,5.0,6,5,10005.0,2


In [4]:
# Replace the malformed '[6]' entries in 'max_ppl' with the integer 6.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the selected column: the in-place form operates
# on an intermediate object and is not guaranteed to update `df` (it does not
# under pandas copy-on-write). Re-running this cell is harmless.
df['max_ppl'] = df['max_ppl'].replace('[6]', 6)

In [6]:
# List the distinct values stored in the 'Preferred Mode of Travel' column.
df['Preferred Mode of Travel'].unique()

array(['rickshaw', 'taxi', 'cab', 'Personal Car', 'Ola', 'Uber',
       'Indrive'], dtype=object)

In [7]:
import pandas as pd
from datetime import datetime, timedelta



def recommend_carpool_without_rating(data, user_preferences):
    """
    Generate carpool recommendations based on user preferences without considering ratings.

    Gender, age range (inclusive) and cost sharing preference are hard filters.
    The mode of transport is a soft preference: users with the requested mode are
    listed first, followed by users with any other mode. When 'carpool_now' is
    truthy, only users departing within 10 minutes of 'current_time' are kept.

    Parameters:
    - data: DataFrame with the columns 'Gender', 'Age', 'Preferred Mode of Travel',
      'Time of Departure' (clock strings such as '7:16' or '19:44'),
      'Cost Sharing Preference' and 'max_ppl'.
    - user_preferences: Dictionary with the keys 'gender', 'age_range' (a
      (min_age, max_age) pair), 'mode_of_transport', 'carpool_now',
      'cost_sharing' and, when 'carpool_now' is truthy, 'current_time' ('HH:MM').

    Returns:
    - recommendations: DataFrame with recommended users. At most
      ``max_ppl - 1`` rows are returned, where ``max_ppl`` is read from the
      top-ranked candidate; the frame is empty when nobody matches.

    Raises:
    - KeyError if a required preference key or data column is missing.
    - ValueError if 'current_time' is not in '%H:%M' format.
    """
    minutes_per_day = 24 * 60
    window_minutes = 10

    # Hard filters: gender, inclusive age range and cost sharing preference.
    min_age, max_age = user_preferences['age_range']
    passes_hard_filters = (
        (data['Gender'] == user_preferences['gender'])
        & (data['Age'] >= min_age)
        & (data['Age'] <= max_age)
        & (data['Cost Sharing Preference'] == user_preferences['cost_sharing'])
    )
    candidates = data[passes_hard_filters]

    # Filter by time of departure if user chooses immediate carpool
    if user_preferences['carpool_now']:
        now = datetime.strptime(user_preferences['current_time'], '%H:%M')
        now_minutes = now.hour * 60 + now.minute

        # Compare times numerically (minutes since midnight). Comparing the raw
        # strings is wrong for unpadded values: '7:16' sorts after '07:26'.
        clock = candidates['Time of Departure'].astype(str).str.extract(r'^\s*(\d{1,2}):(\d{2})')
        departure_minutes = (pd.to_numeric(clock[0], errors='coerce') * 60
                             + pd.to_numeric(clock[1], errors='coerce'))

        # Circular difference so a window that crosses midnight still works
        # (e.g. 23:58 is 7 minutes before 00:05). Unparseable times become NaN
        # and fail both comparisons, so they are dropped.
        offset = (departure_minutes - now_minutes) % minutes_per_day
        in_window = (offset <= window_minutes) | (offset >= minutes_per_day - window_minutes)
        candidates = candidates[in_window]

    # Preferred mode first, every other mode afterwards (original order kept within each group).
    has_preferred_mode = candidates['Preferred Mode of Travel'] == user_preferences['mode_of_transport']
    recommendations = pd.concat([candidates[has_preferred_mode], candidates[~has_preferred_mode]])

    if recommendations.empty:
        return recommendations

    # Capacity of the top-ranked candidate's vehicle. Tolerate string values,
    # including the bracketed form '[6]' that occurs in the raw data.
    raw_capacity = recommendations.iloc[0]['max_ppl']
    if isinstance(raw_capacity, str):
        raw_capacity = float(raw_capacity.strip('[] '))
    max_people = int(raw_capacity)

    # One seat belongs to the requesting user. Clamp at zero because a negative
    # argument to head() means "all but the last n rows".
    return recommendations.head(max(max_people - 1, 0))

In [8]:
# Example usage: look for male carpool partners aged 20-40 departing around 19:45.
user_preferences = {
    'gender': 'Male',
    'age_range': (20, 40),  # (min_age, max_age), both bounds inclusive
    'mode_of_transport': 'rickshaw',  # soft preference: matching users are listed first
    'carpool_now': True,  # turns on the departure-time window around 'current_time'
    'current_time': '19:45',  # parsed with the '%H:%M' format
    'cost_sharing': 'By Time'
}

# Run the rating-free recommender on the full dataset and print the columns relevant to the match.
recommendations = recommend_carpool_without_rating(df, user_preferences)
print(recommendations[['User ID', 'Name', 'Age', 'Gender', 'Preferred Mode of Travel', 'Time of Departure']])


    User ID            Name  Age Gender Preferred Mode of Travel  \
0     U1000  Suresh Rastogi   35   Male                 rickshaw   
595   U1595  Shivansh Reddy   32   Male             Personal Car   

    Time of Departure  
0               19:44  
595             19:42  


In [9]:
import requests
from datetime import datetime

def geocode(api_key, location_name, timeout=10):
    """
    Resolve a place name to coordinates with the GraphHopper geocoding API.

    Parameters:
    - api_key: GraphHopper API key, sent as the 'key' query parameter.
    - location_name: Free-text place name, sent as the 'q' query parameter.
    - timeout: Seconds to wait for the HTTP request before giving up (default 10).

    Returns:
    - (lat, lon): Latitude and longitude of the first hit.

    Raises:
    - RuntimeError if the API answers with an HTTP error status.
    - IndexError if the API returns no hit for the place name.
    - KeyError if the response does not have the expected structure.
    """
    BASE_URL = "https://graphhopper.com/api/1/geocode"
    params = {
        'q': location_name,
        'key': api_key,
        'limit': 1
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(BASE_URL, params=params, timeout=timeout)

    # Report HTTP failures explicitly. The message is built by hand (rather than
    # via raise_for_status) so the request URL, which contains the API key, is
    # not echoed into the traceback.
    if not response.ok:
        raise RuntimeError(
            f"GraphHopper geocoding failed for {location_name!r} "
            f"(HTTP {response.status_code}): {response.text[:200]}"
        )

    data = response.json()

    hits = data['hits']
    if not hits:
        raise IndexError(f"GraphHopper returned no geocoding result for {location_name!r}")

    point = hits[0]['point']
    return point['lat'], point['lng']

def get_route_info(api_key, start_lat_lon, end_lat_lon, timeout=10):
    """
    Fetch driving distance and duration between two points from the GraphHopper routing API.

    Parameters:
    - api_key: GraphHopper API key, sent as the 'key' query parameter.
    - start_lat_lon: (lat, lon) pair of the starting point.
    - end_lat_lon: (lat, lon) pair of the destination.
    - timeout: Seconds to wait for the HTTP request before giving up (default 10).

    Returns:
    - (distance, time): Distance in kilometres and travel time in minutes of
      the first returned path.

    Raises:
    - RuntimeError if the API answers with an HTTP error status.
    - IndexError if the API returns no path.
    - KeyError if the response does not have the expected structure.
    """
    BASE_URL = "https://graphhopper.com/api/1/route"
    params = {
        # A list value is encoded by requests as repeated 'point' query parameters.
        'point': [f"{start_lat_lon[0]},{start_lat_lon[1]}", f"{end_lat_lon[0]},{end_lat_lon[1]}"],
        'vehicle': 'car',
        'locale': 'en',
        'key': api_key
    }

    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(BASE_URL, params=params, timeout=timeout)

    # Report HTTP failures explicitly. The message is built by hand (rather than
    # via raise_for_status) so the request URL, which contains the API key, is
    # not echoed into the traceback.
    if not response.ok:
        raise RuntimeError(
            f"GraphHopper routing request failed "
            f"(HTTP {response.status_code}): {response.text[:200]}"
        )

    data = response.json()

    paths = data['paths']
    if not paths:
        raise IndexError("GraphHopper returned no route between the given points")

    best_path = paths[0]
    distance = best_path['distance'] / 1000  # Convert to km
    time = best_path['time'] / 60000  # Convert to minutes

    return distance, time

def calculate_fare(time, distance, mode_of_transport, base_prices, is_peak_hours):
    """
    Estimate a trip fare from its duration and distance.

    The fare is ``time * per-minute rate + distance * per-km rate``, where the
    per-km rate depends on whether it is peak hours, and is never lower than
    the base price of the chosen mode.

    Parameters:
    - time: Trip duration, charged at the mode's per-minute rate.
    - distance: Trip distance, charged at the mode's per-kilometre rate.
    - mode_of_transport: One of 'auto', 'cab' or 'taxi'.
    - base_prices: Mapping from mode of transport to its minimum fare.
    - is_peak_hours: Truthy to apply the peak per-kilometre rate.

    Returns:
    - The fare, or None when the mode of transport is not one of the known modes.
    """
    # mode -> (rate per minute, per-km rate off-peak, per-km rate at peak)
    tariffs = {
        'auto': (0.5, 14, 16),
        'cab': (0.6, 18, 20),
        'taxi': (0.7, 16, 18),
    }

    # Unknown modes have no tariff, so there is nothing to compute.
    if mode_of_transport not in tariffs:
        return None

    per_minute, off_peak_per_km, peak_per_km = tariffs[mode_of_transport]
    per_km = peak_per_km if is_peak_hours else off_peak_per_km

    metered_fare = (time * per_minute) + (distance * per_km)

    # The base price acts as a floor for short trips.
    minimum_fare = base_prices[mode_of_transport]
    return minimum_fare if metered_fare < minimum_fare else metered_fare


In [13]:

if __name__ == "__main__":
    # Interactive fare estimator: geocode two place names, fetch the route,
    # scale the travel time by a peak/off-peak multiplier and price the trip.

    # Local imports keep this cell runnable on its own.
    import os
    from getpass import getpass

    # The API key is a credential and must not live in the notebook source.
    # Read it from the GRAPHHOPPER_API_KEY environment variable, or prompt for
    # it without echoing when the variable is not set.
    API_KEY = os.environ.get("GRAPHHOPPER_API_KEY") or getpass("Enter your GraphHopper API key: ")

    start_name = input("Enter the starting place name: ")
    end_name = input("Enter the destination place name: ")

    start_lat_lon = geocode(API_KEY, start_name)
    end_lat_lon = geocode(API_KEY, end_name)

    distance, base_time = get_route_info(API_KEY, start_lat_lon, end_lat_lon)

    print(f"The distance from {start_name} to {end_name} is approximately {distance:.2f} km.")

    # Determine the current time and day from a single clock reading so the two
    # values cannot disagree when the cell runs right at midnight.
    now = datetime.now()
    current_time = now.time()
    current_day = now.strftime("%A")

    # Define peak hours
    peak_hours_start = datetime.strptime("09:00:00", "%H:%M:%S").time()
    peak_hours_end = datetime.strptime("11:00:00", "%H:%M:%S").time()

    evening_peak_start = datetime.strptime("18:00:00", "%H:%M:%S").time()
    evening_peak_end = datetime.strptime("21:00:00", "%H:%M:%S").time()

    in_morning_peak = peak_hours_start <= current_time <= peak_hours_end
    in_evening_peak = evening_peak_start <= current_time <= evening_peak_end

    # Sunday never counts as peak hours; on other days both windows are inclusive.
    if current_day != "Sunday" and (in_morning_peak or in_evening_peak):
        time_multiplier = 3.5  # Peak hours, multiply by 3.5
        peak_hours_text = "Yes"
    else:
        time_multiplier = 2.5  # Non-peak hours (including all of Sunday), multiply by 2.5
        peak_hours_text = "No"

    adjusted_time = base_time * time_multiplier

    print(f"Is it peak hours? {peak_hours_text}")
    print(f"Travel time will take roughly {adjusted_time:.2f} minutes.")

    # Ask for the mode of transport; surrounding whitespace and letter case are ignored.
    mode_of_transport = input("Enter the mode of transport (auto/cab/taxi): ").strip().lower()

    # Define base prices for each mode of transport
    base_prices = {
        'auto': 23,
        'cab': 30,
        'taxi': 40
    }

    fare = calculate_fare(adjusted_time, distance, mode_of_transport, base_prices, peak_hours_text == "Yes")

    # calculate_fare returns None for a mode it has no tariff for.
    if fare is not None:
        print(f"The estimated fare for a {mode_of_transport} journey is approximately {fare:.2f} INR.")
    else:
        print("Invalid mode of transport.")

Enter the starting place name: bhayandar
Enter the destination place name: andheri
The distance from bhayandar to andheri is approximately 26.17 km.
Is it peak hours? No
Travel time will take roughly 90.26 minutes.
Enter the mode of transport (auto/cab/taxi): auto
The estimated fare for a auto journey is approximately 411.53 INR.


In [11]:
def recommend_carpool_with_rating_and_cosine_similarity(data, user_preferences):
    """
    Generate carpool recommendations based on user preferences using cosine similarities and user ratings.

    Each user is described by a feature vector made of 'Age' plus one-hot
    encodings of 'Gender', 'Preferred Mode of Travel' and 'Cost Sharing
    Preference'. The requesting user's vector uses the midpoint of the age
    range. The ranking score is ``0.8 * cosine similarity + 0.2 * rating / 5``.

    Parameters:
    - data: DataFrame with the columns 'Age', 'Gender', 'Preferred Mode of
      Travel', 'Cost Sharing Preference', 'final rating', 'Time of Departure'
      (clock strings such as '7:16' or '19:44') and 'max_ppl'. It is not modified.
    - user_preferences: Dictionary with the keys 'gender', 'age_range' (a
      (min_age, max_age) pair), 'mode_of_transport', 'cost_sharing',
      'carpool_now' and, when 'carpool_now' is truthy, 'current_time' ('HH:MM').

    Returns:
    - recommendations: Copy of the matching rows with an added 'similarity'
      column, best score first. At most ``max_ppl - 1`` rows are returned,
      where ``max_ppl`` is read from the top-ranked candidate.

    Raises:
    - KeyError if a required preference key or data column is missing.
    - ValueError if 'current_time' is not in '%H:%M' format.
    """
    categorical_columns = ['Gender', 'Preferred Mode of Travel', 'Cost Sharing Preference']
    minutes_per_day = 24 * 60
    window_minutes = 10

    if data.empty:
        return data.assign(similarity=pd.Series(dtype=float))

    # Encode only the columns that describe preferences. Encoding the whole
    # frame would leave names, addresses and ID numbers in the feature matrix.
    data_encoded = pd.get_dummies(data[['Age'] + categorical_columns],
                                  columns=categorical_columns).astype(float)
    # NOTE(review): 'Age' is unscaled, so it outweighs the 0/1 indicator columns
    # in the cosine similarity. Consider normalising it if categories should matter more.

    # Build the user vector on exactly the encoded columns so both sides line up
    # position by position. A requested category that never occurs in the data
    # has no column and is ignored.
    user_vector = pd.Series(0.0, index=data_encoded.columns)
    user_vector['Age'] = (user_preferences['age_range'][0] + user_preferences['age_range'][1]) / 2  # average age in the range
    requested_columns = (
        'Gender_' + user_preferences['gender'],
        'Preferred Mode of Travel_' + user_preferences['mode_of_transport'],
        'Cost Sharing Preference_' + user_preferences['cost_sharing'],
    )
    for column in requested_columns:
        if column in user_vector.index:
            user_vector[column] = 1.0

    # Cosine similarity computed with NumPy. Rows whose norm is zero or NaN get
    # a similarity of 0 instead of dividing by zero.
    matrix = data_encoded.to_numpy()
    vector = user_vector.to_numpy()
    denominators = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)
    similarity_scores = np.divide(matrix @ vector, denominators,
                                  out=np.zeros(len(matrix)), where=denominators > 0)

    # Adjust the similarity scores by incorporating the user rating
    # (weight 0.8 for the cosine similarity, 0.2 for the rating scaled to 0-1).
    # Work on a copy so the caller's DataFrame does not gain a 'similarity' column.
    scored = data.copy()
    scored['similarity'] = 0.8 * similarity_scores + 0.2 * scored['final rating'] / 5

    # Rank users by score; a stable sort keeps the input order for ties.
    recommendations = scored.sort_values(by='similarity', ascending=False, kind='stable')

    # Further filtering based on provided constraints
    if user_preferences['carpool_now']:
        now = datetime.strptime(user_preferences['current_time'], '%H:%M')
        now_minutes = now.hour * 60 + now.minute

        # Compare times numerically (minutes since midnight). Comparing the raw
        # strings is wrong for unpadded values: '7:16' sorts after '07:26'.
        clock = recommendations['Time of Departure'].astype(str).str.extract(r'^\s*(\d{1,2}):(\d{2})')
        departure_minutes = (pd.to_numeric(clock[0], errors='coerce') * 60
                             + pd.to_numeric(clock[1], errors='coerce'))

        # Circular difference so a window that crosses midnight still works.
        # Unparseable times become NaN and are dropped.
        offset = (departure_minutes - now_minutes) % minutes_per_day
        in_window = (offset <= window_minutes) | (offset >= minutes_per_day - window_minutes)
        recommendations = recommendations[in_window]

    if recommendations.empty:
        return recommendations

    # Capacity of the top-ranked candidate's vehicle. Tolerate string values,
    # including the bracketed form '[6]' that occurs in the raw data.
    raw_capacity = recommendations.iloc[0]['max_ppl']
    if isinstance(raw_capacity, str):
        raw_capacity = float(raw_capacity.strip('[] '))
    max_people = int(raw_capacity)

    # One seat belongs to the requesting user. Clamp at zero because a negative
    # argument to head() means "all but the last n rows".
    return recommendations.head(max(max_people - 1, 0))

# Note: The new function prioritizes users with higher ratings while also considering cosine similarity.


In [12]:
# Assuming the function 'recommend_carpool_without_rating' remains unchanged...
# NOTE(review): this cell calls the rating-free recommender, not
# recommend_carpool_with_rating_and_cosine_similarity.

# Example usage
user_preferences = {
    'gender': 'Male',
    'age_range': (20, 40),
    'mode_of_transport': 'rickshaw',
    'carpool_now': True,
    'current_time': '19:45',
    'cost_sharing': 'By Time',
    # The two address entries below are not read by recommend_carpool_without_rating,
    # so they do not influence which users are recommended.
    'Specific Address': '39 D Avenue, Navi Mumbai',  # add your desired starting point
    'Destination Address': 'Street B, Central Mumbai'     # add your desired ending point
}

# Same recommender call as before; the printout additionally shows each match's addresses.
recommendations = recommend_carpool_without_rating(df, user_preferences)
print(recommendations[['User ID', 'Name', 'Age', 'Gender', 'Preferred Mode of Travel', 'Time of Departure', 'Specific Address', 'Destination Address']])


    User ID            Name  Age Gender Preferred Mode of Travel  \
0     U1000  Suresh Rastogi   35   Male                 rickshaw   
595   U1595  Shivansh Reddy   32   Male             Personal Car   

    Time of Departure             Specific Address       Destination Address  
0               19:44     39 D Avenue, Navi Mumbai  15 D Avenue, Navi Mumbai  
595             19:42  20 A Street, Central Mumbai  15 D Avenue, Navi Mumbai  


In [14]:
# Write the current state of `df` to 'df.csv' in the working directory, without the index column.
df.to_csv('df.csv', index=False)