## Predict passenger count at a stage

### With the help of a cumulative passenger profile

In [None]:
import pandas as pd 
import numpy as np
# Per-hour table read from speed-table.csv. Apart from "TIME", its column
# names are matched against the "Time Interval" values of distance-time.csv
# further down in this notebook.
df = pd.read_csv("speed-table.csv")

# Stages of the route in travel order, first stop to last stop.
bus_stages = [
    'T.NAGAR',
    'SAIDAPET',
    'ANNA UNIV',
    'WPTC',
    'SRP TOOLS',
    'KANDANCHAV',
    'THORAIPAKKAM',
    'M K CHAVADI',
    'KARAPAKKAM',
    'SHOLINGANALLUR',
    'KUMARAN NG',
    'CHEMMANCHE',
    'NAVALUR',
    'SIPCOT',
    'CHURCH',
    'PAL. CHEMI',
    'HINDUSTAN',
    'KELAMBAKKAM',
    'KOMAN NAGAR',
    'ENGG',
    'CHENGAMMAL',
    'KALAVAKKAM',
    'THIRUPORUR',
]

In [None]:
# Turn every per-segment value of `df` into a travel time in whole minutes:
# minutes = ceil(distance / value * 60), with the segment distance looked up
# in distance-time.csv by the column name.
# NOTE(review): this overwrites `df` in place, so running the cell a second
# time without reloading speed-table.csv converts the values again.
distance_df = pd.read_csv("distance-time.csv")
distance_df.drop(columns=['Unnamed: 0'], inplace=True)

segment_columns = [name for name in df.columns if name != "TIME"]
for segment in segment_columns:
    # First distance row whose "Time Interval" equals this column name.
    matching_rows = distance_df["Time Interval"] == segment
    segment_distance = distance_df.loc[matching_rows, "Distance"].iloc[0]

    travel_minutes = (segment_distance / df[segment]) * 60
    df[segment] = travel_minutes.apply(np.ceil)
df

In [None]:
from datetime import datetime, timedelta

# Build the arrival-time table for every stage of the route, starting from a
# departure time typed in by the user.
start_time = input("Enter start time (HH:MM:SS): ").strip()
start_datetime = datetime.strptime(start_time, "%H:%M:%S")

# The row of `df` is selected once, by position, from the departure hour.
# NOTE(review): the same row is used for the whole trip even when the bus
# crosses into a later hour - confirm this is the intended model.
hour_time = start_datetime.hour

# Stage 0 is the departure stage; its time is the start time itself.
stage_times = [(0, start_datetime.strftime("%H:%M:%S"))]

# Column i of `df` holds the travel minutes of the segment that ends at stage
# i. The stage count is taken from bus_stages so it cannot drift from the
# stage list.
for i in range(1, len(bus_stages)):
    # float() because timedelta rejects NumPy integer scalars.
    duration_minutes = float(df.iloc[hour_time, i])
    end_datetime = start_datetime + timedelta(minutes=duration_minutes)
    stage_times.append((i, end_datetime.strftime('%H:%M:%S')))
    # The arrival at this stage is the departure for the next segment.
    start_datetime = end_datetime

stage_times_df = pd.DataFrame(stage_times, columns=['Stage Number', 'End Time'])
stages_df = pd.DataFrame(bus_stages, columns=['Bus Stage'])

# Side-by-side join: row k pairs the k-th stage name with its arrival time.
time_df = pd.concat([stages_df, stage_times_df], axis=1)
time_df

In [None]:
# Load the ticket log and narrow it to one bus number on the
# T.NAGAR -> THIRUPORUR direction. This rebinds `df` to the ticket data.
df = pd.read_csv('04-02-19.csv')
bus_no = 519

# \b...\b matches the bus number as a whole token inside the schedule name.
# na=False treats a missing schedule name as "no match"; without it the mask
# contains NaN and pandas refuses to use it for boolean indexing.
route_mask = df['Schedule Name'].str.contains(rf'\b{bus_no}\b', na=False)
df = df[route_mask]

cols = [
    'Schedule Name',
    'Ticket Issued Time',
    'Adult',
    'From Stage',
    'To Stage',
    'Source',
    'Destination',
]
df = df[cols]
df = df[(df['Source'] == 'T.NAGAR') & (df['Destination'] == 'THIRUPORUR')]

In [None]:
# Build the hourly cumulative boarding profile for one From/To stage pair.
# Stray whitespace is stripped so it cannot silently produce an empty match.
from_stage = input("Enter From Stage: ").strip().upper()
to_stage = input("Enter To Stage: ").strip().upper()

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the previous `df` (chained-assignment warning).
df = df[(df['From Stage'] == from_stage) & (df['To Stage'] == to_stage)].copy()

# Unparseable times become NaT; their Hour is NaN and groupby drops them.
df['Ticket Issued Time'] = pd.to_datetime(
    df['Ticket Issued Time'], format='%H:%M:%S', errors='coerce'
)
df['Hour'] = df['Ticket Issued Time'].dt.hour

# Number of rows per hour, stored under the name 'Bus Count'.
# NOTE(review): this counts rows and does not use the 'Adult' column -
# confirm that one row per boarding is the intended measure.
df = df.groupby('Hour')['Schedule Name'].count().reset_index()
df.columns = ['Hour', 'Bus Count']

# Left-join onto all 24 hours so hours without any row appear with count 0.
hours = pd.DataFrame({'Hour': range(24)})
df = hours.merge(df, on='Hour', how='left').fillna(0)

df['Bus Count'] = df['Bus Count'].astype(int)
df['Cumulative Boarding'] = df['Bus Count'].cumsum()
# Minutes since midnight at the start of each hour; used for interpolation.
df['Minutes'] = df['Hour'] * 60

df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def estimated_boarding(target_time, profile=None):
    """Estimate the cumulative boarding count at ``target_time``.

    The estimate is a linear interpolation between the two profile points
    that bracket the target time.

    Parameters
    ----------
    target_time
        Anything ``pd.to_datetime`` accepts, e.g. ``"06:47:00"``. Only the
        hour and minute are used; seconds are ignored.
    profile : pandas.DataFrame, optional
        Table with a ``'Minutes'`` column (minutes since midnight, assumed to
        be sorted ascending) and a ``'Cumulative Boarding'`` column. Defaults
        to the module-level ``df``.

    Returns
    -------
    float
        Interpolated cumulative boarding count. For a target at or after the
        last profile point, the last cumulative value is returned.

    Raises
    ------
    ValueError
        If the target time is earlier than every point in the profile.
    """
    if profile is None:
        profile = df

    moment = pd.to_datetime(target_time)
    target_minutes = moment.hour * 60 + moment.minute

    # Positions (not index labels) of the points at or before the target, so
    # the lookup does not depend on the frame having a 0..n-1 index.
    at_or_before = (profile['Minutes'] <= target_minutes).to_numpy().nonzero()[0]
    if at_or_before.size == 0:
        raise ValueError(
            f"target time {target_time!r} is before the first profile point"
        )
    previous_pos = int(at_or_before[-1])
    next_pos = previous_pos + 1

    previous_minutes = profile['Minutes'].iloc[previous_pos]
    previous_boarding = profile['Cumulative Boarding'].iloc[previous_pos]

    # No later point to interpolate towards (e.g. any time from 23:00 on with
    # an hourly profile): the cumulative count stays at its last value.
    if next_pos >= len(profile):
        return float(previous_boarding)

    next_minutes = profile['Minutes'].iloc[next_pos]
    next_boarding = profile['Cumulative Boarding'].iloc[next_pos]

    # Linear interpolation between the two bracketing points.
    ratio = (target_minutes - previous_minutes) / (next_minutes - previous_minutes)
    return float(previous_boarding + (next_boarding - previous_boarding) * ratio)


# The query time is the arrival time at one stage of the schedule table
# rather than a time typed in by the user.
# NOTE(review): the stage is fixed to 'SAIDAPET' and does not follow the
# From Stage entered earlier - confirm that this is intended.
stage = time_df[time_df['Bus Stage'] == 'SAIDAPET']
target_time = stage['End Time'].iloc[-1]

estimated_count = estimated_boarding(target_time)

In [None]:
# Plot the cumulative boarding profile and mark the interpolated estimate.
# The target time is converted once to fractional hours for the x-axis.
target_stamp = pd.to_datetime(target_time)
target_hour = target_stamp.hour + target_stamp.minute / 60

plt.figure(figsize=(10, 6))
plt.plot(
    df['Hour'],
    df['Cumulative Boarding'],
    linestyle='-',
    color='b',
    label='Cumulative Boarding',
)
plt.scatter(
    [target_hour],
    [estimated_count],
    color='r',
    marker='o',
    label=f'Estimated at {target_time}',
)

# Dashed guide lines from the estimate down to the x-axis and across to the
# y-axis.
plt.plot([target_hour, target_hour], [0, estimated_count], linestyle='--', color='r')
plt.plot([0, target_hour], [estimated_count, estimated_count], linestyle='--', color='r')

plt.title(f"{from_stage} - {to_stage}", fontsize=15)
plt.suptitle("Estimated Time")
plt.xlabel('Hour', fontsize=14)
plt.ylabel('Cumulative Boarding', fontsize=14)

plt.xticks(range(24), fontsize=12)
plt.yticks(fontsize=12)
plt.legend()
plt.grid(True)
plt.tight_layout()

# Both axes start at 0 so the guide lines reach the axes.
plt.xlim(0, 24)
plt.ylim(0, df['Cumulative Boarding'].max())
plt.show()

print(estimated_count)