In [1]:
import pandas as pd
from io import StringIO

# Static fleet table: one row per metro line with its train model, wagon
# count and per-train passenger capacity.
# NOTE(review): every row follows 60 passengers per wagon (4 -> 240,
# 6 -> 360, 8 -> 480).  M8 was listed as 40, which looks like a typo for 240
# and has been corrected here -- confirm against the fleet source.
metro_data = """Metro Line,Train Model,Vagon Number,Capacity
M1A,ABB Group,4,240
M1B,ABB Group,4,240
M2,Hyundai Rotem,8,480
M3,Alstom Metropolis,"4/8",240
M4,CAF,"4/8",240
M5,CAF (Driverless),6,360
M6,Alstom,4,240
M7,Hyundai Rotem (Driverless),4,240
M8,Hyundai Rotem (Driverless),4,240
M9,Alstom Metropolis,4,240
M11,CRRC Zhuzhou,,240"""

metro_df = pd.read_csv(StringIO(metro_data))

# Both input files live under the same data directory; keep it in one place.
DATA_DIR = '/Users/ahsenbeyzaozkul/SPRING2025/YZV202E/AnotherProject/Notebooks/Data'

station_df = pd.read_csv(f'{DATA_DIR}/istanbulmetrostations.csv')
transaction_df = pd.read_csv(f'{DATA_DIR}/output/final/finalseptemberhourlydata.csv')

# 'Vagon Number' is parsed as text because of the "4/8" entries, so the
# default for the missing M11 value is filled as text too; this keeps the
# column homogeneous instead of mixing str and int.
metro_df['Vagon Number'] = metro_df['Vagon Number'].fillna('4')  # Default for M11
metro_df['Capacity'] = metro_df['Capacity'].fillna(240).astype(int)  # Default for M11

# Stations (per line) at which the simulation resets the train to full
# remaining capacity, in addition to the first and last station of a run.
terminal_transfer_stations = {
    'M1A': ['Yenikapı', 'Kirazlı-Bağcılar'],
    'M1B': ['Yenikapı', 'Kirazlı-Bağcılar'],
    'M2': ['Yenikapı'],
    'M3': ['Kirazlı-Bağcılar'],
    'M6': ['Levent'],
    'M7': ['Şişli-Mecidiyeköy', 'Mahmutbey'],
    'M11': ['Gayrettepe']
}

# Stations (per line) at which the simulation applies the higher
# "transfer" exit rate instead of the regular one.
transfer_non_terminal_stations = {
    'M2': ['Şişli-Mecidiyeköy', 'Levent', 'Gayrettepe'],
    'M3': ['Mahmutbey', 'İkitelli Sanayi'],
    'M4': ['Kozyatağı'],
    'M5': ['Dudullu'],
    'M7': ['Kağıthane'],
    'M8': ['Dudullu', 'Kozyatağı'],
    'M9': ['Yenibosna'],
    'M11': ['Kağıthane']
}

# Total passengers per (station, line, date, hour), with the station/line
# columns renamed to match the naming used in station_df.
transaction_agg = (
    transaction_df
    .groupby(['station_poi_desc_cd', 'line_name', 'transition_date', 'transition_hour'])['number_of_passenger']
    .sum()
    .reset_index()
    .rename(columns={'station_poi_desc_cd': 'Station', 'line_name': 'Line'})
)

def calculate_capacity(line, stations, max_capacity, direction, date, hour,
                       transfer_exit_rate=0.45, regular_exit_rate=0.22):
    """Simulate the remaining train capacity along one run of a metro line.

    The train starts with ``max_capacity`` free places.  At every station the
    remaining capacity *on arrival* is recorded, then (except at the last
    station) a share of the passengers on board leaves and the passengers
    entering at that station board.

    Reads the module-level ``transaction_agg`` frame and the
    ``terminal_transfer_stations`` / ``transfer_non_terminal_stations``
    lookups.

    Args:
        line: Line identifier used to filter ``transaction_agg['Line']``.
        stations: Station names in travel order.
        max_capacity: Capacity of an empty train.
        direction: Label copied verbatim into every result row.
        date: Value matched against ``transaction_agg['transition_date']``.
        hour: Value matched against ``transaction_agg['transition_hour']``.
        transfer_exit_rate: Share of on-board passengers leaving at a station
            listed in ``transfer_non_terminal_stations`` for this line.
        regular_exit_rate: Share of on-board passengers leaving at any other
            station.

    Returns:
        A list with one dict per station, holding the keys ``Line``,
        ``Station``, ``Direction``, ``transition_date``, ``transition_hour``,
        ``number_of_passenger`` and ``RemainingCapacity``.
    """
    # Membership is tested once per station, so resolve the per-line lists
    # into sets a single time.
    reset_stations = set(terminal_transfer_stations.get(line, ()))
    transfer_stations = set(transfer_non_terminal_stations.get(line, ()))

    # The line/date/hour part of the filter is identical for every station:
    # apply it once and look stations up in a dict instead of re-scanning the
    # whole frame on each iteration.
    window = transaction_agg[
        (transaction_agg['Line'] == line) &
        (transaction_agg['transition_date'] == date) &
        (transaction_agg['transition_hour'] == hour)
    ]
    entries_by_station = {}
    for name, count in zip(window['Station'], window['number_of_passenger']):
        # Keep the first matching row per station, as a positional
        # ``.iloc[0]`` lookup would.
        entries_by_station.setdefault(name, count)

    last_index = len(stations) - 1
    current_capacity = max_capacity
    capacities = []

    for i, station in enumerate(stations):
        # The train is treated as empty at both ends of the run and at the
        # line's designated reset stations.
        if i == 0 or i == last_index or station in reset_stations:
            current_capacity = max_capacity

        entries = entries_by_station.get(station, 0)

        capacities.append({
            'Line': line,
            'Station': station,
            'Direction': direction,
            'transition_date': date,
            'transition_hour': hour,
            'number_of_passenger': entries,
            'RemainingCapacity': current_capacity
        })

        if i < last_index:
            # Passengers leave first, freeing capacity ...
            on_board = max_capacity - current_capacity
            exit_rate = transfer_exit_rate if station in transfer_stations else regular_exit_rate
            current_capacity += on_board * exit_rate

            # ... then this station's entries board; clamp to [0, max].
            current_capacity = max(0, current_capacity - entries)
            current_capacity = min(max_capacity, current_capacity)

    return capacities

# Run the capacity simulation for every line listed in station_df, in both
# travel directions, for each (date, hour) that has transactions on that line,
# then export the combined table to CSV.
RESULT_COLUMNS = [
    'Line', 'Station', 'Direction', 'transition_date', 'transition_hour',
    'number_of_passenger', 'RemainingCapacity',
]
OUTPUT_CSV = '/Users/ahsenbeyzaozkul/SPRING2025/YZV202E/AnotherProject/Notebooks/Data/output/final/metro_capacities_updated.csv'

results = []
for line in station_df['Line'].unique():
    forward_stations = station_df.loc[station_df['Line'] == line, 'Station'].tolist()
    reverse_stations = forward_stations[::-1]

    # A line present in station_df but absent from metro_df has no known
    # capacity; report it and move on instead of failing with an IndexError.
    line_capacity = metro_df.loc[metro_df['Metro Line'] == line, 'Capacity']
    if line_capacity.empty:
        print(f"Warning: no capacity defined for line {line!r}; skipping.")
        continue
    max_capacity = line_capacity.iloc[0]

    line_transactions = transaction_agg[transaction_agg['Line'] == line]
    unique_dates_hours = line_transactions[['transition_date', 'transition_hour']].drop_duplicates()

    forward_label = f"{forward_stations[0]} to {forward_stations[-1]}"
    reverse_label = f"{reverse_stations[0]} to {reverse_stations[-1]}"

    for date, hour in unique_dates_hours.itertuples(index=False, name=None):
        # Forward direction
        results.extend(calculate_capacity(line, forward_stations, max_capacity, forward_label, date, hour))
        # Reverse direction
        results.extend(calculate_capacity(line, reverse_stations, max_capacity, reverse_label, date, hour))

# Explicit columns keep the frame (and the CSV header) well-formed even when
# no line produced any rows.
result_df = pd.DataFrame(results, columns=RESULT_COLUMNS)

if not result_df.empty:
    result_df['RemainingCapacity'] = result_df['RemainingCapacity'].round(2)

result_df.to_csv(OUTPUT_CSV, index=False)

print("Sample of calculated capacities (first 20 rows):")
print(result_df.head(20))
print("\nFull results saved to 'metro_capacities_updated.csv'")

Sample of calculated capacities (first 20 rows):
   Line                    Station              Direction transition_date  \
0    M2                  Hacıosman  Hacıosman to Yenikapı      2024-09-01   
1    M2                Darüşşafaka  Hacıosman to Yenikapı      2024-09-01   
2    M2         Atatürk Oto Sanayi  Hacıosman to Yenikapı      2024-09-01   
3    M2                Ayazağa-İTÜ  Hacıosman to Yenikapı      2024-09-01   
4    M2                 Seyrantepe  Hacıosman to Yenikapı      2024-09-01   
5    M2           Sanayi Mahallesi  Hacıosman to Yenikapı      2024-09-01   
6    M2                   4.Levent  Hacıosman to Yenikapı      2024-09-01   
7    M2                     Levent  Hacıosman to Yenikapı      2024-09-01   
8    M2                 Gayrettepe  Hacıosman to Yenikapı      2024-09-01   
9    M2          Şişli-Mecidiyeköy  Hacıosman to Yenikapı      2024-09-01   
10   M2                   Osmanbey  Hacıosman to Yenikapı      2024-09-01   
11   M2                    