# deck,deck_name in dict.items()

In [723]:
import pandas as pd
import numpy as np

# Load the pre-processed passenger table. 'Number' (the numeric part of a
# Cabin string) is cast to pandas' nullable Int64 dtype so that missing
# values can coexist with integer room numbers.
df = pd.read_csv('pre_processed.csv')
df.Number = df.Number.astype('Int64')



In [724]:
original = pd.read_csv('pre_processed.csv')

In [725]:

def fill_deck_and_side_from_cabin(df):
    """Populate 'Deck', 'Number' and 'Side' from 'Cabin' where they are missing.

    Only rows whose 'Number' is NaN and whose 'Cabin' is set are touched.
    A cabin string is expected to look like ``"<deck>/<number>/<side>"``.

    Args:
        df: DataFrame with 'Cabin', 'Deck', 'Number' and 'Side' columns.
            It is modified in place.

    Returns:
        The same DataFrame object, for call chaining.

    Raises:
        ValueError: if the number component of a selected cabin is not an
            integer literal.
    """
    # Rows that have a cabin string but no parsed room number yet.
    mask = df['Number'].isna() & df['Cabin'].notna()

    # Nothing to parse: return early instead of assigning an empty result.
    if not mask.any():
        return df

    # One vectorized split instead of building a Series per row.
    parts = df.loc[mask, 'Cabin'].str.split('/', expand=True)

    df.loc[mask, 'Deck'] = parts[0]
    df.loc[mask, 'Number'] = parts[1].astype(int)  # explicit integer conversion
    df.loc[mask, 'Side'] = parts[2]

    return df

In [726]:
# Candidate decks for a passenger, keyed by HomePlanet. Used by
# fill_potential_decks for passengers whose Cabin is missing.
decks_by_planet = {
    'Earth':['E','F','G'],
    'Europa': ['A','B','C','D','E','T'],
    'Mars': ['D','E','F']
}

# Narrower candidate decks per HomePlanet. fill_potential_decks uses these
# when the passenger has Bills == 0 and the group already spans more than
# one known deck.
decks_by_planet_no_bills = {
    'Earth':['G'],
    'Europa':['B'],
    'Mars': ['E','F']
}

# Reverse lookup of decks_by_planet: deck letter -> home planets listed for it.
planet_by_deck = {
    'A':['Europa'],'B':['Europa'],'C':['Europa'],'D':['Europa','Mars'],'E':['Europa','Mars','Earth'],
    'F':['Earth','Mars'],'G':['Earth'],'T':['Europa']
}

homeplanets = ['Earth', 'Europa', 'Mars']

# Cabin sides iterated by rooms_to_fill.
all_cabin_sides = ['P','S']

# Distinct non-null Deck values present in df at the time this cell runs.
all_cabin_decks = list(df.dropna(subset = ['Deck']).Deck.unique())


In [727]:
def multiple_decks_in_group(df,row):
    """Return True when the passenger's group has more than one known deck.

    Args:
        df: DataFrame with 'Group' and 'Deck' columns.
        row: object exposing a ``Group`` attribute (e.g. a row of ``df``).
    """
    in_same_group = df.Group == row.Group
    known_decks = df.loc[in_same_group, 'Deck'].dropna()
    return known_decks.nunique() > 1
        

In [728]:
def fill_potential_decks(df):
    """Add a 'potential_decks' column listing candidate decks per passenger.

    Rows that already have a Cabin get None. For rows without a Cabin:
      * HomePlanet known, Bills == 0 and the group already spans more than
        one known deck -> ``decks_by_planet_no_bills[HomePlanet]``
      * HomePlanet known otherwise -> ``decks_by_planet[HomePlanet]``
      * HomePlanet unknown -> ``all_cabin_decks``

    Args:
        df: DataFrame with 'Cabin', 'Bills', 'HomePlanet', 'Group' and
            'Deck' columns. It is modified in place.

    Returns:
        The same DataFrame with the new column.
    """

    def func_potential_decks(row):
        # Passengers with a known cabin need no candidates.
        if not pd.isna(row.Cabin):
            return None

        planet_known = not pd.isna(row.HomePlanet)

        if row.Bills == 0 and planet_known:
            decks_in_group = df.loc[df.Group == row.Group, 'Deck'].dropna().unique()
            if len(decks_in_group) > 1:
                return decks_by_planet_no_bills[row.HomePlanet]

        return decks_by_planet[row.HomePlanet] if planet_known else all_cabin_decks

    df['potential_decks'] = df.apply(func_potential_decks, axis=1)
    return df

In [729]:
def fill_potential_sides(df):
    """Add a 'potential_sides' column listing candidate cabin sides.

    Rows that already have a Cabin get None. A passenger without a Cabin
    whose GroupSize is above 1 gets the side of the first group member with
    a known Side; everyone else without a Cabin gets ``['P', 'S']``.

    Args:
        df: DataFrame with 'Cabin', 'GroupSize', 'Group' and 'Side' columns.
            It is modified in place.

    Returns:
        The same DataFrame with the new column.
    """

    def func_potential_sides(row):
        if not pd.isna(row.Cabin):
            return None

        if row.GroupSize > 1:
            known_sides = df.loc[df.Group == row.Group, 'Side'].dropna()
            if not known_sides.empty:
                # Reuse the side of the first group member that has one.
                return [known_sides.iloc[0]]

        return ['P', 'S']

    df['potential_sides'] = df.apply(func_potential_sides, axis=1)
    return df

In [751]:
df = fill_potential_decks(df)
df = fill_potential_sides(df)

In [752]:

                    
def rooms_to_fill(df):
    """List the unseen cabin numbers below the highest seen one, per deck and side.

    For every deck in ``all_cabin_decks`` and side in ``all_cabin_sides``,
    collect each number from 0 up to the largest observed room number that
    no row of ``df`` uses, formatted as ``"<deck>/<number>/<side>"``.

    Args:
        df: DataFrame with 'Deck', 'Side' and 'Number' columns.

    Returns:
        dict mapping deck -> {'P': [...], 'S': [...]} of free cabin strings
        in ascending room-number order.
    """
    rooms = {}
    for deck in all_cabin_decks:
        rooms[deck] = {'P': [], 'S': []}
        for side in all_cabin_sides:
            occupied = (df.Deck == deck) & (df.Side == side) & (df.Number.notna())
            # A set keeps the membership test below O(1) per candidate number.
            rooms_seen = set(df.loc[occupied, 'Number'].astype(int).tolist())
            largest_room_number = max(rooms_seen, default=-1)

            rooms[deck][side].extend(
                f"{deck}/{i}/{side}"
                for i in range(largest_room_number + 1)
                if i not in rooms_seen
            )

    return rooms
           

In [753]:
def each_room_per_passenger(df):
    """Bracket the possible room number of every passenger without a Cabin.

    For each such passenger and each (deck, side) pair taken from the row's
    'potential_decks' and 'potential_sides', look at rows on that deck and
    side and record ``[highest Number among rows with a smaller index,
    lowest Number among rows with a larger index]``. A missing bound is
    stored as -1.

    Args:
        df: DataFrame with 'Cabin', 'Deck', 'Side', 'Number',
            'potential_decks' and 'potential_sides' columns.

    Returns:
        dict: index label -> deck -> side -> [lower_bound, upper_bound].
    """
    passenger_options = {}
    without_cabin = df[df.Cabin.isna()]

    for index, passenger in without_cabin.iterrows():
        per_deck = {}
        for deck in passenger.potential_decks:
            per_side = {}
            for side in passenger.potential_sides:
                on_deck_side = (df.Deck == deck) & (df.Side == side)
                earlier = df.loc[(df.index < index) & on_deck_side, 'Number'].dropna()
                later = df.loc[(df.index > index) & on_deck_side, 'Number'].dropna()

                lower_bound = earlier.max() if len(earlier) > 0 else -1
                upper_bound = later.min() if len(later) > 0 else -1
                per_side[side] = [lower_bound, upper_bound]
            per_deck[deck] = per_side
        passenger_options[index] = per_deck

    return passenger_options

# Solo traveller with exactly one room that fits

In [754]:
def solo_group_one_option(df):
    """Assign a cabin to solo passengers that have exactly one one-room gap.

    Uses the ``[lower, upper]`` brackets from ``each_room_per_passenger``
    (-1 marks a missing bound). For a passenger with GroupSize == 1, a
    (deck, side) counts as a candidate when exactly one room number lies
    between the bounds (``lower + 2 == upper``). If the passenger ends up
    with a single candidate, that cabin is written to 'Cabin', and
    Deck/Number/Side are then derived by ``fill_deck_and_side_from_cabin``.

    Args:
        df: passenger DataFrame; modified in place.

    Returns:
        The DataFrame returned by ``fill_deck_and_side_from_cabin``.
    """
    # NOTE(review): gaps wider than one room are not counted as candidates.
    # The original code had an unreachable `lower + 2 < upper` branch nested
    # inside the `==` check, which suggests `<=` may have been intended.
    # Confirm before changing, since it would alter which cabins get assigned.
    options = each_room_per_passenger(df)
    for index, decks in options.items():
        # `index` is an index label (it comes from iterrows), so use .loc.
        if df.loc[index, 'GroupSize'] != 1:
            continue

        empty_cabin_options_for_passenger = []
        for deck, sides in decks.items():
            for side, (lower, upper) in sides.items():
                if lower >= 0 and lower + 2 == upper:
                    # int() keeps the cabin string parseable even if Number
                    # is stored as a float.
                    empty_cabin_options_for_passenger.append(
                        f"{deck}/{int(lower) + 1}/{side}"
                    )

        if len(empty_cabin_options_for_passenger) == 1:
            df.loc[index, 'Cabin'] = empty_cabin_options_for_passenger[0]

    return fill_deck_and_side_from_cabin(df)

                    
    

In [755]:
df = solo_group_one_option(df)

In [756]:
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 34
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  34
Number                34
Side                  34
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12935
potential_sides    12935
dtype: int64

In [757]:
df.iloc[4233]

PassengerId                  3034_01
HomePlanet                    Europa
CryoSleep                      False
Cabin                         B/98/P
Destination              TRAPPIST-1e
Age                             40.0
VIP                            False
RoomService                      0.0
FoodCourt                        0.0
ShoppingMall                     0.0
Spa                              0.0
VRDeck                           0.0
Name               Dscheat Noxnuther
Set                            Train
Transported                    False
Group                           3034
GroupNumber                        1
Deck                               B
Number                            98
Side                               P
FirstName                    Dscheat
LastName                   Noxnuther
GroupSize                          1
Bills                            0.0
potential_decks                 None
potential_sides                 None
Name: 4233, dtype: object

# No free rooms, so the passenger has to share a cabin with a group member

In [758]:
def no_free_rooms_so_shares(df):
    """Put passengers with no free room into their group's only matching cabin.

    A passenger "has a free room" when any of their (deck, side) brackets
    from ``each_room_per_passenger`` leaves at least one unused number
    between a non-negative lower bound and the upper bound. Passengers
    without one are given the cabin of their other group members, provided
    those members (restricted to the passenger's potential decks and sides)
    occupy exactly one distinct cabin.

    Args:
        df: passenger DataFrame; modified in place.

    Returns:
        The DataFrame returned by ``fill_deck_and_side_from_cabin``.
    """
    options = each_room_per_passenger(df)
    for index, decks in options.items():
        has_free_room = any(
            bounds[0] + 2 <= bounds[1] and bounds[0] >= 0
            for sides in decks.values()
            for bounds in sides.values()
        )
        if has_free_room:
            continue

        passenger = df.loc[index]
        # Other members of the same group on a deck/side this passenger could use.
        compatible_group_members = (
            (df.Group == passenger.Group)
            & (df.index != index)
            & (df.Deck.isin(passenger.potential_decks))
            & (df.Side.isin(passenger.potential_sides))
        )
        group_cabins = df[compatible_group_members].Cabin
        if len(group_cabins.unique()) == 1:
            df.loc[index, 'Cabin'] = group_cabins.iloc[0]

    return fill_deck_and_side_from_cabin(df)
    
        
    


In [759]:
df = no_free_rooms_so_shares(df)

In [760]:
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 33
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  33
Number                33
Side                  33
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12935
potential_sides    12935
dtype: int64

# Only one passenger can take that cabin

In [761]:

def rooms_to_fill(df):
    """List the unseen cabin numbers below the highest seen one, per deck and side.

    For every deck in ``all_cabin_decks`` and side in ``all_cabin_sides``,
    collect each number from 0 up to the largest observed room number that
    no row of ``df`` uses, formatted as ``"<deck>/<number>/<side>"``.

    Args:
        df: DataFrame with 'Deck', 'Side' and 'Number' columns.

    Returns:
        dict mapping deck -> {'P': [...], 'S': [...]} of free cabin strings
        in ascending room-number order.
    """
    rooms = {}
    for deck in all_cabin_decks:
        rooms[deck] = {'P': [], 'S': []}
        for side in all_cabin_sides:
            occupied = (df.Deck == deck) & (df.Side == side) & (df.Number.notna())
            # A set keeps the membership test below O(1) per candidate number.
            rooms_seen = set(df.loc[occupied, 'Number'].astype(int).tolist())
            largest_room_number = max(rooms_seen, default=-1)

            rooms[deck][side].extend(
                f"{deck}/{i}/{side}"
                for i in range(largest_room_number + 1)
                if i not in rooms_seen
            )

    return rooms
           

In [762]:
def only_passenger_that_fits(df):
    """Assign each free cabin that exactly one cabin-less passenger can occupy.

    Free cabins come from ``rooms_to_fill`` and passenger brackets from
    ``each_room_per_passenger``. A passenger fits a cabin when, for the
    cabin's deck and side, the cabin number is the single number between the
    passenger's lower and upper bound. When exactly one passenger fits, the
    cabin is written to that passenger's 'Cabin' and the passenger is
    removed from further consideration.

    Args:
        df: passenger DataFrame; modified in place.

    Returns:
        The DataFrame returned by ``fill_deck_and_side_from_cabin``.
    """
    free_rooms_dict = rooms_to_fill(df)
    free_passengers = each_room_per_passenger(df)

    for deck, deck_items in free_rooms_dict.items():
        for side, side_items in deck_items.items():
            for cabin in side_items:
                # Parse once per cabin rather than once per passenger.
                room_number = int(cabin.split("/")[1])

                passenger_options_for_empty_cabins = []
                for passenger_ind, passenger in free_passengers.items():
                    if deck not in passenger or side not in passenger[deck]:
                        continue
                    lower, upper = passenger[deck][side]
                    if lower + 1 == room_number == upper - 1:
                        passenger_options_for_empty_cabins.append(passenger_ind)

                if len(passenger_options_for_empty_cabins) == 1:
                    passenger_to_fill_ind = passenger_options_for_empty_cabins[0]
                    df.loc[passenger_to_fill_ind, 'Cabin'] = cabin
                    # The passenger is placed; do not offer them other cabins.
                    del free_passengers[passenger_to_fill_ind]

    return fill_deck_and_side_from_cabin(df)
    
    
                    
                
        
        
    

In [763]:
df = only_passenger_that_fits(df)

In [764]:
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 31
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  31
Number                31
Side                  31
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12935
potential_sides    12935
dtype: int64

# stop

In [765]:


# Manual overrides: cabins entered by hand for specific row labels.
df.loc[4233,'Cabin'] = 'B/98/P'
df.loc[4254,'Cabin'] = 'B/99/P'
df.loc[6493,'Cabin'] = 'E/300/S'
df.loc[6514,'Cabin'] = 'E/301/S'
df.loc[12892,'Cabin'] = 'F/1785/S' # maybe only one of these two is in this room and the other shares a different room
df.loc[12893,'Cabin'] = 'F/1785/S'
df.loc[9265,'Cabin'] = 'F/1267/S'
df.loc[9267,'Cabin'] = 'F/1267/S'
df.loc[8413,'Cabin'] = 'A/57/P'
df.loc[1429,'Cabin'] = 'E/58/P'

# NOTE(review): fill_deck_and_side_from_cabin only updates rows whose Number
# is still NaN, so a row overridden above that already had a Number keeps
# its old Deck/Number/Side - confirm that none of these rows are affected.
df = fill_deck_and_side_from_cabin(df)


# workings


In [766]:
def all_cabin_options_for_each_row(df):
    """Print the open room-number gaps for every passenger without a Cabin.

    For each such passenger and each (deck, side) from the row's
    'potential_decks' and 'potential_sides', find the highest Number among
    rows with a smaller index and the lowest Number among rows with a larger
    index on that deck and side. The pair is reported as an option unless a
    bound is missing, the bounds are equal, or they are consecutive. The
    number of passengers with no option at all is printed last.

    Args:
        df: DataFrame with 'Cabin', 'PassengerId', 'GroupSize', 'Deck',
            'Side', 'Number', 'potential_decks' and 'potential_sides'.

    Returns:
        None; the result is printed.
    """
    count = 0
    for index, passenger in df[df.Cabin.isna()].iterrows():
        print("\nindex", index)
        print("passenger",passenger.PassengerId)
        print("GroupSize", passenger.GroupSize)

        # `index` is a label, so split the frame by label, not by position.
        rows_before = df.index < index
        rows_after = df.index > index

        options = []
        for deck in passenger.potential_decks:
            for side in passenger.potential_sides:
                on_deck_side = (df.Deck == deck) & (df.Side == side)
                top_room_number_before = df.loc[rows_before & on_deck_side, 'Number'].max()
                smallest_room_number_after = df.loc[rows_after & on_deck_side, 'Number'].min()

                if pd.isna(top_room_number_before) or pd.isna(smallest_room_number_after):
                    continue
                # Equal or consecutive bounds leave no unused number between them.
                if top_room_number_before == smallest_room_number_after:
                    continue
                if top_room_number_before + 1 == smallest_room_number_after:
                    continue
                options.append([deck,side,top_room_number_before,smallest_room_number_after])

        print(options)
        print(passenger.potential_sides)
        if len(options) == 0:
            count += 1
    print(count)


In [767]:
all_cabin_options_for_each_row(df)


index 404
passenger 0293_01
GroupSize 1
[['B', 'P', 12, 14], ['C', 'S', 12, 14]]
['P', 'S']

index 421
passenger 0310_01
GroupSize 1
[['B', 'P', 12, 14], ['C', 'S', 12, 14]]
['P', 'S']

index 479
passenger 0348_02
GroupSize 2
[['E', 'P', 19, 22]]
['P']

index 505
passenger 0364_02
GroupSize 2
[['E', 'P', 19, 22]]
['P']

index 517
passenger 0374_02
GroupSize 2
[['E', 'P', 19, 22]]
['P']

index 1466
passenger 1041_01
GroupSize 1
[['C', 'S', 39, 41], ['D', 'S', 35, 37]]
['P', 'S']

index 1543
passenger 1095_01
GroupSize 1
[['C', 'S', 39, 41], ['D', 'S', 35, 37]]
['P', 'S']

index 2442
passenger 1709_03
GroupSize 7
[]
['S']

index 2970
passenger 2092_03
GroupSize 5
[]
['S']

index 3529
passenger 2513_01
GroupSize 1
[['E', 'P', 149, 151], ['F', 'P', 518, 520]]
['P', 'S']

index 3530
passenger 2514_01
GroupSize 1
[['E', 'P', 149, 151], ['F', 'P', 518, 520]]
['P', 'S']

index 4569
passenger 3287_02
GroupSize 3
[]
['S']

index 4751
passenger 3411_02
GroupSize 7
[]
['S']

index 5016
passenger 

In [768]:
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 31
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  31
Number                31
Side                  31
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12935
potential_sides    12935
dtype: int64

# compare to old one

In [769]:
df_to_comp = pd.read_csv('31remaining.csv')

In [770]:
def comp(df1,df2):
    """Print every position at which the two frames disagree on 'Cabin'.

    Rows are paired by position. A pair is skipped when both cabins are
    missing; otherwise differing values are printed as ``"<df1> <df2>"``.

    Args:
        df1: DataFrame with a 'Cabin' column; drives the iteration.
        df2: DataFrame with a 'Cabin' column, at least as long as ``df1``.

    Raises:
        IndexError: if ``df2`` has fewer rows than ``df1``.
    """
    # Pull the columns out once instead of rebuilding a row Series per
    # access, and pair by position regardless of the frames' index labels.
    cabins1 = df1.Cabin.tolist()
    cabins2 = df2.Cabin.tolist()

    for position, cabin1 in enumerate(cabins1):
        cabin2 = cabins2[position]
        if pd.isna(cabin1) and pd.isna(cabin2):
            continue
        if cabin1 != cabin2:
            print(cabin1, cabin2)

comp(df,df_to_comp)
        