# deck,deck_name in dict.items()

In [1339]:
import pandas as pd
import numpy as np

# Load the pre-processed passenger table that every later cell works on.
df = pd.read_csv('pre_processed.csv')
# 'Int64' is pandas' nullable integer dtype, so missing room numbers stay
# missing instead of forcing the whole column to float.
df.Number = df.Number.astype('Int64')



In [1340]:
# Second, untouched load of the same CSV under a separate name
# (presumably kept as a reference copy -- it is not used in the cells shown).
original = pd.read_csv('pre_processed.csv')

In [1341]:

def fill_deck_and_side_from_cabin(df):
    """Fill ``Deck``, ``Number`` and ``Side`` from ``Cabin`` where they are missing.

    Only rows whose ``Number`` is missing while ``Cabin`` is present are
    touched. ``Cabin`` is split on ``'/'`` and read as
    ``"<deck>/<number>/<side>"``.

    Args:
        df: DataFrame with ``Cabin``, ``Deck``, ``Number`` and ``Side``
            columns. It is modified in place.

    Returns:
        The same ``df`` object, so calls can be chained.

    Raises:
        ValueError: if the middle cabin component is not an integer.
        IndexError: if a cabin string has fewer than three components.
    """
    # Rows that have a cabin string but have not been split out yet.
    mask = df['Number'].isna() & df['Cabin'].notna()

    # Nothing to do: skip the assignment entirely. With no matching rows
    # ``apply`` has no per-row result to expand into the three columns.
    if not mask.any():
        return df

    def _split_cabin(cabin):
        # Split once and reuse the parts for all three target columns.
        parts = cabin.split('/')
        return pd.Series({
            'Deck': parts[0],
            'Number': int(parts[1]),  # explicit conversion to integer
            'Side': parts[2],
        })

    df.loc[mask, ['Deck', 'Number', 'Side']] = df.loc[mask, 'Cabin'].apply(_split_cabin)

    return df

In [1342]:
# Candidate decks per home planet; fill_potential_decks uses this as the
# default lookup for a passenger whose cabin is unknown.
decks_by_planet = {
    'Earth': ['E', 'F', 'G'],
    'Europa': ['A', 'B', 'C', 'D', 'E', 'T'],
    'Mars': ['D', 'E', 'F'],
}

# Narrower candidate decks that fill_potential_decks returns for some
# passengers whose Bills value is 0.
decks_by_planet_no_bills = {
    'Earth': ['G'],
    'Europa': ['B'],
    'Mars': ['E', 'F'],
}

# Reverse lookup: home planets associated with each deck letter.
planet_by_deck = {
    'A': ['Europa'],
    'B': ['Europa'],
    'C': ['Europa'],
    'D': ['Europa', 'Mars'],
    'E': ['Europa', 'Mars', 'Earth'],
    'F': ['Earth', 'Mars'],
    'G': ['Earth'],
    'T': ['Europa'],
}

homeplanets = ['Earth', 'Europa', 'Mars']

all_cabin_sides = ['P', 'S']

# Every deck letter that occurs at least once in the loaded data.
all_cabin_decks = list(df['Deck'].dropna().unique())


In [1343]:
def multiple_decks_in_group(df, row):
    """Return True when the group of ``row`` has more than one distinct known deck.

    Args:
        df: DataFrame with ``Group`` and ``Deck`` columns.
        row: object exposing a ``Group`` attribute (e.g. a row of ``df``).
    """
    known_decks = df.loc[df.Group == row.Group, 'Deck'].dropna()
    return known_decks.nunique() > 1
        

In [1344]:
def fill_potential_decks(df):
    """Add a ``potential_decks`` column listing candidate decks per passenger.

    Rows that already have a ``Cabin`` get ``None``. For the others:

    * ``Bills == 0``, a known ``HomePlanet`` and more than one distinct known
      deck inside the passenger's group -> ``decks_by_planet_no_bills``;
    * otherwise a known ``HomePlanet`` -> ``decks_by_planet``;
    * unknown ``HomePlanet`` -> ``all_cabin_decks``.

    Args:
        df: passenger DataFrame; it is modified in place.

    Returns:
        The same ``df`` with the new column attached.
    """

    def func_potential_decks(row):
        # Passengers with a cabin need no candidates.
        if not pd.isna(row.Cabin):
            return None

        planet_known = not pd.isna(row.HomePlanet)

        if row.Bills == 0 and planet_known:
            decks_in_group = df.loc[df.Group == row.Group, 'Deck'].dropna().unique()
            if len(decks_in_group) > 1:
                return decks_by_planet_no_bills[row.HomePlanet]

        if planet_known:
            return decks_by_planet[row.HomePlanet]
        return all_cabin_decks

    df['potential_decks'] = df.apply(func_potential_decks, axis=1)
    return df

In [1345]:
def fill_potential_sides(df):
    """Add a ``potential_sides`` column listing candidate ship sides per passenger.

    Rows that already have a ``Cabin`` get ``None``. A passenger in a group of
    more than one gets the first known ``Side`` of that group as the only
    candidate; everyone else gets both ``'P'`` and ``'S'``.

    Args:
        df: passenger DataFrame; it is modified in place.

    Returns:
        The same ``df`` with the new column attached.
    """

    def func_potential_sides(row):
        if not pd.isna(row.Cabin):
            return None

        if row.GroupSize > 1:
            sides_in_group = df.loc[df.Group == row.Group, 'Side'].dropna()
            if not sides_in_group.empty:
                return [sides_in_group.iloc[0]]

        return ['P', 'S']

    df['potential_sides'] = df.apply(func_potential_sides, axis=1)
    return df

In [1346]:
# Attach the candidate deck and side lists used by the imputation passes.
df = fill_potential_decks(df)
df = fill_potential_sides(df)

In [1347]:

                    
def rooms_to_fill(df):
    """List the unseen room numbers below the highest known one, per deck and side.

    For every deck in ``all_cabin_decks`` and side in ``all_cabin_sides`` the
    room numbers present in ``df`` are collected; every number from 0 up to
    the largest one that is not present is reported as a free cabin.

    Args:
        df: DataFrame with ``Deck``, ``Side`` and ``Number`` columns.

    Returns:
        ``{deck: {side: ["deck/number/side", ...]}}`` with cabins in ascending
        number order. A deck/side with no known rooms maps to an empty list.
    """
    rooms = {}
    for deck in all_cabin_decks:
        # Build the buckets from the same side list that is iterated below,
        # so the two cannot drift apart.
        rooms[deck] = {side: [] for side in all_cabin_sides}
        for side in all_cabin_sides:
            known = (df.Deck == deck) & (df.Side == side) & df.Number.notna()
            # A set keeps the membership test below constant-time.
            rooms_seen = set(df.loc[known, 'Number'].astype(int))
            largest_room_number = max(rooms_seen, default=-1)

            rooms[deck][side] = [
                f"{deck}/{i}/{side}"
                for i in range(largest_room_number + 1)
                if i not in rooms_seen
            ]

    return rooms
           

In [1348]:



def each_room_per_passenger(df):
    """Bracket the room numbers available to every passenger without a cabin.

    For each such passenger and each of their candidate deck/side pairs, the
    known room numbers of rows with a smaller index and of rows with a larger
    index are looked up.

    Args:
        df: DataFrame with ``Cabin``, ``Deck``, ``Side``, ``Number``,
            ``potential_decks`` and ``potential_sides`` columns.

    Returns:
        ``{index: {deck: {side: [lower, upper]}}}`` where ``lower`` is the
        largest room number among earlier rows and ``upper`` the smallest
        among later rows; either is ``-1`` when no such row exists.
    """
    passenger_options = {}

    cabinless = df[df['Cabin'].isna()]

    for idx, person in cabinless.iterrows():
        # Row position relative to this passenger, by index label.
        is_earlier = df.index < idx
        is_later = df.index > idx

        brackets = {}
        for deck in person.potential_decks:
            per_side = {}
            for side in person.potential_sides:
                on_deck_side = (df['Deck'] == deck) & (df['Side'] == side)

                rooms_before = df.loc[on_deck_side & is_earlier, 'Number'].dropna().unique()
                rooms_after = df.loc[on_deck_side & is_later, 'Number'].dropna().unique()

                per_side[side] = [
                    max(rooms_before, default=-1),
                    min(rooms_after, default=-1),
                ]
            brackets[deck] = per_side

        passenger_options[idx] = brackets

    return passenger_options





# all imputes

In [1353]:
def all_imputes(df):
    """Run the three cabin-imputation passes in order and return the result.

    The passes are ``solo_group_one_option``, ``no_free_rooms_so_shares`` and
    ``only_passenger_that_fits``; each receives the output of the previous one.
    """
    passes = (
        solo_group_one_option,
        no_free_rooms_so_shares,
        only_passenger_that_fits,
    )
    for impute in passes:
        df = impute(df)
    return df
    

In [1354]:
# Run every imputation pass once, then count the missing values per column.
df = all_imputes(df)
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 37
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  37
Number                37
Side                  37
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12671
potential_sides    12671
dtype: int64

# solo group and only one room that fits

In [1322]:
def solo_group_one_option(df):
    """Assign a cabin to solo passengers who have exactly one free room available.

    For each passenger without a cabin whose ``GroupSize`` is 1, every
    candidate deck/side bracket ``[lower, upper]`` from
    ``each_room_per_passenger`` with at least one unused number between the
    bounds contributes the cabin ``deck/(lower + 1)/side``. If exactly one
    such cabin exists it is written to ``Cabin``.

    Args:
        df: passenger DataFrame; it is modified in place.

    Returns:
        ``df`` after ``fill_deck_and_side_from_cabin`` has split the newly
        assigned cabins into ``Deck``/``Number``/``Side``.
    """
    potential_options = each_room_per_passenger(df)

    # Snapshot the keys: ``potential_options`` may be rebound inside the loop.
    for index in list(potential_options):
        # ``index`` is an index label (it comes from ``iterrows``), so look it
        # up by label, consistent with the ``df.loc`` assignment below.
        if df.loc[index, 'GroupSize'] != 1:
            continue

        empty_cabin_options_for_passenger = []
        refresh_options = False

        for deck, sides in potential_options[index].items():
            for side, numbers in sides.items():
                lower, upper = numbers[0], numbers[1]
                if lower + 2 <= upper and lower >= 0:
                    empty_cabin_options_for_passenger.append(f"{deck}/{lower + 1}/{side}")
                    # More than one unused number in the gap: the bracket
                    # still has room after this assignment.
                    if lower + 2 < upper:
                        refresh_options = True

        if len(empty_cabin_options_for_passenger) == 1:
            df.loc[index, 'Cabin'] = empty_cabin_options_for_passenger[0]
            # NOTE(review): brackets are only recomputed when the gap held
            # more than one room; when it held exactly one, later passengers
            # in this loop still see the pre-assignment bracket -- confirm
            # that this is intended.
            if refresh_options:
                df = fill_deck_and_side_from_cabin(df)
                potential_options = each_room_per_passenger(df)

    return fill_deck_and_side_from_cabin(df)


In [1323]:
# Apply the solo-passenger pass on its own.
df = solo_group_one_option(df)

In [1324]:
# Count the missing values remaining in each column.
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 40
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  40
Number                40
Side                  40
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12671
potential_sides    12671
dtype: int64

# no free rooms so has to share

In [1325]:
def no_free_rooms_so_shares(df):
    """Put a passenger into a group-mate's cabin when no free room is available.

    A passenger without a cabin qualifies when none of their candidate
    deck/side brackets from ``each_room_per_passenger`` contains an unused
    room number. If the other members of the passenger's group, restricted to
    the passenger's candidate decks and sides, occupy exactly one distinct
    cabin, that cabin is assigned.

    Args:
        df: passenger DataFrame; it is modified in place.

    Returns:
        ``df`` after ``fill_deck_and_side_from_cabin`` has split the newly
        assigned cabins into ``Deck``/``Number``/``Side``.
    """
    options_by_passenger = each_room_per_passenger(df)

    for idx, decks in options_by_passenger.items():
        # A bracket [lower, upper] has a free room when lower is a real room
        # number and at least one number lies strictly between the bounds.
        has_free_room = any(
            bounds[0] + 2 <= bounds[1] and bounds[0] >= 0
            for sides in decks.values()
            for bounds in sides.values()
        )
        if has_free_room:
            continue

        person = df.loc[idx]
        same_group = df['Group'] == person['Group']
        someone_else = df.index != idx
        deck_allowed = df['Deck'].isin(person['potential_decks'])
        side_allowed = df['Side'].isin(person['potential_sides'])

        shared_cabins = df.loc[
            same_group & someone_else & deck_allowed & side_allowed, 'Cabin'
        ].unique()

        if len(shared_cabins) == 1:
            df.loc[idx, 'Cabin'] = shared_cabins[0]

    return fill_deck_and_side_from_cabin(df)
    


In [1326]:
# Apply the cabin-sharing pass on its own.
df = no_free_rooms_so_shares(df)

In [1327]:
# Count the missing values remaining in each column.
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 39
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  39
Number                39
Side                  39
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12671
potential_sides    12671
dtype: int64

# only passenger that can take that cabin

In [1328]:

def rooms_to_fill(df):
    """List the unseen room numbers below the highest known one, per deck and side.

    Re-definition of ``rooms_to_fill``; behaves the same as the definition in
    the earlier cell. For every deck in ``all_cabin_decks`` and side in
    ``all_cabin_sides`` the room numbers present in ``df`` are collected;
    every number from 0 up to the largest one that is not present is reported
    as a free cabin.

    Args:
        df: DataFrame with ``Deck``, ``Side`` and ``Number`` columns.

    Returns:
        ``{deck: {side: ["deck/number/side", ...]}}`` with cabins in ascending
        number order. A deck/side with no known rooms maps to an empty list.
    """
    rooms = {}
    for deck in all_cabin_decks:
        # Build the buckets from the same side list that is iterated below,
        # so the two cannot drift apart.
        rooms[deck] = {side: [] for side in all_cabin_sides}
        for side in all_cabin_sides:
            known = (df.Deck == deck) & (df.Side == side) & df.Number.notna()
            # A set keeps the membership test below constant-time.
            rooms_seen = set(df.loc[known, 'Number'].astype(int))
            largest_room_number = max(rooms_seen, default=-1)

            rooms[deck][side] = [
                f"{deck}/{i}/{side}"
                for i in range(largest_room_number + 1)
                if i not in rooms_seen
            ]

    return rooms
           

In [1329]:
def only_passenger_that_fits(df):
    """Assign each free cabin that exactly one cabin-less passenger can occupy.

    A passenger fits a free cabin ``deck/number/side`` when that deck and side
    are among their candidates and their bracket ``[lower, upper]`` from
    ``each_room_per_passenger`` satisfies ``lower + 1 == number == upper - 1``.
    When exactly one passenger fits, the cabin is written to their ``Cabin``
    and they are removed from further consideration.

    The brackets are computed once up front and are not recomputed after an
    assignment.

    Args:
        df: passenger DataFrame; it is modified in place.

    Returns:
        ``df`` after ``fill_deck_and_side_from_cabin`` has split the newly
        assigned cabins into ``Deck``/``Number``/``Side``.
    """
    free_rooms_dict = rooms_to_fill(df)
    free_passengers = each_room_per_passenger(df)

    for deck, deck_items in free_rooms_dict.items():
        for side, side_items in deck_items.items():
            for cabin in side_items:
                # Parse the room number once per cabin, not once per passenger.
                room_number = int(cabin.split("/")[1])

                candidates = []
                for passenger_ind, passenger in free_passengers.items():
                    if deck not in passenger or side not in passenger[deck]:
                        continue
                    number_range = passenger[deck][side]
                    if number_range[0] + 1 == room_number == number_range[1] - 1:
                        candidates.append(passenger_ind)

                if len(candidates) == 1:
                    passenger_to_fill_ind = candidates[0]
                    df.loc[passenger_to_fill_ind, 'Cabin'] = cabin
                    # A placed passenger must not be matched to another cabin.
                    del free_passengers[passenger_to_fill_ind]

    return fill_deck_and_side_from_cabin(df)
    
    
                    
                
        
        
    

In [1330]:
# Apply the single-candidate-per-cabin pass on its own.
df = only_passenger_that_fits(df)

In [1331]:
# Count the missing values remaining in each column.
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 37
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  37
Number                37
Side                  37
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12671
potential_sides    12671
dtype: int64

# stop

In [1332]:

# Hand-entered cabin assignments, addressed by index label.
# NOTE(review): the reasoning behind each value is not recorded here.
df.loc[1429,'Cabin'] = 'E/58/P'
df.loc[8413,'Cabin'] = 'A/57/P'
df.loc[9265,'Cabin'] = 'F/1267/S'
df.loc[9267,'Cabin'] = 'F/1267/S'

# NOTE: possibly only one of these two passengers belongs in this room, with
# the other one sharing a different room.
df.loc[12892,'Cabin'] = 'F/1785/S'
df.loc[12893,'Cabin'] = 'F/1785/S'

# Split the cabins entered above into Deck / Number / Side.
df = fill_deck_and_side_from_cabin(df)


# workings


In [1333]:
def all_cabin_options_for_each_row(df):
    """Print, for each passenger without a cabin, the room gaps they could fill.

    For every candidate deck/side the largest room number among earlier rows
    and the smallest among later rows are looked up. The pair is reported as
    an option unless either bound is missing, the bounds are equal, or they
    are consecutive. After all passengers, the number of passengers with no
    option at all is printed.

    Args:
        df: DataFrame with ``Cabin``, ``PassengerId``, ``GroupSize``, ``Deck``,
            ``Side``, ``Number``, ``potential_decks`` and ``potential_sides``
            columns.

    Returns:
        None. All results are written to standard output.
    """
    count = 0
    for index, passenger in df[df.Cabin.isna()].iterrows():
        print("\nindex", index)
        print("passenger", passenger.PassengerId)
        print("GroupSize", passenger.GroupSize)

        # ``index`` is an index label, so split the frame by label rather
        # than by position.
        before_slice = df[df.index < index]
        after_slice = df[df.index > index]

        options = []
        for deck in passenger.potential_decks:
            for side in passenger.potential_sides:
                before_here = (before_slice.Deck == deck) & (before_slice.Side == side)
                after_here = (after_slice.Deck == deck) & (after_slice.Side == side)
                top_room_number_before = before_slice.loc[before_here, 'Number'].max()
                smallest_room_number_after = after_slice.loc[after_here, 'Number'].min()

                # No known room on one side of the passenger: nothing to bracket.
                if pd.isna(top_room_number_before) or pd.isna(smallest_room_number_after):
                    continue

                # Consecutive or identical bounds leave no room in between.
                if smallest_room_number_after in (top_room_number_before, top_room_number_before + 1):
                    continue

                options.append([deck, side, top_room_number_before, smallest_room_number_after])

        print(options)
        print(passenger.potential_sides)
        if not options:
            count += 1
    print(count)


In [1334]:
# Print the remaining room options for every passenger still without a cabin.
all_cabin_options_for_each_row(df)


index 404
passenger 0293_01
GroupSize 1
[['B', 'P', 12, 14], ['C', 'S', 12, 14]]
['P', 'S']

index 421
passenger 0310_01
GroupSize 1
[['B', 'P', 12, 14], ['C', 'S', 12, 14]]
['P', 'S']

index 479
passenger 0348_02
GroupSize 2
[['E', 'P', 19, 22]]
['P']

index 505
passenger 0364_02
GroupSize 2
[['E', 'P', 19, 22]]
['P']

index 517
passenger 0374_02
GroupSize 2
[['E', 'P', 19, 22]]
['P']

index 1466
passenger 1041_01
GroupSize 1
[['C', 'S', 39, 41], ['D', 'S', 35, 37]]
['P', 'S']

index 1543
passenger 1095_01
GroupSize 1
[['C', 'S', 39, 41], ['D', 'S', 35, 37]]
['P', 'S']

index 2442
passenger 1709_03
GroupSize 7
[]
['S']

index 2970
passenger 2092_03
GroupSize 5
[]
['S']

index 3529
passenger 2513_01
GroupSize 1
[['E', 'P', 149, 151], ['F', 'P', 518, 520]]
['P', 'S']

index 3530
passenger 2514_01
GroupSize 1
[['E', 'P', 149, 151], ['F', 'P', 518, 520]]
['P', 'S']

index 4569
passenger 3287_02
GroupSize 3
[]
['S']

index 4751
passenger 3411_02
GroupSize 7
[]
['S']

index 5016
passenger 

In [1335]:
# Count the missing values remaining in each column.
df.isna().sum()

PassengerId            0
HomePlanet            13
CryoSleep            310
Cabin                 31
Destination          274
Age                  270
VIP                  296
RoomService          263
FoodCourt            289
ShoppingMall         306
Spa                  284
VRDeck               268
Name                 294
Set                    0
Transported         4277
Group                  0
GroupNumber            0
Deck                  31
Number                31
Side                  31
FirstName            294
LastName             294
GroupSize              0
Bills                785
potential_decks    12671
potential_sides    12671
dtype: int64

# compare to old one

In [1336]:
# Load a previously saved table to compare the current cabins against.
df_to_comp = pd.read_csv('31remaining.csv')

In [1355]:
def comp(df1,df2):
    """Print every row whose ``Cabin`` differs between two DataFrames.

    Rows are paired by position. Two missing cabins count as equal. Each
    mismatch is printed as ``<df1 cabin> <df2 cabin> <df1 index label>``.

    Args:
        df1: first DataFrame; its length drives the comparison and its index
            labels are the ones reported.
        df2: second DataFrame; must have at least as many rows as ``df1``.

    Returns:
        None. Differences are written to standard output.

    Raises:
        IndexError: if ``df2`` has fewer rows than ``df1``.
    """
    cabins1 = df1['Cabin']
    cabins2 = df2['Cabin']
    # Walk by position and keep the label only for reporting, so a
    # non-default index on ``df1`` cannot select the wrong rows.
    for position, index in enumerate(df1.index):
        cabin1 = cabins1.iloc[position]
        cabin2 = cabins2.iloc[position]
        if pd.isna(cabin1) and pd.isna(cabin2):
            continue
        if cabin1 != cabin2:
            print(cabin1, cabin2, index)

# Print the cabins that differ between the current table and the saved one.
comp(df,df_to_comp)
        

nan E/58/P 1429
nan A/57/P 8413
nan F/1267/S 9265
nan F/1267/S 9267
nan F/1785/S 12892
nan F/1785/S 12893
