In [3]:
import pandas as pd
import numpy as np

df = pd.read_csv('pre_processed.csv')
df.Number = df.Number.astype('Int64')



In [4]:

def fill_deck_and_side_from_cabin(df):
    # Define a mask to identify rows where 'Number' is NaN and 'Cabin' is not NaN
    mask = df['Number'].isna() & df['Cabin'].notna()
    
    # Use the mask to update only the filtered rows, converting types appropriately
    df.loc[mask, ['Deck', 'Number', 'Side']] = df.loc[mask, 'Cabin'].apply(
        lambda x: pd.Series({
            'Deck': x.split('/')[0],
            'Number': int(x.split('/')[1]),  # Explicit conversion to integer here
            'Side': x.split('/')[2]
        })
    )

    return df

In [5]:
decks_by_planet = {
    'Earth':['E','F','G'],
    'Europa': ['A','B','C','D','E','T'],
    'Mars': ['D','E','F']
}

decks_by_planet_no_bills = {
    'Earth':['G'],
    'Europa':['B'],
    'Mars': ['E','F']
}

planet_by_deck = {
    'A':['Europa'],'B':['Europa'],'C':['Europa'],'D':['Europa','Mars'],'E':['Europa','Mars','Earth'],
    'F':['Earth','Mars'],'G':['Earth'],'T':['Europa']
}

homeplanets = ['Earth', 'Europa', 'Mars']

all_cabin_sides = ['P','S']

all_cabin_decks = list(df.dropna(subset = ['Deck']).Deck.unique())


In [6]:
def multiple_decks_in_group(df,row):
    return len(df[df.Group == row.Group].dropna(subset ='Deck').Deck.unique()) > 1
        

In [7]:
def fill_potential_decks(df):
    
    def func_potential_decks(row):
        if pd.isna(row.Cabin):
            if row.Bills == 0 and not pd.isna(row.HomePlanet):
                if len(df[df.Group == row.Group].dropna(subset = 'Deck').Deck.unique()) > 1:
                    return decks_by_planet_no_bills[row.HomePlanet]
                
            if not pd.isna(row.HomePlanet):
                return decks_by_planet[row.HomePlanet]
            
            else:
                return all_cabin_decks
            
    df['potential_decks'] = df.apply(func_potential_decks, axis = 1)
    return df

In [8]:
def fill_potential_sides(df):
    
    def func_potential_sides(row):
        if pd.isna(row.Cabin):
            if row.GroupSize > 1:
                group = df[df.Group == row.Group].dropna(subset = 'Side')
                if len(group) > 0:
                    return [group.iloc[0].Side]
            return ['P','S']
        
    df['potential_sides'] = df.apply(func_potential_sides,axis = 1)
    return df

In [9]:
df = fill_potential_decks(df)
df = fill_potential_sides(df)

In [10]:

                    
def rooms_to_fill(df):
    rooms = {}
    for deck in all_cabin_decks:
        rooms[deck] = {'P': [], 'S': []} 
        for side in all_cabin_sides:
            rooms_seen = df[(df.Deck == deck) & (df.Side == side) & (df.Number.notna())].Number.astype(int).tolist()
            largest_room_number = max(rooms_seen, default=-1) 

            for i in range(largest_room_number + 1):
                if i not in rooms_seen:
                    rooms[deck][side].append(f"{deck}/{i}/{side}")

    return rooms
           

In [11]:

 
def row_is_compatible_with_cabin(index, row, deck, side, room_number_to_fill):
        if deck not in row.potential_decks or side not in row.potential_sides:
            return False
        
        before = df.loc[(df.index < index) & (df.Deck == deck) & (df.Side == side), 'Number'].dropna().unique()
        after = df.loc[(df.index > index) & (df.Deck == deck) & (df.Side == side), 'Number'].dropna().unique()
        
        if len(before) > 0 and len(after) > 0:
            return max(before) < room_number_to_fill < min(after)
        return False
    
                        
def empty_room_one_compatible(df):
    
    empty_cabins = rooms_to_fill(df)

    for deck, sides in empty_cabins.items():
        for side, cabins in sides.items():
            for cabin in cabins:
                room_number_to_fill = int(cabin.split('/')[1])
                compatible_indices = [
                    index for index, row in df[df.Cabin.isna()].iterrows()
                    if row_is_compatible_with_cabin(index, row, deck, side, room_number_to_fill)
                ]

                if len(compatible_indices) == 1:
                    df.loc[compatible_indices[0], 'Cabin'] = cabin

    return fill_deck_and_side_from_cabin(df)
                
                        

# Trials

In [15]:
def every_option_per_room(df):
    passenger_options = {}
    for index,passenger in df[df.Cabin.isna()].iterrows():
        passenger_options[index] = {}
        for deck in passenger.potential_decks:
            passenger_options[index][deck] = {}
            for side in passenger.potential_sides:
                before = df.loc[(df.index < index) & (df.Deck == deck) & (df.Side == side), 'Number'].dropna().unique()
                after = df.loc[(df.index > index) & (df.Deck == deck) & (df.Side == side), 'Number'].dropna().unique()
                
                if len(before) > 0:
                    passenger_options[index][deck][side] = [max(before)]
                else:
                    passenger_options[index][deck][side] = [-1]
                if len(after) > 0:
                    passenger_options[index][deck][side].append(min(after))
                else:
                    passenger_options[index][deck][side].append(-1)
    return passenger_options


                    
                
        

In [16]:
options = every_option_per_room(df)
options

{15: {'E': {'P': [-1, 0], 'S': [-1, 0]},
  'F': {'P': [2, 3], 'S': [3, 4]},
  'G': {'P': [-1, 0], 'S': [1, 3]}},
 66: {'A': {'S': [0, 1]},
  'B': {'S': [0, 0]},
  'C': {'S': [2, 3]},
  'D': {'S': [1, 2]},
  'E': {'S': [0, 1]},
  'T': {'S': [-1, 0]}},
 137: {'D': {'P': [3, 4], 'S': [1, 2]},
  'E': {'P': [3, 4], 'S': [5, 6]},
  'F': {'P': [19, 21], 'S': [22, 23]}},
 150: {'A': {'P': [-1, 0]},
  'B': {'P': [4, 5]},
  'C': {'P': [1, 2]},
  'D': {'P': [3, 4]},
  'E': {'P': [3, 4]},
  'T': {'P': [-1, 0]}},
 315: {'E': {'P': [10, 11], 'S': [14, 16]},
  'F': {'P': [48, 49], 'S': [42, 43]},
  'G': {'P': [38, 40], 'S': [35, 36]}},
 331: {'D': {'P': [7, 8], 'S': [7, 8]},
  'E': {'P': [10, 11], 'S': [14, 16]},
  'F': {'P': [51, 52], 'S': [45, 46]}},
 336: {'D': {'S': [7, 8]}, 'E': {'S': [14, 16]}, 'F': {'S': [46, 47]}},
 382: {'E': {'P': [14, 15]}, 'F': {'P': [61, 62]}, 'G': {'P': [44, 45]}},
 394: {'A': {'P': [2, 3]},
  'B': {'P': [11, 12]},
  'C': {'P': [6, 7]},
  'D': {'P': [10, 11]},
  'E': {'

# there arent any free rooms for it so has to share


In [32]:
def no_free_rooms_so_shares_2(df):
    free_rooms = rooms_to_fill(df)
    for index,passenger in df[df.Cabin.isna()].iterrows():
        options = False
        for deck in passenger.potential_decks:
            for side in all_cabin_sides:
                if deck not in free_rooms:
                    continue
                if side not in free_rooms[deck]:
                    continue
                potential_room_numbers = free_rooms[deck][side]
                before_slice = df.iloc[:index]
                after_slice = df.iloc[index+1:]
                before_slice = before_slice[(before_slice.Deck == deck) & (before_slice.Side == side)].Number.unique()
                if len(before_slice) > 1:
                    top_room_number_before = max(before_slice)
                else:
                    top_room_number_before = -2
                    
                smallest_room_number_after = min(after_slice[(after_slice.Deck == deck) & (after_slice.Side == side)].Number.unique())
                if top_room_number_before + 1 == smallest_room_number_after:
                    if int(top_room_number_before) + 1 in potential_room_numbers:
                        options = True
        if not options:
            if len(df[(df.Group == passenger.Group) & (df.Deck.isin(passenger.potential_decks))].Cabin.unique()) == 1:
                print(passenger)
    return df
        
                

In [38]:
df4[df4.Cabin.isna()].potential_decks.iloc[0]

"['A', 'B', 'C', 'D', 'E', 'T']"

In [34]:
df4 = no_free_rooms_so_shares_2(df4)

TypeError: only list-like objects are allowed to be passed to isin(), you passed a `str`

In [None]:
def no_free_rooms_so_shares(df):
    for index, passenger in df[df.Cabin.isna()].iterrows():
        options = False
        for deck in passenger.potential_decks:
            for side in all_cabin_sides:
                
                before_slice = df.iloc[:index]
                after_slice = df.iloc[index+1:] 
                top_room_number_before = np.max(before_slice[(before_slice.Deck == deck) & (before_slice.Side == side)].Number)
                smallest_room_number_after = np.min(after_slice[(after_slice.Deck == deck) & (after_slice.Side == side)].Number)
                
                if top_room_number_before + 1 != smallest_room_number_after:
                    if pd.isna(top_room_number_before) and smallest_room_number_after == 0:
                        continue
                    elif top_room_number_before == smallest_room_number_after:
                        continue
                    else:
                        options = True
                        break
            if options:
                break
        if not options:
            other_group_member = df[(df.Group == passenger.Group) & (~df.Cabin.isna()) & (df.Deck.isin(passenger.potential_decks))]
            print(index)
            if len(other_group_member.Cabin.unique()) == 1:
                
                df.loc[index,'Cabin'] = other_group_member.iloc[0].Cabin
                
    df[['Deck','Number','Side']] = df.apply(fill_deck_cabin_side,axis = 1)
    return df
  
            # Slice the DataFrame first and then apply the boolean mask

df2 = no_free_rooms_so_shares(df2)