In [4]:
import pandas as pd
def map_values(data):
    """
    Mapper function that emits tuples of column/row with associated values.
    """
    # Emit column, value and row, value
    for _, row in data.iterrows():
        yield (row['Column'], row['Value'])
        yield (row['Row'], row['Value'])

def reduce_values(key, values, find_min=False):
    """
    Reducer function that finds the maximum value for a given key (column or row).
    If find_min is True, finds the minimum value instead.
    """
    func = min if find_min else max
    extreme_value = func(values)
    return key, extreme_value

def map_reduce(data, find_min=False):
    """
    MapReduce process that applies the map_values and reduce_values functions.
    """
    intermediate = {}
    # Map step
    for key, value in map_values(data):
        if key not in intermediate:
            intermediate[key] = []
        intermediate[key].append(value)

    # Sort step (not needed for max/min but typically part of MapReduce)
    sorted_intermediate = sorted(intermediate.items())

    # Reduce step
    output = {}
    for key, values in sorted_intermediate:
        reduced_key, reduced_value = reduce_values(key, values, find_min)
        output[reduced_key] = reduced_value

    return output

# Output format function
def format_output(output):
    """
    Format the output according to the assignment requirements for Part 1.
    """
    formatted_output = []
    for key, value in output.items():
        formatted_output.append(f'"{key}", {value}')
    return '\n'.join(formatted_output)

In [5]:
df = pd.read_csv('cs4650hw1.dat', names=['Column', 'Row', 'Value'])


# For Part 1
column_max = map_reduce(df[df['Column'].isin(list('ABCDEFGHIJ'))])  # Max for columns
row_min = map_reduce(df[df['Row'].isin(list('KLMNOPQRST'))], find_min=True)  # Min for rows

# Merge the two dictionaries for final output
output_part_1 = {**column_max, **row_min}

# Format the output for display
formatted_output = format_output(output_part_1)
print(formatted_output)

"A", 3
"B", 3
"C", 0
"D", 19
"E", 0
"F", 2
"G", 0
"H", 0
"I", 5
"J", 6
"K", 0
"L", 3
"M", 0
"N", 13
"O", 1
"P", 12
"Q", 0
"R", 2
"S", 5
"T", 2


In [14]:
def map_values_with_location(data):
    """
    Mapper function that emits tuples of column/row with associated values and locations.
    """
    # Emit column, (value, row) and row, (value, column)
    for _, row in data.iterrows():
        yield (row['Column'], (row['Value'], row['Row']))
        yield (row['Row'], (row['Value'], row['Column']))

def reduce_values_with_location(key, values, find_min=False):
    """
    Reducer function that finds the maximum value for a given key (column or row) and an example location.
    If find_min is True, finds the minimum value instead.
    """
    func = min if find_min else max
    if find_min:
        extreme_value, example_location = min(values, key=lambda x: x[0])
    else:
        extreme_value, example_location = max(values, key=lambda x: x[0])
    
    return key, {"value": extreme_value, "example": example_location}

def map_reduce_with_location(data, find_min=False):
    """
    MapReduce process that applies the map_values_with_location and reduce_values_with_location functions.
    """
    intermediate = {}
    # Map step
    for key, value_location in map_values_with_location(data):
        if key not in intermediate:
            intermediate[key] = []
        intermediate[key].append(value_location)

    # Sort step (not needed for max/min but typically part of MapReduce)
    sorted_intermediate = sorted(intermediate.items())

    # Reduce step
    output = {}
    for key, values_locations in sorted_intermediate:
        reduced_key, reduced_value = reduce_values_with_location(key, values_locations, find_min)
        output[reduced_key] = reduced_value

    return output

def format_output_part2(output):
    formatted_output = []
    for key, value in output.items():
        formatted_output.append(f'"{key}", {value}')
    return '\n'.join(formatted_output)

In [15]:
df = pd.read_csv('cs4650hw1.dat', names=['Column', 'Row', 'Value'])
column_max_with_location = map_reduce_with_location(df[df['Column'].isin(list('ABCDEFGHIJ'))])  # Max for columns with location
row_min_with_location = map_reduce_with_location(df[df['Row'].isin(list('KLMNOPQRST'))], find_min=True)  # Min for rows with location

# Merge the two dictionaries for final output
output_part_2 = {**column_max_with_location, **row_min_with_location}

# Format the output for display
formatted_output = format_output_part2(output_part_2)
print(formatted_output)

"A", {'value': 3, 'example': 'T'}
"B", {'value': 3, 'example': 'L'}
"C", {'value': 0, 'example': 'Q'}
"D", {'value': 19, 'example': 'K'}
"E", {'value': 0, 'example': 'M'}
"F", {'value': 2, 'example': 'R'}
"G", {'value': 0, 'example': 'Q'}
"H", {'value': 0, 'example': 'K'}
"I", {'value': 5, 'example': 'S'}
"J", {'value': 6, 'example': 'S'}
"K", {'value': 0, 'example': 'H'}
"L", {'value': 3, 'example': 'H'}
"M", {'value': 0, 'example': 'E'}
"N", {'value': 13, 'example': 'B'}
"O", {'value': 1, 'example': 'H'}
"P", {'value': 12, 'example': 'B'}
"Q", {'value': 0, 'example': 'C'}
"R", {'value': 2, 'example': 'F'}
"S", {'value': 5, 'example': 'I'}
"T", {'value': 2, 'example': 'G'}


In [16]:
def map_values_with_all_locations(data):
    """
    Mapper function that emits tuples of column/row with associated values and all locations.
    """
    # Emit column, (value, row) and row, (value, column)
    for _, row in data.iterrows():
        yield (row['Column'], (row['Value'], row['Row']))
        yield (row['Row'], (row['Value'], row['Column']))

def reduce_values_with_all_locations(key, values, find_min=False):
    """
    Reducer function that finds the maximum/minimum value for a given key (column or row)
    and all example locations where this value was found.
    """
    func = min if find_min else max
    extreme_values = func([value for value, _ in values])
    example_locations = [location for value, location in values if value == extreme_values]
    
    return key, {"value": extreme_values, "examples": example_locations}

def map_reduce_with_all_locations(data, find_min=False):
    """
    MapReduce process that applies the map_values_with_all_locations and reduce_values_with_all_locations functions.
    """
    intermediate = {}
    # Map step
    for key, value_location in map_values_with_all_locations(data):
        if key not in intermediate:
            intermediate[key] = []
        intermediate[key].append(value_location)

    # Reduce step
    output = {}
    for key, values_locations in intermediate.items():
        reduced_key, reduced_value = reduce_values_with_all_locations(key, values_locations, find_min)
        output[reduced_key] = reduced_value

    return output

In [18]:
df = pd.read_csv('cs4650hw1.dat', names=['Column', 'Row', 'Value'])


column_max_with_all_locations = map_reduce_with_all_locations(df[df['Column'].isin(list('ABCDEFGHIJ'))])  # Max for columns with all locations
row_min_with_all_locations = map_reduce_with_all_locations(df[df['Row'].isin(list('KLMNOPQRST'))], find_min=True)  # Min for rows with all locations

# Merge the two dictionaries for final output
output_part_3 = {**column_max_with_all_locations, **row_min_with_all_locations}

# Format the output for display using the previously defined function
formatted_output = format_output_part2(output_part_3)
print(formatted_output)

"A", {'value': 3, 'examples': ['T']}
"K", {'value': 0, 'examples': ['H']}
"L", {'value': 3, 'examples': ['H', 'B']}
"M", {'value': 0, 'examples': ['E']}
"N", {'value': 13, 'examples': ['B']}
"O", {'value': 1, 'examples': ['H']}
"P", {'value': 12, 'examples': ['B']}
"Q", {'value': 0, 'examples': ['C', 'G']}
"R", {'value': 2, 'examples': ['F']}
"S", {'value': 5, 'examples': ['I']}
"T", {'value': 2, 'examples': ['G']}
"B", {'value': 3, 'examples': ['L']}
"C", {'value': 0, 'examples': ['Q']}
"D", {'value': 19, 'examples': ['K', 'N']}
"E", {'value': 0, 'examples': ['M']}
"F", {'value': 2, 'examples': ['R']}
"G", {'value': 0, 'examples': ['Q']}
"H", {'value': 0, 'examples': ['K']}
"I", {'value': 5, 'examples': ['S']}
"J", {'value': 6, 'examples': ['S']}
