In [76]:
# read in CSVs of requirements, ratings, and responses as a dataframe
# Cobb-specific
import numpy as np
import pandas as pd

def ingestRFP(file, start_row = 0, end_row = 999, req_i = 2, rat_i = 5, resp_i =6):
    '''
    filelist is a file path to a CSV for a proposal
    req_i is the column index for requirements
    resp_i is the column index for responses
    start_row is the row to start reading on
    end_row is the row to stop reading on
    outputs a dataframe of requirement-response pairs
    
    '''
    
    # NOTE: in Excel, save as CSV UTF-8
    rfp_df = pd.read_table(file, sep = ",", skipinitialspace = True)
    
    # TODO: stop doing this as a dict; it doesn't maintain the order. 
    # create a stripped dict
    strip_dict = {
        "req" : rfp_df.iloc[start_row:end_row, req_i],
        "rating" : rfp_df.iloc[start_row:end_row, rat_i],
        "resp" : rfp_df.iloc[start_row:end_row, resp_i]
    }
    
    # first rows, then columns
    return pd.DataFrame(strip_dict)



# add date of response
# add Health / Trade / Other
# save as a file 
corpus = ingestRFP('corpus.csv', 7, 11, 2, 6, 7)
# output_response(corpus, corpus)
newRFP = ingestRFP('newRFP.csv', 7, 11, 2, 6, 7)

newRFP

# cps[cps.union.isin(["Union"])]

matchRows = newRFP.req.isin(corpus.req)
matchRows

# 1. exact match to a CSV of requirements with no responses 
# fill the most recent responses
# output to CSV (or XLS)

# 2. score the non-exact matches by some similarity algorithm
# fill the highest scoring responses

# 3. come up with a way of scoring

7      True
8     False
9      True
10    False
Name: req, dtype: bool

In [79]:
def respondToReq(req, corpus):
    '''
    Given a requirement (req.req) and a dataframe 'corpus' of past responses
    Read through the corpus (corpus.req) to find the past requirement that most closely matches the requirement
    Overwrite the response (req.resp) with that requirement's response.
    '''
    
    for index, row in corpus.iterrows():
        if req.req == row.req:
            req.resp = row.resp
    
    return row

def output_response(corpus, RFP):
    '''
    Given a 'corpus' of past answers
    output an RFP response in CSV format
    that duplicates previous responses to all exactly matching requirements 
    match on requirement language
    '''
    
    # read through each line of the RFP
    # using apply
    
    # send the arguments after the first to df.apply() like this:
    filledRFP = RFP.apply(respondToReq, corpus = corpus, axis = "columns") #, corpus)
    # axis = 'columns' tells apply to apply itself to each row. 

    return filledRFP

# (file, start_row = 0, end_row = 999, req_i = 2, rat_i = 5, resp_i =6)

corpus = ingestRFP('C:/Users/ahicken/Documents/proposal_data/naedutf8.csv', 7, 11, 2, 6, 7)
# output_response(corpus, corpus)

newRFP = ingestRFP('C:/Users/ahicken/Documents/proposal_data/adha.csv', 9, 13, 2, 5, 6)

filledRFP = output_response(corpus, newRFP)

filledRFP

Unnamed: 0,req,rating,resp
9,Ability to create child groups to which a user...,S,Each member company will have their own Sub-Po...
10,Ability to create child groups to which a user...,S,Each member company will have their own Sub-Po...
11,Ability to create child groups to which a user...,S,Each member company will have their own Sub-Po...
12,Ability to create child groups to which a user...,S,Each member company will have their own Sub-Po...


In [45]:
 # DataFrame.join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)[source]

# newRFP.join(corpus, on = 'req', how = 'left' )

# newRFP.join(corpus.set_index(cols), on=cols)



# newRFP

newRFP.merge(corpus, how='left', left_on='req', right_on='req')

Unnamed: 0,req,rating_x,resp_x,rating_y,resp_y
0,Ability to manage registration and access for ...,,,,
1,"Ability to create/configure extended, facilita...",,,,
2,Ability to set enrollment durations for courses,,,,
3,Ability to configure and enforce a certificati...,,,,
