# Converts Data into Format for AMPL .dat 

In [1]:
import pandas as pd
import numpy as np

In [2]:
#Not required for the generation of output text file
from tqdm import tqdm_notebook

## Step 1: Resource Parameter

* Money - the total amount of money that can be spent all counties
* Increment - the level of precision that the model will consider, ex: increment = 100 means the money will spend either 0,100,200,300,... per county
* Limit - Max that the model can spend on a county. Increment should evenly divide this number. Can be tuned later to adjust for computation time without changing optimal value

In [3]:
money = 100000000
increment = 10000
limit = 500000

## Step 2: States & Entry Cost

Read in CSV File with col1 as the states and col2 as the associated entry cost for that state

In [4]:
state = pd.read_csv("Text Files/StatesCost.csv")

In [5]:
#state.head()

## Step 3: Counties & Characteristics

Read in CSV File with county characteristics

In [6]:
county = pd.read_csv("Text Files/CountyChar.csv")

In [7]:
#county.head()

## Step 4: Generate Pairs Parameter

In [8]:
pairs = pd.DataFrame(county[county.columns[0]])

In [9]:
#function to check if state is appended to the end of county
def checkin(s_name, county):
    if "," + s_name in county:
        return 1
    else:
        return 0

In [10]:
#removes commas
def namefix(county):
    return county.replace(',','')

In [11]:
#removes spaces
def spacefix(county):
    return county.replace(' ','')

In [12]:
#removes '
def apfix(county):
    return county.replace("'", '')

In [13]:
#creates the pairing using the above functions
pairs[pairs.columns[0]] = pairs[pairs.columns[0]].apply(apfix)
county[county.columns[0]] = county[county.columns[0]].apply(apfix)
pairs[pairs.columns[0]] = pairs[pairs.columns[0]].apply(spacefix)
county[county.columns[0]] = county[county.columns[0]].apply(spacefix)
for state_name in state[state.columns[0]]:
    pairs[state_name] = np.vectorize(checkin)(state_name, county[county.columns[0]])
pairs[pairs.columns[0]] = pairs[pairs.columns[0]].apply(namefix)
county[county.columns[0]] = county[county.columns[0]].apply(namefix)

In [14]:
#pairs.head()

## Step 5: Run This

In [15]:
#Uncomment an run this cell if the first time using this file or if you want the output file to have a different name
### %%writefile output.txt

In [16]:
#generates file
with open("output.txt", "w") as f:
    f.write("param money := " + str(money) + ";")
    f.write("\n")
    f.write("param increment := " + str(increment) + ";")
    f.write("\n")
    f.write("param limit := " + str(limit) + ";")
    f.write("\n")
    
    f.write("set States := " )
    f.write("\n")
    for i in state.index:
        f.write(state[state.columns[0]][i])
        f.write("\n")
    f.write(";")
    f.write("\n")
    
    f.write("param entry_cost:=")
    f.write("\n")
    for i in state.index:
        f.write(state[state.columns[0]][i] + "     " + str(state[state.columns[1]][i]))
        f.write("\n")
    f.write(";")
    f.write("\n")
    
    f.write("set Counties := " )
    f.write("\n")
    for i in county.index:
        f.write(county[county.columns[0]][i])
        f.write("\n")
    f.write(";")
    f.write("\n")
    
    for col_name in county.columns:
        if col_name != county.columns[0]:
            f.write("param " + col_name + ":=")
            for i in county.index:
                f.write(county[county.columns[0]][i] + "     " + str(county[col_name][i]))
                f.write("\n")
            f.write(";")
            f.write("\n")
            
    f.write("param Pairs: ")
    for i in state.index:
        f.write(state[state.columns[0]][i] + " ")
    f.write(":=")
    f.write("\n")
    for i in pairs.index:
        for col in pairs.columns:
            f.write(str(pairs[col][i]))
            f.write(" ")
        f.write("\n")
    f.write(";")

# Determine Basline

In [17]:
#sample benefit function that should match whats being used in the .mod file
def reduc(c1,c2,c3,i):
    return c3 * 1/(1+c1*(1.001)**(-c2*i))

In [18]:
df = pd.DataFrame()

Baselines at various levels:  
n is the number of states to greedily choose  
*Automate choice of n  
*Make code more concise

In [19]:
df['County'] = county['County']
n=1850
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],10_000))
df['50k']=new_col
sum(df['50k'].sort_values(ascending=False)[0:n])

1196142.0385216908

In [20]:
new_col = []
n=940
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],100_000))
df['100k']=new_col
sum(df['100k'].sort_values(ascending=False)[0:n])

1398429.8144041542

In [21]:
new_col = []
n=370
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],250_000))
df['250k']=new_col
sum(df['250k'].sort_values(ascending=False)[0:n])

2415905.552594636

In [22]:
new_col = []
n=188
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],500_000))
df['500k']=new_col
sum(df['500k'].sort_values(ascending=False)[0:188])

9816332.625105586

In [23]:
new_col = []
n=120
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],750_000))
df['750k']=new_col
sum(df['750k'].sort_values(ascending=False)[0:n])

17326435.222843345

In [24]:
new_col = []
n=94
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],1_000_000))
df['1m']=new_col
sum(df['1m'].sort_values(ascending=False)[0:n])

21495294.035349485