# Converts Data into Format for AMPL .dat 

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook

## Step 1: Resource Parameter

* Money - the total amount of money that can be spent all counties
* Increment - the level of precision that the model will consider, ex: increment = 100 means the money will spend either 0,100,200,300,... per county
* Limit - Max that the model can spend on a county. Increment should evenly divide this number. Can be tuned later to adjust for computation time without changing optimal value

In [2]:
money = 100000000
increment = 10000
limit = 500000

## Step 2: States & Entry Cost

Read in CSV File with col1 as the states and col2 as the associated entry cost for that state

In [3]:
state = pd.read_csv("Text Files/StatesCost.csv")

In [4]:
state.head(30)

Unnamed: 0.1,Unnamed: 0,Entry Cost
0,Alabama,320000
1,Alaska,180000
2,Arizona,260000
3,Arkansas,390000
4,California,360000
5,Colorado,150000
6,Connecticut,370000
7,Delaware,310000
8,DistrictofColumbia,210000
9,Florida,380000


## Step 3: Counties & Characteristics

In [5]:
county = pd.read_csv("Text Files/CountyChar.csv")

In [6]:
county.head()

Unnamed: 0,County,Char_1,Char_2,Char_3
0,"AutaugaCounty,Alabama",274,0.00866,35000
1,"BaldwinCounty,Alabama",297,0.00872,197000
2,"BarbourCounty,Alabama",111,0.00189,30000
3,"BibbCounty,Alabama",293,0.00666,173000
4,"BlountCounty,Alabama",292,0.00846,47000


## Step 4: Generate Pairs Parameter

In [7]:
pairs = pd.DataFrame(county[county.columns[0]])

In [8]:
def checkin(s_name, county):
    if "," + s_name in county:
        return 1
    else:
        return 0

In [9]:
def namefix(county):
    return county.replace(',','')

In [10]:
def spacefix(county):
    return county.replace(' ','')

In [11]:
def apfix(county):
    return county.replace("'", '')

In [12]:
pairs[pairs.columns[0]] = pairs[pairs.columns[0]].apply(apfix)
county[county.columns[0]] = county[county.columns[0]].apply(apfix)
pairs[pairs.columns[0]] = pairs[pairs.columns[0]].apply(spacefix)
county[county.columns[0]] = county[county.columns[0]].apply(spacefix)
for state_name in state[state.columns[0]]:
    pairs[state_name] = np.vectorize(checkin)(state_name, county[county.columns[0]])
pairs[pairs.columns[0]] = pairs[pairs.columns[0]].apply(namefix)
county[county.columns[0]] = county[county.columns[0]].apply(namefix)

In [13]:
pairs.head()

Unnamed: 0,County,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,DistrictofColumbia,...,SouthDakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,WestVirginia,Wisconsin,Wyoming
0,AutaugaCountyAlabama,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,BaldwinCountyAlabama,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,BarbourCountyAlabama,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,BibbCountyAlabama,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,BlountCountyAlabama,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Step 5: Run This Cell

In [14]:
# %%writefile output.txt

In [15]:
with open("output.txt", "w") as f:
    f.write("param money := " + str(money) + ";")
    f.write("\n")
    f.write("param increment := " + str(increment) + ";")
    f.write("\n")
    f.write("param limit := " + str(limit) + ";")
    f.write("\n")
    
    f.write("set States := " )
    f.write("\n")
    for i in state.index:
        f.write(state[state.columns[0]][i])
        f.write("\n")
    f.write(";")
    f.write("\n")
    
    f.write("param entry_cost:=")
    f.write("\n")
    for i in state.index:
        f.write(state[state.columns[0]][i] + "     " + str(state[state.columns[1]][i]))
        f.write("\n")
    f.write(";")
    f.write("\n")
    
    f.write("set Counties := " )
    f.write("\n")
    for i in county.index:
        f.write(county[county.columns[0]][i])
        f.write("\n")
    f.write(";")
    f.write("\n")
    
    for col_name in county.columns:
        if col_name != county.columns[0]:
            f.write("param " + col_name + ":=")
            for i in county.index:
                f.write(county[county.columns[0]][i] + "     " + str(county[col_name][i]))
                f.write("\n")
            f.write(";")
            f.write("\n")
            
    f.write("param Pairs: ")
    for i in state.index:
        f.write(state[state.columns[0]][i] + " ")
    f.write(":=")
    f.write("\n")
    for i in pairs.index:
        for col in pairs.columns:
            f.write(str(pairs[col][i]))
            f.write(" ")
        f.write("\n")
    f.write(";")

# Determine Basline

In [29]:
def reduc(c1,c2,c3,i):
    return c3 * 1/(1+c1*(1.001)**(-c2*i))

In [17]:
df = pd.DataFrame()

In [30]:
df['County'] = county['County']
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],10_000))
df['50k']=new_col
sum(df['50k'].sort_values(ascending=False)[0:1850])

1196142.0385216908

In [31]:
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],100_000))
df['100k']=new_col
sum(df['100k'].sort_values(ascending=False)[0:940])

1398429.8144041542

In [32]:
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],250_000))
df['250k']=new_col
sum(df['250k'].sort_values(ascending=False)[0:370])

2415905.552594636

In [40]:
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],500_000))
df['500k']=new_col
sum(df['500k'].sort_values(ascending=False)[0:188])

9816332.625105586

In [42]:
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],750_000))
df['750k']=new_col
sum(df['750k'].sort_values(ascending=False)[0:120])

17326435.222843345

In [39]:
new_col = []
for i in range(df.shape[0]):
     new_col.append(reduc(county['Char_1'][i],county['Char_2'][i],county['Char_3'][i],1_000_000))
df['1m']=new_col
sum(df['1m'].sort_values(ascending=False)[0:94])

17129294.12056565