In [66]:
import pandas as pd
import random

In [67]:
# Load the neighborhood-pair table from the working directory. Later cells read
# the columns Pair_ID, new_link, GEOID, median_rooms, population_density,
# NatWalkInd, median_income and median_age from this frame.
df = pd.read_csv("pairs_info.csv")

In [72]:
def make_iframes(link, prefix_len=27):
    """Return the Observable embed ``<iframe>`` HTML for a notebook link.

    Args:
        link: Notebook URL. Any value that is not a string (for example a
            NaN coming from an empty CSV cell) produces an empty string.
        prefix_len: Number of leading characters stripped from ``link`` to
            obtain the notebook id. The default of 27 equals
            ``len("https://observablehq.com/d/")`` -- presumably the shape
            of the links in ``new_link``; confirm against the data.

    Returns:
        The iframe markup as a string, or ``""`` when ``link`` is not a
        string.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses such as numpy.str_.
    if not isinstance(link, str):
        return ""
    notebook_id = link[prefix_len:]
    return (
        '<iframe width="100%" height="401" frameborder="0"\n'
        f'      src="https://observablehq.com/embed/{notebook_id}?cells=test"></iframe>'
    )

In [83]:
# Build the embed markup for every row's notebook link.
df['iframe'] = df['new_link'].apply(make_iframes)

In [85]:
def _nonzero_randint(low, high):
    """Draw from ``random.randint(low, high)``, re-drawing while the result is 0."""
    value = random.randint(low, high)
    while value == 0:
        value = random.randint(low, high)
    return value


def get_house_info(df,pair):
    """Build the comparison table for the two neighborhoods of one pair.

    Two comparable, hypothetical properties are generated: both share the
    same bedroom and bathroom counts, while square footage and year built
    of the second property differ from the first by a small non-zero
    random offset.

    Args:
        df: Full dataframe. Must contain the columns ``Pair_ID``, ``GEOID``,
            ``iframe``, ``median_rooms``, ``population_density``,
            ``NatWalkInd``, ``median_income`` and ``median_age``.
        pair: ``Pair_ID`` value selecting the two neighborhoods.

    Returns:
        A list ``[table, property_info, iframes]`` where ``table`` is the
        HTML table, ``property_info`` maps ``'Property 1'`` /
        ``'Property 2'`` to the GEOID of each neighborhood, and ``iframes``
        is the list of iframe strings for the pair.

    Raises:
        ValueError: If ``pair`` does not match exactly two rows of ``df``.
    """
    # Rows for the two neighborhoods that make up this pair.
    pair_rows = df.loc[df['Pair_ID'] == pair]
    if len(pair_rows) != 2:
        raise ValueError(
            f"Pair_ID {pair!r} must match exactly 2 rows, found {len(pair_rows)}"
        )

    # Average of the median room counts drives the bed/bath split.
    mean_rooms = pair_rows.median_rooms.mean()
    # Bedrooms: three quarters of the average room count, truncated.
    beds = int(mean_rooms - .25*mean_rooms)
    # Bathrooms: whatever is left of the rounded average room count.
    baths = round(mean_rooms) - beds
    # Square feet from fixed per-bedroom / per-bathroom allowances
    # (602 and 151 -- rationale not visible here; TODO confirm).
    sq_feet = beds*602 + baths*151

    # Draw order (sq-ft offset, year, year offset) is kept stable so a seeded
    # `random` yields the same sequence of values.
    sq_feet_offset = _nonzero_randint(-15, 15)
    year_built = random.randint(1980,2005)
    year_offset = _nonzero_randint(-3, 3)

    iframes = pair_rows.iframe.to_list()
    geoids = pair_rows.GEOID.to_list()
    property_info = {'Property 1': geoids[0],'Property 2': geoids[1]}

    housing_info = {
        'Population Density/sq. mile':pair_rows.population_density.astype('int32').to_list(),
        'Walk Score':pair_rows.NatWalkInd.astype('int32').to_list(),
        'Median Household Income':pair_rows.median_income.astype('int32').to_list(),
        'Median Age of Residents':pair_rows.median_age.astype('int32').to_list(),
        'Median Rooms per Household':pair_rows.median_rooms.astype('int32').to_list(),
        'Bedrooms':[beds, beds],
        'Bathrooms':[baths, baths],
        'Square Feet': [sq_feet,sq_feet + sq_feet_offset],
        'Year Built': [year_built,year_built + year_offset]
    }
    # One row per attribute, one column per property. A separate name is used
    # so the `df` argument is not rebound.
    summary = pd.DataFrame(housing_info).transpose()
    summary.columns = ['Property 1', 'Property 2']
    table = summary.to_html(justify='center',bold_rows=True)
    return [table,property_info,iframes]

In [91]:
def get_question_html(df,pairs):
    """Build the survey question HTML for every requested pair.

    Args:
        df: Full dataframe, forwarded unchanged to ``get_house_info``.
        pairs: Iterable of ``Pair_ID`` values to generate questions for.

    Returns:
        A tuple ``(questions, questions_with_iframes)``. Each is a list of
        ``(metadata, html)`` tuples, one per pair; the second variant appends
        the neighborhood iframes of both properties after the table.
    """
    questions = []
    questions_with_iframes = []
    intro = """<div style="text-align: left;">Given the following information 
                about two properties, please choose which is more valuable and
                estimate its dollar amount difference over the other.</div>
                <div style="text-align: left;">&nbsp;
                """
    # Iterate the `pairs` argument (not a notebook-level global) so the
    # function honours its input and works on a fresh kernel.
    for pair in pairs:
        table, geoids, iframes = get_house_info(df,pair)
        metadata = f"""Pair number {pair}\nProperty 1 GeoID: {geoids.get('Property 1')}\nProperty 2 GeoID: {geoids.get('Property 2')}"""
        # NOTE(review): the second <div> opened by `intro` is only closed by
        # `ending`, so this iframe-less variant leaves it open.
        questions.append((metadata,intro + table))
        ending = f"""<br><span style="font-size:19px;">Neighborhood of Property
                    1</span><br>{iframes[0]}<br><br><span style="font-size:19px;">
                    Neighborhood of Property 2</span><br>{iframes[1]}</div>
                    """
        questions_with_iframes.append((metadata,intro + table + ending))
    return questions,questions_with_iframes

In [87]:
def save_questions(questions,filename):
    """Write a list of ``(metadata, html)`` questions to a text file.

    Each entry is written as the metadata framed by two lines of asterisks,
    a blank line, the HTML body, and a closing blank line. The file is opened
    in ``"w"`` mode, so any existing content is replaced.
    """
    with open(filename,"w") as out:
        out.writelines(
            f"*******\n{entry[0]}\n*******\n\n{entry[1]}\n\n"
            for entry in questions
        )

In [88]:
# Pair_ID values to generate questions for (passed to run() below).
# NOTE(review): the criteria that make these pairs "good" are not recorded here.
good_pairs = [3,10,11,13,14,15,16,17,18,20,25]

In [92]:
def run(df,pairs):
    """Generate the questions for ``pairs`` and save both variants to disk.

    The plain questions go to ``questions_without_iframes.txt`` and the
    variant with neighborhood iframes to ``questions_with_iframes.txt``,
    both in the current working directory.
    """
    plain, framed = get_question_html(df,pairs)
    outputs = (
        (plain, 'questions_without_iframes.txt'),
        (framed, 'questions_with_iframes.txt'),
    )
    for question_set, filename in outputs:
        save_questions(question_set, filename)

In [93]:
# Generate the questions for the selected pairs and write both output files.
run(df,good_pairs)